diff --git a/.buildinfo b/.buildinfo
old mode 100644
new mode 100755
index 4008a6ed..4151526e
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
-# Sphinx build info version 1
-# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 88136a339ad9f24f63bebc3ce6bec1df
-tags: 645f666f9bcd5a90fca523b33c5a78b7
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 6c7bebb5f823a0444f206cc24f8717b0
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school.gif b/_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school.gif
old mode 100644
new mode 100755
diff --git a/_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif b/_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif
old mode 100644
new mode 100755
diff --git a/_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,.gif b/_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,.gif
old mode 100644
new mode 100755
diff --git a/_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif b/_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif
old mode 100644
new mode 100755
diff --git a/_images/00.png b/_images/00.png
old mode 100644
new mode 100755
diff --git a/_images/001.png b/_images/001.png
old mode 100644
new mode 100755
diff --git a/_images/01.png b/_images/01.png
old mode 100644
new mode 100755
diff --git a/_images/010.png b/_images/010.png
old mode 100644
new mode 100755
diff --git a/_images/011.png b/_images/011.png
old mode 100644
new mode 100755
diff --git a/_images/012.png b/_images/012.png
old mode 100644
new mode 100755
diff --git a/_images/013.png b/_images/013.png
new file mode 100755
index 00000000..fa74e292
Binary files /dev/null and b/_images/013.png differ
diff --git a/_images/02.png b/_images/02.png
old mode 100644
new mode 100755
diff --git a/_images/021.png b/_images/021.png
old mode 100644
new mode 100755
diff --git a/_images/022.png b/_images/022.png
new file mode 100755
index 00000000..aee92089
Binary files /dev/null and b/_images/022.png differ
diff --git a/_images/03.png b/_images/03.png
old mode 100644
new mode 100755
diff --git a/_images/031.png b/_images/031.png
old mode 100644
new mode 100755
diff --git a/_images/032.png b/_images/032.png
new file mode 100755
index 00000000..846a1e6e
Binary files /dev/null and b/_images/032.png differ
diff --git a/_images/04.png b/_images/04.png
old mode 100644
new mode 100755
diff --git a/_images/041.png b/_images/041.png
old mode 100644
new mode 100755
diff --git a/_images/042.png b/_images/042.png
new file mode 100755
index 00000000..3ef7267c
Binary files /dev/null and b/_images/042.png differ
diff --git a/_images/05.png b/_images/05.png
old mode 100644
new mode 100755
diff --git a/_images/051.png b/_images/051.png
old mode 100644
new mode 100755
diff --git a/_images/052.png b/_images/052.png
new file mode 100755
index 00000000..71781f20
Binary files /dev/null and b/_images/052.png differ
diff --git a/_images/06.png b/_images/06.png
old mode 100644
new mode 100755
diff --git a/_images/061.png b/_images/061.png
old mode 100644
new mode 100755
diff --git a/_images/062.png b/_images/062.png
new file mode 100755
index 00000000..491b65fb
Binary files /dev/null and b/_images/062.png differ
diff --git a/_images/07.png b/_images/07.png
old mode 100644
new mode 100755
diff --git a/_images/071.png b/_images/071.png
old mode 100644
new mode 100755
diff --git a/_images/072.png b/_images/072.png
new file mode 100755
index 00000000..a7baf832
Binary files /dev/null and b/_images/072.png differ
diff --git a/_images/08.png b/_images/08.png
old mode 100644
new mode 100755
diff --git a/_images/081.png b/_images/081.png
old mode 100644
new mode 100755
diff --git a/_images/09.png b/_images/09.png
old mode 100644
new mode 100755
diff --git a/_images/091.png b/_images/091.png
old mode 100644
new mode 100755
diff --git a/_images/1.png b/_images/1.png
old mode 100644
new mode 100755
diff --git a/_images/10.png b/_images/10.png
old mode 100644
new mode 100755
diff --git a/_images/101.png b/_images/101.png
old mode 100644
new mode 100755
diff --git a/_images/102.png b/_images/102.png
old mode 100644
new mode 100755
diff --git a/_images/11.png b/_images/11.png
old mode 100644
new mode 100755
diff --git a/_images/12.png b/_images/12.png
old mode 100644
new mode 100755
diff --git a/_images/13.png b/_images/13.png
old mode 100644
new mode 100755
diff --git a/_images/14.png b/_images/14.png
old mode 100644
new mode 100755
diff --git a/_images/2.png b/_images/2.png
old mode 100644
new mode 100755
diff --git a/_images/21.png b/_images/21.png
old mode 100644
new mode 100755
diff --git a/_images/3.png b/_images/3.png
old mode 100644
new mode 100755
diff --git a/_images/31.png b/_images/31.png
old mode 100644
new mode 100755
diff --git a/_images/4.png b/_images/4.png
old mode 100644
new mode 100755
diff --git a/_images/41.png b/_images/41.png
old mode 100644
new mode 100755
diff --git a/_images/4classes.png b/_images/4classes.png
old mode 100644
new mode 100755
diff --git a/_images/5.png b/_images/5.png
old mode 100644
new mode 100755
diff --git a/_images/51.png b/_images/51.png
old mode 100644
new mode 100755
diff --git a/_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png b/_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png
old mode 100644
new mode 100755
diff --git a/_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,.gif b/_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,.gif
old mode 100644
new mode 100755
diff --git a/_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif b/_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif
old mode 100644
new mode 100755
diff --git a/_images/6.png b/_images/6.png
old mode 100644
new mode 100755
diff --git a/_images/61.png b/_images/61.png
old mode 100644
new mode 100755
diff --git a/_images/7.png b/_images/7.png
old mode 100644
new mode 100755
diff --git a/_images/71.png b/_images/71.png
old mode 100644
new mode 100755
diff --git a/_images/8.png b/_images/8.png
old mode 100644
new mode 100755
diff --git a/_images/81.png b/_images/81.png
old mode 100644
new mode 100755
diff --git a/_images/9.png b/_images/9.png
old mode 100644
new mode 100755
diff --git a/_images/91.png b/_images/91.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab01.png b/_images/ConceptLab01.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab02.png b/_images/ConceptLab02.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab03.png b/_images/ConceptLab03.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab04.png b/_images/ConceptLab04.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab05.png b/_images/ConceptLab05.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab06.png b/_images/ConceptLab06.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab07.png b/_images/ConceptLab07.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab08.png b/_images/ConceptLab08.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab09.png b/_images/ConceptLab09.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab10.png b/_images/ConceptLab10.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab11.png b/_images/ConceptLab11.png
old mode 100644
new mode 100755
diff --git a/_images/ConceptLab12.png b/_images/ConceptLab12.png
old mode 100644
new mode 100755
diff --git a/_images/DDIM_pic.png b/_images/DDIM_pic.png
old mode 100644
new mode 100755
diff --git a/_images/DDPM_eq.png b/_images/DDPM_eq.png
old mode 100644
new mode 100755
diff --git a/_images/DreamFusioneq1.png b/_images/DreamFusioneq1.png
new file mode 100755
index 00000000..afe4bee4
Binary files /dev/null and b/_images/DreamFusioneq1.png differ
diff --git a/_images/DreamFusioneq2.png b/_images/DreamFusioneq2.png
new file mode 100755
index 00000000..0caba869
Binary files /dev/null and b/_images/DreamFusioneq2.png differ
diff --git a/_images/DreamFusioneq3.png b/_images/DreamFusioneq3.png
new file mode 100755
index 00000000..7d15bca4
Binary files /dev/null and b/_images/DreamFusioneq3.png differ
diff --git a/_images/DreamFusioneq4.png b/_images/DreamFusioneq4.png
new file mode 100755
index 00000000..fcebcfb6
Binary files /dev/null and b/_images/DreamFusioneq4.png differ
diff --git a/_images/DreamFusioneq5.png b/_images/DreamFusioneq5.png
new file mode 100755
index 00000000..01fe5e55
Binary files /dev/null and b/_images/DreamFusioneq5.png differ
diff --git a/_images/DreamFusioneq6.png b/_images/DreamFusioneq6.png
new file mode 100755
index 00000000..a3d92133
Binary files /dev/null and b/_images/DreamFusioneq6.png differ
diff --git a/_images/DreamFusioneq7.png b/_images/DreamFusioneq7.png
new file mode 100755
index 00000000..95d56959
Binary files /dev/null and b/_images/DreamFusioneq7.png differ
diff --git a/_images/DreamFusionfig1.png b/_images/DreamFusionfig1.png
new file mode 100755
index 00000000..d88e444e
Binary files /dev/null and b/_images/DreamFusionfig1.png differ
diff --git a/_images/DreamFusionfig2.png b/_images/DreamFusionfig2.png
new file mode 100755
index 00000000..a262a273
Binary files /dev/null and b/_images/DreamFusionfig2.png differ
diff --git a/_images/DreamFusionfig3.png b/_images/DreamFusionfig3.png
new file mode 100755
index 00000000..ef007c10
Binary files /dev/null and b/_images/DreamFusionfig3.png differ
diff --git a/_images/DreamFusionfig3_fig.png b/_images/DreamFusionfig3_fig.png
new file mode 100755
index 00000000..2f785a7b
Binary files /dev/null and b/_images/DreamFusionfig3_fig.png differ
diff --git a/_images/DreamFusionfig4.png b/_images/DreamFusionfig4.png
new file mode 100755
index 00000000..a5df160f
Binary files /dev/null and b/_images/DreamFusionfig4.png differ
diff --git a/_images/DreamFusionfig5.png b/_images/DreamFusionfig5.png
new file mode 100755
index 00000000..455ee23c
Binary files /dev/null and b/_images/DreamFusionfig5.png differ
diff --git a/_images/DreamFusionfig8.png b/_images/DreamFusionfig8.png
new file mode 100755
index 00000000..fb18e48b
Binary files /dev/null and b/_images/DreamFusionfig8.png differ
diff --git a/_images/DreamFusiontable1.png b/_images/DreamFusiontable1.png
new file mode 100755
index 00000000..36c3833e
Binary files /dev/null and b/_images/DreamFusiontable1.png differ
diff --git a/_images/IMG_4859.png b/_images/IMG_4859.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4860.png b/_images/IMG_4860.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4861.png b/_images/IMG_4861.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4869.png b/_images/IMG_4869.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4872.png b/_images/IMG_4872.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4874.png b/_images/IMG_4874.png
old mode 100644
new mode 100755
diff --git a/_images/IMG_4891.png b/_images/IMG_4891.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_1.png b/_images/LCM-LoRA_1.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_10.png b/_images/LCM-LoRA_10.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_2.png b/_images/LCM-LoRA_2.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_3.png b/_images/LCM-LoRA_3.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_4.png b/_images/LCM-LoRA_4.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_5.png b/_images/LCM-LoRA_5.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_6.png b/_images/LCM-LoRA_6.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_7.png b/_images/LCM-LoRA_7.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_8.png b/_images/LCM-LoRA_8.png
old mode 100644
new mode 100755
diff --git a/_images/LCM-LoRA_9.png b/_images/LCM-LoRA_9.png
old mode 100644
new mode 100755
diff --git a/_images/LMD1.png b/_images/LMD1.png
old mode 100644
new mode 100755
diff --git a/_images/LMD10.png b/_images/LMD10.png
old mode 100644
new mode 100755
diff --git a/_images/LMD11.png b/_images/LMD11.png
old mode 100644
new mode 100755
diff --git a/_images/LMD12.png b/_images/LMD12.png
old mode 100644
new mode 100755
diff --git a/_images/LMD13.png b/_images/LMD13.png
old mode 100644
new mode 100755
diff --git a/_images/LMD14.png b/_images/LMD14.png
old mode 100644
new mode 100755
diff --git a/_images/LMD2.png b/_images/LMD2.png
old mode 100644
new mode 100755
diff --git a/_images/LMD3.png b/_images/LMD3.png
old mode 100644
new mode 100755
diff --git a/_images/LMD4.png b/_images/LMD4.png
old mode 100644
new mode 100755
diff --git a/_images/LMD6.png b/_images/LMD6.png
old mode 100644
new mode 100755
diff --git a/_images/LMD7.png b/_images/LMD7.png
old mode 100644
new mode 100755
diff --git a/_images/LMD8.png b/_images/LMD8.png
old mode 100644
new mode 100755
diff --git a/_images/LMD9.png b/_images/LMD9.png
old mode 100644
new mode 100755
diff --git a/_images/Latent_Space.png b/_images/Latent_Space.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_1.png b/_images/MimicBrush_1.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_2.png b/_images/MimicBrush_2.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_3.png b/_images/MimicBrush_3.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_4.png b/_images/MimicBrush_4.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_5.png b/_images/MimicBrush_5.png
old mode 100644
new mode 100755
diff --git a/_images/MimicBrush_6.png b/_images/MimicBrush_6.png
old mode 100644
new mode 100755
diff --git a/_images/styleGAN_fig1.png b/_images/StyleGAN_fig1.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig1.png
rename to _images/StyleGAN_fig1.png
diff --git a/_images/styleGAN_fig2.png b/_images/StyleGAN_fig2.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig2.png
rename to _images/StyleGAN_fig2.png
diff --git a/_images/styleGAN_fig3.png b/_images/StyleGAN_fig3.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig3.png
rename to _images/StyleGAN_fig3.png
diff --git a/_images/styleGAN_fig4.png b/_images/StyleGAN_fig4.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig4.png
rename to _images/StyleGAN_fig4.png
diff --git a/_images/styleGAN_fig5.png b/_images/StyleGAN_fig5.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig5.png
rename to _images/StyleGAN_fig5.png
diff --git a/_images/styleGAN_fig6.png b/_images/StyleGAN_fig6.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig6.png
rename to _images/StyleGAN_fig6.png
diff --git a/_images/styleGAN_fig7.png b/_images/StyleGAN_fig7.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig7.png
rename to _images/StyleGAN_fig7.png
diff --git a/_images/styleGAN_fig8.png b/_images/StyleGAN_fig8.png
old mode 100644
new mode 100755
similarity index 100%
rename from _images/styleGAN_fig8.png
rename to _images/StyleGAN_fig8.png
diff --git a/_images/TEXTUALINVERSION.png b/_images/TEXTUALINVERSION.png
old mode 100644
new mode 100755
diff --git a/_images/Unet.png b/_images/Unet.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled.png b/_images/Untitled.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled1.png b/_images/Untitled1.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled10.png b/_images/Untitled10.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled11.png b/_images/Untitled11.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled111.png b/_images/Untitled111.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled2.png b/_images/Untitled2.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled3.png b/_images/Untitled3.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled31.png b/_images/Untitled31.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled4.png b/_images/Untitled4.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled5.png b/_images/Untitled5.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled6.png b/_images/Untitled6.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled7.png b/_images/Untitled7.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled8.png b/_images/Untitled8.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled9.png b/_images/Untitled9.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_1.png b/_images/Untitled_1.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_10.png b/_images/Untitled_10.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_12.png b/_images/Untitled_12.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_2.png b/_images/Untitled_2.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_3.png b/_images/Untitled_3.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_4.png b/_images/Untitled_4.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_6.png b/_images/Untitled_6.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_8.png b/_images/Untitled_8.png
old mode 100644
new mode 100755
diff --git a/_images/Untitled_9.png b/_images/Untitled_9.png
old mode 100644
new mode 100755
diff --git a/_images/adagn_table.png b/_images/adagn_table.png
old mode 100644
new mode 100755
diff --git a/_images/algorithm.png b/_images/algorithm.png
old mode 100644
new mode 100755
diff --git a/_images/animation.png b/_images/animation.png
old mode 100644
new mode 100755
diff --git a/_images/architect_1.png b/_images/architect_1.png
old mode 100644
new mode 100755
diff --git a/_images/architect_2.png b/_images/architect_2.png
old mode 100644
new mode 100755
diff --git a/_images/architect_3.png b/_images/architect_3.png
old mode 100644
new mode 100755
diff --git a/_images/architecture.png b/_images/architecture.png
old mode 100644
new mode 100755
diff --git a/_images/architecture2.png b/_images/architecture2.png
old mode 100644
new mode 100755
diff --git a/_images/attention3d.png b/_images/attention3d.png
old mode 100644
new mode 100755
diff --git a/_images/block.png b/_images/block.png
old mode 100644
new mode 100755
diff --git a/_images/cascaded_dms.png b/_images/cascaded_dms.png
old mode 100644
new mode 100755
diff --git a/_images/cat.png b/_images/cat.png
old mode 100644
new mode 100755
diff --git a/_images/class_eq1.png b/_images/class_eq1.png
old mode 100644
new mode 100755
diff --git a/_images/class_eq2.png b/_images/class_eq2.png
old mode 100644
new mode 100755
diff --git a/_images/classifier_guidance_vis.png b/_images/classifier_guidance_vis.png
old mode 100644
new mode 100755
diff --git a/_images/clip.png b/_images/clip.png
old mode 100644
new mode 100755
diff --git a/_images/cm3leon_result.png b/_images/cm3leon_result.png
old mode 100644
new mode 100755
diff --git a/_images/coin3d_01.png b/_images/coin3d_01.png
new file mode 100755
index 00000000..653c2a2c
Binary files /dev/null and b/_images/coin3d_01.png differ
diff --git a/_images/coin3d_02.png b/_images/coin3d_02.png
new file mode 100755
index 00000000..bde73044
Binary files /dev/null and b/_images/coin3d_02.png differ
diff --git a/_images/coin3d_03.png b/_images/coin3d_03.png
new file mode 100755
index 00000000..f027cfb2
Binary files /dev/null and b/_images/coin3d_03.png differ
diff --git a/_images/coin3d_04.png b/_images/coin3d_04.png
new file mode 100755
index 00000000..9903cf9b
Binary files /dev/null and b/_images/coin3d_04.png differ
diff --git a/_images/coin3d_05.png b/_images/coin3d_05.png
new file mode 100755
index 00000000..d9821f98
Binary files /dev/null and b/_images/coin3d_05.png differ
diff --git a/_images/coin3d_06.png b/_images/coin3d_06.png
new file mode 100755
index 00000000..4783f237
Binary files /dev/null and b/_images/coin3d_06.png differ
diff --git a/_images/coin3d_07.png b/_images/coin3d_07.png
new file mode 100755
index 00000000..8171360f
Binary files /dev/null and b/_images/coin3d_07.png differ
diff --git a/_images/coin3d_08.png b/_images/coin3d_08.png
new file mode 100755
index 00000000..84c784eb
Binary files /dev/null and b/_images/coin3d_08.png differ
diff --git a/_images/coin3d_09.png b/_images/coin3d_09.png
new file mode 100755
index 00000000..9d566f60
Binary files /dev/null and b/_images/coin3d_09.png differ
diff --git a/_images/coin3d_10.png b/_images/coin3d_10.png
new file mode 100755
index 00000000..f323194a
Binary files /dev/null and b/_images/coin3d_10.png differ
diff --git a/_images/coin3d_11.png b/_images/coin3d_11.png
new file mode 100755
index 00000000..66e99d95
Binary files /dev/null and b/_images/coin3d_11.png differ
diff --git a/_images/compare_table.png b/_images/compare_table.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_01.png b/_images/consistency_models_01.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_02.png b/_images/consistency_models_02.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_03.png b/_images/consistency_models_03.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_04.png b/_images/consistency_models_04.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_05.png b/_images/consistency_models_05.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_06.png b/_images/consistency_models_06.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_07.png b/_images/consistency_models_07.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_08.png b/_images/consistency_models_08.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_09.png b/_images/consistency_models_09.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_10.png b/_images/consistency_models_10.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_11.png b/_images/consistency_models_11.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_12.png b/_images/consistency_models_12.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_13.png b/_images/consistency_models_13.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_14.png b/_images/consistency_models_14.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_15.png b/_images/consistency_models_15.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_16.png b/_images/consistency_models_16.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_17.png b/_images/consistency_models_17.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_18.png b/_images/consistency_models_18.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_19.png b/_images/consistency_models_19.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_20.png b/_images/consistency_models_20.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_21.png b/_images/consistency_models_21.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_22.png b/_images/consistency_models_22.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_23.png b/_images/consistency_models_23.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_24.png b/_images/consistency_models_24.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_25.png b/_images/consistency_models_25.png
old mode 100644
new mode 100755
diff --git a/_images/consistency_models_26.png b/_images/consistency_models_26.png
old mode 100644
new mode 100755
diff --git a/_images/conv3d.png b/_images/conv3d.png
old mode 100644
new mode 100755
diff --git a/_images/ddim_pipe.png b/_images/ddim_pipe.png
old mode 100644
new mode 100755
diff --git a/_images/ddpm_pipeline.png b/_images/ddpm_pipeline.png
old mode 100644
new mode 100755
diff --git a/_images/deer.png b/_images/deer.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_01.png b/_images/dreambooth_01.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_02.png b/_images/dreambooth_02.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_03.png b/_images/dreambooth_03.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_04.png b/_images/dreambooth_04.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_05.png b/_images/dreambooth_05.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_06.png b/_images/dreambooth_06.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_07.png b/_images/dreambooth_07.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_08.png b/_images/dreambooth_08.png
old mode 100644
new mode 100755
diff --git a/_images/dreambooth_09.png b/_images/dreambooth_09.png
old mode 100644
new mode 100755
diff --git a/_images/dreamfieldfig1.png b/_images/dreamfieldfig1.png
new file mode 100755
index 00000000..b12ed913
Binary files /dev/null and b/_images/dreamfieldfig1.png differ
diff --git a/_images/eDiff-I.png b/_images/eDiff-I.png
old mode 100644
new mode 100755
diff --git a/_images/efficiency.png b/_images/efficiency.png
old mode 100644
new mode 100755
diff --git a/_images/einops.png b/_images/einops.png
old mode 100644
new mode 100755
diff --git a/_images/eq_1.png b/_images/eq_1.png
old mode 100644
new mode 100755
diff --git a/_images/eq_11.png b/_images/eq_11.png
old mode 100644
new mode 100755
diff --git a/_images/evalution.png b/_images/evalution.png
old mode 100644
new mode 100755
diff --git a/_images/exp-1.png b/_images/exp-1.png
old mode 100644
new mode 100755
diff --git a/_images/exp-2.png b/_images/exp-2.png
old mode 100644
new mode 100755
diff --git a/_images/exp-3.png b/_images/exp-3.png
old mode 100644
new mode 100755
diff --git a/_images/exp-4.png b/_images/exp-4.png
old mode 100644
new mode 100755
diff --git a/_images/exp-5.png b/_images/exp-5.png
old mode 100644
new mode 100755
diff --git a/_images/experiment1.png b/_images/experiment1.png
old mode 100644
new mode 100755
diff --git a/_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png b/_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png
new file mode 100755
index 00000000..0e1c70c6
Binary files /dev/null and b/_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png differ
diff --git a/_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png b/_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png
new file mode 100755
index 00000000..8232902b
Binary files /dev/null and b/_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png differ
diff --git a/_images/fig1.png b/_images/fig1.png
old mode 100644
new mode 100755
diff --git a/_images/fig10.png b/_images/fig10.png
old mode 100644
new mode 100755
diff --git a/_images/fig11.png b/_images/fig11.png
old mode 100644
new mode 100755
diff --git a/_images/fig12.png b/_images/fig12.png
old mode 100644
new mode 100755
diff --git a/_images/fig13.png b/_images/fig13.png
old mode 100644
new mode 100755
diff --git a/_images/fig14.png b/_images/fig14.png
old mode 100644
new mode 100755
diff --git a/_images/fig15.png b/_images/fig15.png
old mode 100644
new mode 100755
diff --git a/_images/fig16.png b/_images/fig16.png
old mode 100644
new mode 100755
diff --git a/_images/fig2.png b/_images/fig2.png
old mode 100644
new mode 100755
diff --git a/_images/fig21.png b/_images/fig21.png
old mode 100644
new mode 100755
diff --git a/_images/fig3.png b/_images/fig3.png
old mode 100644
new mode 100755
diff --git a/_images/fig4.gif b/_images/fig4.gif
old mode 100644
new mode 100755
diff --git a/_images/fig4.png b/_images/fig4.png
old mode 100644
new mode 100755
diff --git a/_images/fig5.png b/_images/fig5.png
old mode 100644
new mode 100755
diff --git a/_images/fig51.png b/_images/fig51.png
old mode 100644
new mode 100755
diff --git a/_images/fig6.png b/_images/fig6.png
old mode 100644
new mode 100755
diff --git a/_images/fig61.png b/_images/fig61.png
old mode 100644
new mode 100755
diff --git a/_images/fig7.png b/_images/fig7.png
old mode 100644
new mode 100755
diff --git a/_images/fig71.png b/_images/fig71.png
old mode 100644
new mode 100755
diff --git a/_images/fig8.png b/_images/fig8.png
old mode 100644
new mode 100755
diff --git a/_images/fig81.png b/_images/fig81.png
old mode 100644
new mode 100755
diff --git a/_images/fig9.png b/_images/fig9.png
old mode 100644
new mode 100755
diff --git a/_images/fig91.png b/_images/fig91.png
old mode 100644
new mode 100755
diff --git a/_images/fig_1.png b/_images/fig_1.png
old mode 100644
new mode 100755
diff --git a/_images/fig_10.png b/_images/fig_10.png
old mode 100644
new mode 100755
diff --git a/_images/fig_11.png b/_images/fig_11.png
old mode 100644
new mode 100755
diff --git a/_images/fig_13.png b/_images/fig_13.png
old mode 100644
new mode 100755
diff --git a/_images/fig_131.png b/_images/fig_131.png
old mode 100644
new mode 100755
diff --git a/_images/fig_2.png b/_images/fig_2.png
old mode 100644
new mode 100755
diff --git a/_images/fig_3.png b/_images/fig_3.png
old mode 100644
new mode 100755
diff --git a/_images/fig_31.png b/_images/fig_31.png
old mode 100644
new mode 100755
diff --git a/_images/fig_4.png b/_images/fig_4.png
old mode 100644
new mode 100755
diff --git a/_images/fig_41.png b/_images/fig_41.png
old mode 100644
new mode 100755
diff --git a/_images/fig_5.png b/_images/fig_5.png
old mode 100644
new mode 100755
diff --git a/_images/fig_6.png b/_images/fig_6.png
old mode 100644
new mode 100755
diff --git a/_images/fig_61.png b/_images/fig_61.png
old mode 100644
new mode 100755
diff --git a/_images/fig_7.png b/_images/fig_7.png
old mode 100644
new mode 100755
diff --git a/_images/fig_8.png b/_images/fig_8.png
old mode 100644
new mode 100755
diff --git a/_images/figure1.1.png b/_images/figure1.1.png
old mode 100644
new mode 100755
diff --git a/_images/figure1.png b/_images/figure1.png
old mode 100644
new mode 100755
diff --git a/_images/figure11.png b/_images/figure11.png
old mode 100644
new mode 100755
diff --git a/_images/figure2.png b/_images/figure2.png
old mode 100644
new mode 100755
diff --git a/_images/figure3.10.png b/_images/figure3.10.png
old mode 100644
new mode 100755
diff --git a/_images/figure3.3.png b/_images/figure3.3.png
old mode 100644
new mode 100755
diff --git a/_images/figure3.8.png b/_images/figure3.8.png
old mode 100644
new mode 100755
diff --git a/_images/figure3.9.png b/_images/figure3.9.png
old mode 100644
new mode 100755
diff --git a/_images/figure3.png b/_images/figure3.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.1.png b/_images/figure4.1.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.10.png b/_images/figure4.10.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.11.png b/_images/figure4.11.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.12.png b/_images/figure4.12.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.6.png b/_images/figure4.6.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.7.png b/_images/figure4.7.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.8.png b/_images/figure4.8.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.9.png b/_images/figure4.9.png
old mode 100644
new mode 100755
diff --git a/_images/figure4.png b/_images/figure4.png
old mode 100644
new mode 100755
diff --git a/_images/figure41.png b/_images/figure41.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.1.png b/_images/figure5.1.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.2.png b/_images/figure5.2.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.3.png b/_images/figure5.3.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.4.png b/_images/figure5.4.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.5.png b/_images/figure5.5.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.6.png b/_images/figure5.6.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.7.png b/_images/figure5.7.png
old mode 100644
new mode 100755
diff --git a/_images/figure5.png b/_images/figure5.png
old mode 100644
new mode 100755
diff --git a/_images/figure6.png b/_images/figure6.png
old mode 100644
new mode 100755
diff --git a/_images/figure61.png b/_images/figure61.png
old mode 100644
new mode 100755
diff --git a/_images/figure7.png b/_images/figure7.png
old mode 100644
new mode 100755
diff --git a/_images/figure71.png b/_images/figure71.png
old mode 100644
new mode 100755
diff --git a/_images/figure8.png b/_images/figure8.png
old mode 100644
new mode 100755
diff --git a/_images/figure_1.png b/_images/figure_1.png
old mode 100644
new mode 100755
diff --git a/_images/figure_16.png b/_images/figure_16.png
old mode 100644
new mode 100755
diff --git a/_images/figure_2.png b/_images/figure_2.png
old mode 100644
new mode 100755
diff --git a/_images/figure_21.png b/_images/figure_21.png
old mode 100644
new mode 100755
diff --git a/_images/figure_3.png b/_images/figure_3.png
old mode 100644
new mode 100755
diff --git a/_images/figure_31.png b/_images/figure_31.png
old mode 100644
new mode 100755
diff --git a/_images/figure_4.png b/_images/figure_4.png
old mode 100644
new mode 100755
diff --git a/_images/figure_41.png b/_images/figure_41.png
old mode 100644
new mode 100755
diff --git a/_images/figure_5.png b/_images/figure_5.png
old mode 100644
new mode 100755
diff --git a/_images/figure_51.png b/_images/figure_51.png
old mode 100644
new mode 100755
diff --git a/_images/figure_52.png b/_images/figure_52.png
old mode 100644
new mode 100755
diff --git a/_images/figure_6.png b/_images/figure_6.png
old mode 100644
new mode 100755
diff --git a/_images/figure_61.png b/_images/figure_61.png
old mode 100644
new mode 100755
diff --git a/_images/figure_6_1.png b/_images/figure_6_1.png
old mode 100644
new mode 100755
diff --git a/_images/figure_7.png b/_images/figure_7.png
old mode 100644
new mode 100755
diff --git a/_images/figure_8_9.png b/_images/figure_8_9.png
old mode 100644
new mode 100755
diff --git a/_images/gan_01.png b/_images/gan_01.png
old mode 100644
new mode 100755
diff --git a/_images/gan_02.png b/_images/gan_02.png
old mode 100644
new mode 100755
diff --git a/_images/gan_03.png b/_images/gan_03.png
old mode 100644
new mode 100755
diff --git a/_images/gan_04.png b/_images/gan_04.png
old mode 100644
new mode 100755
diff --git a/_images/gan_05.png b/_images/gan_05.png
old mode 100644
new mode 100755
diff --git a/_images/glide1.png b/_images/glide1.png
old mode 100644
new mode 100755
diff --git a/_images/glide10.png b/_images/glide10.png
old mode 100644
new mode 100755
diff --git a/_images/glide12.png b/_images/glide12.png
old mode 100644
new mode 100755
diff --git a/_images/glide13.png b/_images/glide13.png
old mode 100644
new mode 100755
diff --git a/_images/glide14.png b/_images/glide14.png
old mode 100644
new mode 100755
diff --git a/_images/glide15.png b/_images/glide15.png
old mode 100644
new mode 100755
diff --git a/_images/glide2.png b/_images/glide2.png
old mode 100644
new mode 100755
diff --git a/_images/glide5.png b/_images/glide5.png
old mode 100644
new mode 100755
diff --git a/_images/glide6.png b/_images/glide6.png
old mode 100644
new mode 100755
diff --git a/_images/glide7.png b/_images/glide7.png
old mode 100644
new mode 100755
diff --git a/_images/glide8.png b/_images/glide8.png
old mode 100644
new mode 100755
diff --git a/_images/glide9.png b/_images/glide9.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_01.png b/_images/hyperdreambooth_01.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_02.png b/_images/hyperdreambooth_02.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_03.png b/_images/hyperdreambooth_03.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_04.png b/_images/hyperdreambooth_04.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_05.png b/_images/hyperdreambooth_05.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_06.png b/_images/hyperdreambooth_06.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_07.png b/_images/hyperdreambooth_07.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_08.png b/_images/hyperdreambooth_08.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_09.png b/_images/hyperdreambooth_09.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_10.png b/_images/hyperdreambooth_10.png
old mode 100644
new mode 100755
diff --git a/_images/hyperdreambooth_11.png b/_images/hyperdreambooth_11.png
old mode 100644
new mode 100755
diff --git a/_images/illustration.png b/_images/illustration.png
old mode 100644
new mode 100755
diff --git a/_images/image%2011.png b/_images/image%2011.png
new file mode 100755
index 00000000..d7e3a8f0
Binary files /dev/null and b/_images/image%2011.png differ
diff --git a/_images/image%2012.png b/_images/image%2012.png
new file mode 100755
index 00000000..d324cf2a
Binary files /dev/null and b/_images/image%2012.png differ
diff --git a/_images/image%2013.png b/_images/image%2013.png
new file mode 100755
index 00000000..c1d2c362
Binary files /dev/null and b/_images/image%2013.png differ
diff --git a/_images/image%2014.png b/_images/image%2014.png
new file mode 100755
index 00000000..30f76d31
Binary files /dev/null and b/_images/image%2014.png differ
diff --git a/_images/image%2015.png b/_images/image%2015.png
new file mode 100755
index 00000000..b3f8a257
Binary files /dev/null and b/_images/image%2015.png differ
diff --git a/_images/image%2017.png b/_images/image%2017.png
new file mode 100755
index 00000000..b8505d12
Binary files /dev/null and b/_images/image%2017.png differ
diff --git a/_images/image%202.png b/_images/image%202.png
new file mode 100755
index 00000000..683e90e8
Binary files /dev/null and b/_images/image%202.png differ
diff --git a/_images/image%203.png b/_images/image%203.png
new file mode 100755
index 00000000..91e364c9
Binary files /dev/null and b/_images/image%203.png differ
diff --git a/_images/image%204.png b/_images/image%204.png
new file mode 100755
index 00000000..d418cd6b
Binary files /dev/null and b/_images/image%204.png differ
diff --git a/_images/image(0).png b/_images/image(0).png
old mode 100644
new mode 100755
diff --git a/_images/image(1).png b/_images/image(1).png
old mode 100644
new mode 100755
diff --git a/_images/image(2).png b/_images/image(2).png
old mode 100644
new mode 100755
diff --git a/_images/image(3).png b/_images/image(3).png
old mode 100644
new mode 100755
diff --git a/_images/image(4).png b/_images/image(4).png
old mode 100644
new mode 100755
diff --git a/_images/image(5).png b/_images/image(5).png
old mode 100644
new mode 100755
diff --git a/_images/image(6).png b/_images/image(6).png
old mode 100644
new mode 100755
diff --git a/_images/image(7).png b/_images/image(7).png
old mode 100644
new mode 100755
diff --git a/_images/image(8).png b/_images/image(8).png
old mode 100644
new mode 100755
diff --git a/_images/image.png b/_images/image.png
old mode 100644
new mode 100755
diff --git a/_images/image0.png b/_images/image0.png
old mode 100644
new mode 100755
diff --git a/_images/image1.png b/_images/image1.png
old mode 100644
new mode 100755
diff --git a/_images/image10.png b/_images/image10.png
old mode 100644
new mode 100755
diff --git a/_images/image11.png b/_images/image11.png
old mode 100644
new mode 100755
diff --git a/_images/image111.png b/_images/image111.png
old mode 100644
new mode 100755
diff --git a/_images/image12.png b/_images/image12.png
old mode 100644
new mode 100755
diff --git a/_images/image121.png b/_images/image121.png
old mode 100644
new mode 100755
diff --git a/_images/image13.png b/_images/image13.png
old mode 100644
new mode 100755
diff --git a/_images/image131.png b/_images/image131.png
old mode 100644
new mode 100755
diff --git a/_images/image14.png b/_images/image14.png
old mode 100644
new mode 100755
diff --git a/_images/image15.png b/_images/image15.png
old mode 100644
new mode 100755
diff --git a/_images/image16.png b/_images/image16.png
old mode 100644
new mode 100755
diff --git a/_images/image17.png b/_images/image17.png
old mode 100644
new mode 100755
diff --git a/_images/image18.png b/_images/image18.png
old mode 100644
new mode 100755
diff --git a/_images/image181.png b/_images/image181.png
old mode 100644
new mode 100755
diff --git a/_images/image19.png b/_images/image19.png
old mode 100644
new mode 100755
diff --git a/_images/image191.png b/_images/image191.png
old mode 100644
new mode 100755
diff --git a/_images/image2.png b/_images/image2.png
old mode 100644
new mode 100755
diff --git a/_images/image20.png b/_images/image20.png
old mode 100644
new mode 100755
diff --git a/_images/image21.png b/_images/image21.png
old mode 100644
new mode 100755
diff --git a/_images/image211.png b/_images/image211.png
old mode 100644
new mode 100755
diff --git a/_images/image22.png b/_images/image22.png
old mode 100644
new mode 100755
diff --git a/_images/image23.png b/_images/image23.png
old mode 100644
new mode 100755
index 92e9adef..f17b5eb8
Binary files a/_images/image23.png and b/_images/image23.png differ
diff --git a/_images/image24.png b/_images/image24.png
old mode 100644
new mode 100755
diff --git a/_images/image3.png b/_images/image3.png
old mode 100644
new mode 100755
diff --git a/_images/image31.png b/_images/image31.png
old mode 100644
new mode 100755
diff --git a/_images/image4.png b/_images/image4.png
old mode 100644
new mode 100755
diff --git a/_images/image41.png b/_images/image41.png
old mode 100644
new mode 100755
diff --git a/_images/image5.png b/_images/image5.png
old mode 100644
new mode 100755
diff --git a/_images/image6.png b/_images/image6.png
old mode 100644
new mode 100755
diff --git a/_images/image61.png b/_images/image61.png
old mode 100644
new mode 100755
diff --git a/_images/image7.png b/_images/image7.png
old mode 100644
new mode 100755
diff --git a/_images/image71.png b/_images/image71.png
old mode 100644
new mode 100755
diff --git a/_images/image8.png b/_images/image8.png
old mode 100644
new mode 100755
diff --git a/_images/image81.png b/_images/image81.png
old mode 100644
new mode 100755
diff --git a/_images/image9.png b/_images/image9.png
old mode 100644
new mode 100755
diff --git a/_images/image_1.png b/_images/image_1.png
new file mode 100755
index 00000000..26299f4f
Binary files /dev/null and b/_images/image_1.png differ
diff --git a/_images/image_10.png b/_images/image_10.png
new file mode 100755
index 00000000..719cdba1
Binary files /dev/null and b/_images/image_10.png differ
diff --git a/_images/image_101.png b/_images/image_101.png
new file mode 100755
index 00000000..16d49986
Binary files /dev/null and b/_images/image_101.png differ
diff --git a/_images/image_11.png b/_images/image_11.png
new file mode 100755
index 00000000..835e03f4
Binary files /dev/null and b/_images/image_11.png differ
diff --git a/_images/image_111.png b/_images/image_111.png
new file mode 100755
index 00000000..055086b8
Binary files /dev/null and b/_images/image_111.png differ
diff --git a/_images/image_12.png b/_images/image_12.png
new file mode 100755
index 00000000..a632471e
Binary files /dev/null and b/_images/image_12.png differ
diff --git a/_images/image_121.png b/_images/image_121.png
new file mode 100755
index 00000000..f1d6ae7b
Binary files /dev/null and b/_images/image_121.png differ
diff --git a/_images/image_13.png b/_images/image_13.png
new file mode 100755
index 00000000..b77fe9f5
Binary files /dev/null and b/_images/image_13.png differ
diff --git a/_images/image_131.png b/_images/image_131.png
new file mode 100755
index 00000000..84a3b475
Binary files /dev/null and b/_images/image_131.png differ
diff --git a/_images/image_14.png b/_images/image_14.png
new file mode 100755
index 00000000..d2da7675
Binary files /dev/null and b/_images/image_14.png differ
diff --git a/_images/image_141.png b/_images/image_141.png
new file mode 100755
index 00000000..3d1eafe4
Binary files /dev/null and b/_images/image_141.png differ
diff --git a/_images/image_15.png b/_images/image_15.png
new file mode 100755
index 00000000..7e197e9c
Binary files /dev/null and b/_images/image_15.png differ
diff --git a/_images/image_151.png b/_images/image_151.png
new file mode 100755
index 00000000..169df153
Binary files /dev/null and b/_images/image_151.png differ
diff --git a/_images/image_16.png b/_images/image_16.png
new file mode 100755
index 00000000..0d3f092e
Binary files /dev/null and b/_images/image_16.png differ
diff --git a/_images/image_161.png b/_images/image_161.png
new file mode 100755
index 00000000..538812aa
Binary files /dev/null and b/_images/image_161.png differ
diff --git a/_images/image_17.png b/_images/image_17.png
new file mode 100755
index 00000000..c68e1506
Binary files /dev/null and b/_images/image_17.png differ
diff --git a/_images/image_171.png b/_images/image_171.png
new file mode 100755
index 00000000..69f07e17
Binary files /dev/null and b/_images/image_171.png differ
diff --git a/_images/image_18.png b/_images/image_18.png
new file mode 100755
index 00000000..5b39fc04
Binary files /dev/null and b/_images/image_18.png differ
diff --git a/_images/image_181.png b/_images/image_181.png
new file mode 100755
index 00000000..6eb24b79
Binary files /dev/null and b/_images/image_181.png differ
diff --git a/_images/image_19.png b/_images/image_19.png
new file mode 100755
index 00000000..e7c42fe6
Binary files /dev/null and b/_images/image_19.png differ
diff --git a/_images/image_2.png b/_images/image_2.png
new file mode 100755
index 00000000..62276650
Binary files /dev/null and b/_images/image_2.png differ
diff --git a/_images/image_21.png b/_images/image_21.png
new file mode 100755
index 00000000..410e2e76
Binary files /dev/null and b/_images/image_21.png differ
diff --git a/_images/image_22.png b/_images/image_22.png
new file mode 100755
index 00000000..685acb9b
Binary files /dev/null and b/_images/image_22.png differ
diff --git a/_images/image_23.png b/_images/image_23.png
new file mode 100755
index 00000000..e636965b
Binary files /dev/null and b/_images/image_23.png differ
diff --git a/_images/image_24.png b/_images/image_24.png
new file mode 100755
index 00000000..096fe207
Binary files /dev/null and b/_images/image_24.png differ
diff --git a/_images/image_25.png b/_images/image_25.png
new file mode 100755
index 00000000..7d250eba
Binary files /dev/null and b/_images/image_25.png differ
diff --git a/_images/image_26.png b/_images/image_26.png
new file mode 100755
index 00000000..c2b83e7f
Binary files /dev/null and b/_images/image_26.png differ
diff --git a/_images/image_27.png b/_images/image_27.png
new file mode 100755
index 00000000..b5d95e4a
Binary files /dev/null and b/_images/image_27.png differ
diff --git a/_images/image_28.png b/_images/image_28.png
new file mode 100755
index 00000000..51d5aa89
Binary files /dev/null and b/_images/image_28.png differ
diff --git a/_images/image_29.png b/_images/image_29.png
new file mode 100755
index 00000000..7de3b645
Binary files /dev/null and b/_images/image_29.png differ
diff --git a/_images/image_3.png b/_images/image_3.png
new file mode 100755
index 00000000..4ae123b9
Binary files /dev/null and b/_images/image_3.png differ
diff --git a/_images/image_30.png b/_images/image_30.png
new file mode 100755
index 00000000..b34af1ba
Binary files /dev/null and b/_images/image_30.png differ
diff --git a/_images/image_31.png b/_images/image_31.png
new file mode 100755
index 00000000..76a89370
Binary files /dev/null and b/_images/image_31.png differ
diff --git a/_images/image_32.png b/_images/image_32.png
new file mode 100755
index 00000000..c4ca45f4
Binary files /dev/null and b/_images/image_32.png differ
diff --git a/_images/image_33.png b/_images/image_33.png
new file mode 100755
index 00000000..556ca308
Binary files /dev/null and b/_images/image_33.png differ
diff --git a/_images/image_34.png b/_images/image_34.png
new file mode 100755
index 00000000..ff459da1
Binary files /dev/null and b/_images/image_34.png differ
diff --git a/_images/image_35.png b/_images/image_35.png
new file mode 100755
index 00000000..b2d6896b
Binary files /dev/null and b/_images/image_35.png differ
diff --git a/_images/image_36.png b/_images/image_36.png
new file mode 100755
index 00000000..a55dad90
Binary files /dev/null and b/_images/image_36.png differ
diff --git a/_images/image_37.png b/_images/image_37.png
new file mode 100755
index 00000000..8be2ec76
Binary files /dev/null and b/_images/image_37.png differ
diff --git a/_images/image_38.png b/_images/image_38.png
new file mode 100755
index 00000000..ed4a882b
Binary files /dev/null and b/_images/image_38.png differ
diff --git a/_images/image_39.png b/_images/image_39.png
new file mode 100755
index 00000000..c5bbfe12
Binary files /dev/null and b/_images/image_39.png differ
diff --git a/_images/image_4.png b/_images/image_4.png
new file mode 100755
index 00000000..883fac02
Binary files /dev/null and b/_images/image_4.png differ
diff --git a/_images/image_40.png b/_images/image_40.png
new file mode 100755
index 00000000..152ab77c
Binary files /dev/null and b/_images/image_40.png differ
diff --git a/_images/image_5.png b/_images/image_5.png
new file mode 100755
index 00000000..f17b5eb8
Binary files /dev/null and b/_images/image_5.png differ
diff --git a/_images/image_6.png b/_images/image_6.png
new file mode 100755
index 00000000..2baffdf6
Binary files /dev/null and b/_images/image_6.png differ
diff --git a/_images/image_7.png b/_images/image_7.png
new file mode 100755
index 00000000..72bed245
Binary files /dev/null and b/_images/image_7.png differ
diff --git a/_images/image_8.png b/_images/image_8.png
new file mode 100755
index 00000000..af3daa7b
Binary files /dev/null and b/_images/image_8.png differ
diff --git a/_images/image_81.png b/_images/image_81.png
new file mode 100755
index 00000000..e18b49d9
Binary files /dev/null and b/_images/image_81.png differ
diff --git a/_images/image_9.png b/_images/image_9.png
new file mode 100755
index 00000000..9342ff9e
Binary files /dev/null and b/_images/image_9.png differ
diff --git a/_images/image_91.png b/_images/image_91.png
new file mode 100755
index 00000000..25d172a4
Binary files /dev/null and b/_images/image_91.png differ
diff --git a/_images/imagen.png b/_images/imagen.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_1.png b/_images/imagen_1.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_10.png b/_images/imagen_10.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_11.png b/_images/imagen_11.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_12.png b/_images/imagen_12.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_13.png b/_images/imagen_13.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_2.png b/_images/imagen_2.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_3.png b/_images/imagen_3.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_5.png b/_images/imagen_5.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_6.png b/_images/imagen_6.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_7.png b/_images/imagen_7.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_8.png b/_images/imagen_8.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_9.png b/_images/imagen_9.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_01.png b/_images/imagen_editor_01.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_02.png b/_images/imagen_editor_02.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_03.png b/_images/imagen_editor_03.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_04.png b/_images/imagen_editor_04.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_05.png b/_images/imagen_editor_05.png
old mode 100644
new mode 100755
diff --git a/_images/imagen_editor_06.png b/_images/imagen_editor_06.png
old mode 100644
new mode 100755
diff --git a/_images/img.png b/_images/img.png
old mode 100644
new mode 100755
diff --git a/_images/img0.png b/_images/img0.png
old mode 100644
new mode 100755
diff --git a/_images/img01.png b/_images/img01.png
old mode 100644
new mode 100755
diff --git a/_images/img02.png b/_images/img02.png
old mode 100644
new mode 100755
diff --git a/_images/img03.png b/_images/img03.png
old mode 100644
new mode 100755
diff --git a/_images/img04.png b/_images/img04.png
old mode 100644
new mode 100755
diff --git a/_images/img1.png b/_images/img1.png
old mode 100644
new mode 100755
diff --git a/_images/img10.png b/_images/img10.png
old mode 100644
new mode 100755
diff --git a/_images/img101.png b/_images/img101.png
old mode 100644
new mode 100755
diff --git a/_images/img102.png b/_images/img102.png
old mode 100644
new mode 100755
diff --git a/_images/img103.png b/_images/img103.png
old mode 100644
new mode 100755
diff --git a/_images/img11.png b/_images/img11.png
old mode 100644
new mode 100755
diff --git a/_images/img110.png b/_images/img110.png
old mode 100644
new mode 100755
diff --git a/_images/img111.png b/_images/img111.png
old mode 100644
new mode 100755
diff --git a/_images/img112.png b/_images/img112.png
old mode 100644
new mode 100755
diff --git a/_images/img113.png b/_images/img113.png
old mode 100644
new mode 100755
diff --git a/_images/img114.png b/_images/img114.png
old mode 100644
new mode 100755
diff --git a/_images/img12.png b/_images/img12.png
old mode 100644
new mode 100755
diff --git a/_images/img121.png b/_images/img121.png
old mode 100644
new mode 100755
diff --git a/_images/img122.png b/_images/img122.png
old mode 100644
new mode 100755
diff --git a/_images/img123.png b/_images/img123.png
old mode 100644
new mode 100755
diff --git a/_images/img13.png b/_images/img13.png
old mode 100644
new mode 100755
diff --git a/_images/img131.png b/_images/img131.png
old mode 100644
new mode 100755
diff --git a/_images/img132.png b/_images/img132.png
old mode 100644
new mode 100755
diff --git a/_images/img14.png b/_images/img14.png
old mode 100644
new mode 100755
diff --git a/_images/img141.png b/_images/img141.png
old mode 100644
new mode 100755
diff --git a/_images/img142.png b/_images/img142.png
old mode 100644
new mode 100755
diff --git a/_images/img15.png b/_images/img15.png
old mode 100644
new mode 100755
diff --git a/_images/img151.png b/_images/img151.png
old mode 100644
new mode 100755
diff --git a/_images/img152.png b/_images/img152.png
old mode 100644
new mode 100755
diff --git a/_images/img16.png b/_images/img16.png
old mode 100644
new mode 100755
diff --git a/_images/img161.png b/_images/img161.png
old mode 100644
new mode 100755
diff --git a/_images/img17.png b/_images/img17.png
old mode 100644
new mode 100755
diff --git a/_images/img171.png b/_images/img171.png
old mode 100644
new mode 100755
diff --git a/_images/img18.png b/_images/img18.png
old mode 100644
new mode 100755
diff --git a/_images/img181.png b/_images/img181.png
old mode 100644
new mode 100755
diff --git a/_images/img19.png b/_images/img19.png
old mode 100644
new mode 100755
diff --git a/_images/img2.png b/_images/img2.png
old mode 100644
new mode 100755
diff --git a/_images/img21.png b/_images/img21.png
old mode 100644
new mode 100755
diff --git a/_images/img22.png b/_images/img22.png
old mode 100644
new mode 100755
diff --git a/_images/img23.png b/_images/img23.png
old mode 100644
new mode 100755
diff --git a/_images/img24.png b/_images/img24.png
old mode 100644
new mode 100755
diff --git a/_images/img25.png b/_images/img25.png
old mode 100644
new mode 100755
diff --git a/_images/img26.png b/_images/img26.png
old mode 100644
new mode 100755
diff --git a/_images/img3.png b/_images/img3.png
old mode 100644
new mode 100755
diff --git a/_images/img31.png b/_images/img31.png
old mode 100644
new mode 100755
diff --git a/_images/img32.png b/_images/img32.png
old mode 100644
new mode 100755
diff --git a/_images/img33.png b/_images/img33.png
old mode 100644
new mode 100755
diff --git a/_images/img34.png b/_images/img34.png
old mode 100644
new mode 100755
diff --git a/_images/img35.png b/_images/img35.png
old mode 100644
new mode 100755
diff --git a/_images/img36.png b/_images/img36.png
old mode 100644
new mode 100755
diff --git a/_images/img4.png b/_images/img4.png
old mode 100644
new mode 100755
diff --git a/_images/img41.png b/_images/img41.png
old mode 100644
new mode 100755
diff --git a/_images/img42.png b/_images/img42.png
old mode 100644
new mode 100755
diff --git a/_images/img43.png b/_images/img43.png
old mode 100644
new mode 100755
diff --git a/_images/img44.png b/_images/img44.png
old mode 100644
new mode 100755
diff --git a/_images/img45.png b/_images/img45.png
old mode 100644
new mode 100755
diff --git a/_images/img5.png b/_images/img5.png
old mode 100644
new mode 100755
diff --git a/_images/img51.png b/_images/img51.png
old mode 100644
new mode 100755
diff --git a/_images/img52.png b/_images/img52.png
old mode 100644
new mode 100755
diff --git a/_images/img53.png b/_images/img53.png
old mode 100644
new mode 100755
diff --git a/_images/img54.png b/_images/img54.png
old mode 100644
new mode 100755
diff --git a/_images/img55.png b/_images/img55.png
old mode 100644
new mode 100755
diff --git a/_images/img6.png b/_images/img6.png
old mode 100644
new mode 100755
diff --git a/_images/img61.png b/_images/img61.png
old mode 100644
new mode 100755
diff --git a/_images/img62.png b/_images/img62.png
old mode 100644
new mode 100755
diff --git a/_images/img63.png b/_images/img63.png
old mode 100644
new mode 100755
diff --git a/_images/img64.png b/_images/img64.png
old mode 100644
new mode 100755
diff --git a/_images/img65.png b/_images/img65.png
old mode 100644
new mode 100755
diff --git a/_images/img7.png b/_images/img7.png
old mode 100644
new mode 100755
diff --git a/_images/img71.png b/_images/img71.png
old mode 100644
new mode 100755
diff --git a/_images/img72.png b/_images/img72.png
old mode 100644
new mode 100755
diff --git a/_images/img73.png b/_images/img73.png
old mode 100644
new mode 100755
diff --git a/_images/img74.png b/_images/img74.png
old mode 100644
new mode 100755
diff --git a/_images/img75.png b/_images/img75.png
old mode 100644
new mode 100755
diff --git a/_images/img8.png b/_images/img8.png
old mode 100644
new mode 100755
diff --git a/_images/img81.png b/_images/img81.png
old mode 100644
new mode 100755
diff --git a/_images/img82.png b/_images/img82.png
old mode 100644
new mode 100755
diff --git a/_images/img83.png b/_images/img83.png
old mode 100644
new mode 100755
diff --git a/_images/img84.png b/_images/img84.png
old mode 100644
new mode 100755
diff --git a/_images/img85.png b/_images/img85.png
old mode 100644
new mode 100755
diff --git a/_images/img9.png b/_images/img9.png
old mode 100644
new mode 100755
diff --git a/_images/img91.png b/_images/img91.png
old mode 100644
new mode 100755
diff --git a/_images/img92.png b/_images/img92.png
old mode 100644
new mode 100755
diff --git a/_images/img93.png b/_images/img93.png
old mode 100644
new mode 100755
diff --git a/_images/img94.png b/_images/img94.png
old mode 100644
new mode 100755
diff --git a/_images/img_00.png b/_images/img_00.png
old mode 100644
new mode 100755
diff --git a/_images/img_001.png b/_images/img_001.png
old mode 100644
new mode 100755
diff --git a/_images/img_002.png b/_images/img_002.png
old mode 100644
new mode 100755
diff --git a/_images/img_01.png b/_images/img_01.png
old mode 100644
new mode 100755
diff --git a/_images/img_011.png b/_images/img_011.png
old mode 100644
new mode 100755
diff --git a/_images/img_012.png b/_images/img_012.png
old mode 100644
new mode 100755
diff --git a/_images/img_02.png b/_images/img_02.png
old mode 100644
new mode 100755
diff --git a/_images/img_021.png b/_images/img_021.png
old mode 100644
new mode 100755
diff --git a/_images/img_022.png b/_images/img_022.png
old mode 100644
new mode 100755
diff --git a/_images/img_03.png b/_images/img_03.png
old mode 100644
new mode 100755
diff --git a/_images/img_031.png b/_images/img_031.png
old mode 100644
new mode 100755
diff --git a/_images/img_032.png b/_images/img_032.png
old mode 100644
new mode 100755
diff --git a/_images/img_04.png b/_images/img_04.png
old mode 100644
new mode 100755
diff --git a/_images/img_041.png b/_images/img_041.png
old mode 100644
new mode 100755
diff --git a/_images/img_05.png b/_images/img_05.png
old mode 100644
new mode 100755
diff --git a/_images/img_051.png b/_images/img_051.png
old mode 100644
new mode 100755
diff --git a/_images/img_06.png b/_images/img_06.png
old mode 100644
new mode 100755
diff --git a/_images/img_061.png b/_images/img_061.png
old mode 100644
new mode 100755
diff --git a/_images/img_062.png b/_images/img_062.png
old mode 100644
new mode 100755
diff --git a/_images/img_07.png b/_images/img_07.png
old mode 100644
new mode 100755
diff --git a/_images/img_071.png b/_images/img_071.png
old mode 100644
new mode 100755
diff --git a/_images/img_08.png b/_images/img_08.png
old mode 100644
new mode 100755
diff --git a/_images/img_081.png b/_images/img_081.png
old mode 100644
new mode 100755
diff --git a/_images/img_082.png b/_images/img_082.png
old mode 100644
new mode 100755
diff --git a/_images/img_09.png b/_images/img_09.png
old mode 100644
new mode 100755
diff --git a/_images/img_091.png b/_images/img_091.png
old mode 100644
new mode 100755
diff --git a/_images/img_092.png b/_images/img_092.png
old mode 100644
new mode 100755
diff --git a/_images/img_10.png b/_images/img_10.png
old mode 100644
new mode 100755
diff --git a/_images/img_101.png b/_images/img_101.png
old mode 100644
new mode 100755
diff --git a/_images/img_102.png b/_images/img_102.png
old mode 100644
new mode 100755
diff --git a/_images/img_11.png b/_images/img_11.png
old mode 100644
new mode 100755
diff --git a/_images/img_111.png b/_images/img_111.png
old mode 100644
new mode 100755
diff --git a/_images/img_112.png b/_images/img_112.png
old mode 100644
new mode 100755
diff --git a/_images/img_12.png b/_images/img_12.png
old mode 100644
new mode 100755
diff --git a/_images/img_121.png b/_images/img_121.png
old mode 100644
new mode 100755
diff --git a/_images/img_13.png b/_images/img_13.png
old mode 100644
new mode 100755
diff --git a/_images/img_131.png b/_images/img_131.png
old mode 100644
new mode 100755
diff --git a/_images/img_14.png b/_images/img_14.png
old mode 100644
new mode 100755
diff --git a/_images/img_141.png b/_images/img_141.png
old mode 100644
new mode 100755
diff --git a/_images/img_15.png b/_images/img_15.png
old mode 100644
new mode 100755
diff --git a/_images/img_151.png b/_images/img_151.png
old mode 100644
new mode 100755
diff --git a/_images/img_16.png b/_images/img_16.png
old mode 100644
new mode 100755
diff --git a/_images/img_161.png b/_images/img_161.png
old mode 100644
new mode 100755
diff --git a/_images/img_17.png b/_images/img_17.png
old mode 100644
new mode 100755
diff --git a/_images/img_171.png b/_images/img_171.png
old mode 100644
new mode 100755
diff --git a/_images/img_18.png b/_images/img_18.png
old mode 100644
new mode 100755
diff --git a/_images/img_181.png b/_images/img_181.png
old mode 100644
new mode 100755
diff --git a/_images/img_19.png b/_images/img_19.png
old mode 100644
new mode 100755
diff --git a/_images/img_191.png b/_images/img_191.png
old mode 100644
new mode 100755
diff --git a/_images/img_19_2.png b/_images/img_19_2.png
old mode 100644
new mode 100755
diff --git a/_images/img_20.png b/_images/img_20.png
old mode 100644
new mode 100755
diff --git a/_images/img_201.png b/_images/img_201.png
old mode 100644
new mode 100755
diff --git a/_images/img_21.png b/_images/img_21.png
old mode 100644
new mode 100755
diff --git a/_images/img_211.png b/_images/img_211.png
old mode 100644
new mode 100755
diff --git a/_images/img_22.png b/_images/img_22.png
old mode 100644
new mode 100755
diff --git a/_images/img_221.png b/_images/img_221.png
old mode 100644
new mode 100755
diff --git a/_images/img_23.png b/_images/img_23.png
old mode 100644
new mode 100755
diff --git a/_images/img_231.png b/_images/img_231.png
old mode 100644
new mode 100755
diff --git a/_images/img_24.png b/_images/img_24.png
old mode 100644
new mode 100755
diff --git a/_images/img_241.png b/_images/img_241.png
old mode 100644
new mode 100755
diff --git a/_images/img_25.png b/_images/img_25.png
old mode 100644
new mode 100755
diff --git a/_images/img_26.png b/_images/img_26.png
old mode 100644
new mode 100755
diff --git a/_images/img_261.png b/_images/img_261.png
old mode 100644
new mode 100755
diff --git a/_images/img_27.png b/_images/img_27.png
old mode 100644
new mode 100755
diff --git a/_images/img_271.png b/_images/img_271.png
old mode 100644
new mode 100755
diff --git a/_images/img_28.png b/_images/img_28.png
old mode 100644
new mode 100755
diff --git a/_images/img_281.png b/_images/img_281.png
old mode 100644
new mode 100755
diff --git a/_images/img_29.png b/_images/img_29.png
old mode 100644
new mode 100755
diff --git a/_images/img_291.png b/_images/img_291.png
old mode 100644
new mode 100755
diff --git a/_images/img_30.png b/_images/img_30.png
old mode 100644
new mode 100755
diff --git a/_images/img_301.png b/_images/img_301.png
old mode 100644
new mode 100755
diff --git a/_images/img_31.png b/_images/img_31.png
old mode 100644
new mode 100755
diff --git a/_images/img_32.png b/_images/img_32.png
old mode 100644
new mode 100755
diff --git a/_images/img_33.png b/_images/img_33.png
old mode 100644
new mode 100755
diff --git a/_images/img_34.png b/_images/img_34.png
old mode 100644
new mode 100755
diff --git a/_images/img_35.png b/_images/img_35.png
old mode 100644
new mode 100755
diff --git a/_images/img_36.png b/_images/img_36.png
old mode 100644
new mode 100755
diff --git a/_images/img_37.png b/_images/img_37.png
old mode 100644
new mode 100755
diff --git a/_images/img_38.png b/_images/img_38.png
old mode 100644
new mode 100755
diff --git a/_images/img_39.png b/_images/img_39.png
old mode 100644
new mode 100755
diff --git a/_images/img_40.png b/_images/img_40.png
old mode 100644
new mode 100755
diff --git a/_images/img_41.png b/_images/img_41.png
old mode 100644
new mode 100755
diff --git a/_images/img_42.png b/_images/img_42.png
old mode 100644
new mode 100755
diff --git a/_images/img_43.png b/_images/img_43.png
old mode 100644
new mode 100755
diff --git a/_images/img_44.png b/_images/img_44.png
old mode 100644
new mode 100755
diff --git a/_images/img_results.png b/_images/img_results.png
old mode 100644
new mode 100755
diff --git a/_images/improved_ddpm_eq.png b/_images/improved_ddpm_eq.png
old mode 100644
new mode 100755
diff --git a/_images/improved_ddpm_pic.png b/_images/improved_ddpm_pic.png
old mode 100644
new mode 100755
diff --git a/_images/inference_pipeline.png b/_images/inference_pipeline.png
old mode 100644
new mode 100755
diff --git a/_images/interpolation.png b/_images/interpolation.png
old mode 100644
new mode 100755
diff --git a/_images/langevin.gif b/_images/langevin.gif
new file mode 100755
index 00000000..964abb85
Binary files /dev/null and b/_images/langevin.gif differ
diff --git a/_images/layout_to_image.png b/_images/layout_to_image.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_01.png b/_images/ldm_01.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_02.png b/_images/ldm_02.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_03.png b/_images/ldm_03.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_04.png b/_images/ldm_04.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_05.png b/_images/ldm_05.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_06.png b/_images/ldm_06.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_07.png b/_images/ldm_07.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_08.png b/_images/ldm_08.png
old mode 100644
new mode 100755
diff --git a/_images/ldm_09.png b/_images/ldm_09.png
old mode 100644
new mode 100755
diff --git a/_images/leaf_db.png b/_images/leaf_db.png
old mode 100644
new mode 100755
diff --git a/_images/leaf_pp.png b/_images/leaf_pp.png
old mode 100644
new mode 100755
diff --git a/_images/leaf_sd.png b/_images/leaf_sd.png
old mode 100644
new mode 100755
diff --git a/_images/limit.png b/_images/limit.png
old mode 100644
new mode 100755
diff --git a/_images/limitation.png b/_images/limitation.png
old mode 100644
new mode 100755
diff --git a/_images/lora.png b/_images/lora.png
old mode 100644
new mode 100755
diff --git a/_images/loss.png b/_images/loss.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_01.png b/_images/magic_3d_01.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_02.png b/_images/magic_3d_02.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_03.png b/_images/magic_3d_03.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_04.png b/_images/magic_3d_04.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_05.png b/_images/magic_3d_05.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_06.png b/_images/magic_3d_06.png
new file mode 100755
index 00000000..2f4ce60c
Binary files /dev/null and b/_images/magic_3d_06.png differ
diff --git a/_images/magic_3d_07.png b/_images/magic_3d_07.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_08.png b/_images/magic_3d_08.png
old mode 100644
new mode 100755
diff --git a/_images/magic_3d_09.png b/_images/magic_3d_09.png
old mode 100644
new mode 100755
diff --git a/_images/main.jpeg b/_images/main.jpeg
old mode 100644
new mode 100755
diff --git a/_images/maskgit_1.png b/_images/maskgit_1.png
old mode 100644
new mode 100755
diff --git a/_images/maskgit_2.png b/_images/maskgit_2.png
old mode 100644
new mode 100755
diff --git a/_images/mipnerf.png b/_images/mipnerf.png
old mode 100644
new mode 100755
diff --git a/_images/multi_aspect_ratio.png b/_images/multi_aspect_ratio.png
old mode 100644
new mode 100755
diff --git a/_images/multiple_db.png b/_images/multiple_db.png
old mode 100644
new mode 100755
diff --git a/_images/multiple_ex.png b/_images/multiple_ex.png
old mode 100644
new mode 100755
diff --git a/_images/multiple_pp.png b/_images/multiple_pp.png
old mode 100644
new mode 100755
diff --git a/_images/multiple_sd.png b/_images/multiple_sd.png
old mode 100644
new mode 100755
diff --git a/_images/nerf.png b/_images/nerf.png
old mode 100644
new mode 100755
diff --git a/_images/notebook-example_2_1.png b/_images/notebook-example_2_1.png
old mode 100644
new mode 100755
diff --git a/_images/photo_db.png b/_images/photo_db.png
old mode 100644
new mode 100755
diff --git a/_images/photo_pp.png b/_images/photo_pp.png
old mode 100644
new mode 100755
diff --git a/_images/photo_sd.png b/_images/photo_sd.png
old mode 100644
new mode 100755
diff --git a/_images/pirate.png b/_images/pirate.png
old mode 100644
new mode 100755
diff --git a/_images/plot_result.png b/_images/plot_result.png
old mode 100644
new mode 100755
diff --git a/_images/pose.png b/_images/pose.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_01.png b/_images/progressive_distillation_01.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_02.png b/_images/progressive_distillation_02.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_03.png b/_images/progressive_distillation_03.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_04.png b/_images/progressive_distillation_04.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_05.png b/_images/progressive_distillation_05.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_06.png b/_images/progressive_distillation_06.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_07.png b/_images/progressive_distillation_07.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_08.png b/_images/progressive_distillation_08.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_09.png b/_images/progressive_distillation_09.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_10.png b/_images/progressive_distillation_10.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_11.png b/_images/progressive_distillation_11.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_12.png b/_images/progressive_distillation_12.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_13.png b/_images/progressive_distillation_13.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_14.png b/_images/progressive_distillation_14.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_15.png b/_images/progressive_distillation_15.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_16.png b/_images/progressive_distillation_16.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_17.png b/_images/progressive_distillation_17.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_18.png b/_images/progressive_distillation_18.png
old mode 100644
new mode 100755
diff --git a/_images/progressive_distillation_19.png b/_images/progressive_distillation_19.png
old mode 100644
new mode 100755
diff --git a/_images/result_base.png b/_images/result_base.png
old mode 100644
new mode 100755
diff --git a/_images/result_new.png b/_images/result_new.png
old mode 100644
new mode 100755
diff --git a/_images/sdxl_result.png b/_images/sdxl_result.png
old mode 100644
new mode 100755
diff --git a/_images/sea.png b/_images/sea.png
old mode 100644
new mode 100755
diff --git a/_images/seg.png b/_images/seg.png
old mode 100644
new mode 100755
diff --git a/_images/spherical_coord.png b/_images/spherical_coord.png
old mode 100644
new mode 100755
diff --git a/_images/structure.png b/_images/structure.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_01.png b/_images/swjo_exp_01.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_02.png b/_images/swjo_exp_02.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_03.png b/_images/swjo_exp_03.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_04.png b/_images/swjo_exp_04.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_05.png b/_images/swjo_exp_05.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_06.png b/_images/swjo_exp_06.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_07.png b/_images/swjo_exp_07.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_08.png b/_images/swjo_exp_08.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_09.png b/_images/swjo_exp_09.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_10.png b/_images/swjo_exp_10.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_11.png b/_images/swjo_exp_11.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_12.png b/_images/swjo_exp_12.png
old mode 100644
new mode 100755
diff --git a/_images/swjo_exp_13.png b/_images/swjo_exp_13.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_01.png b/_images/t2i_adapter_01.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_02.png b/_images/t2i_adapter_02.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_03.png b/_images/t2i_adapter_03.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_04.png b/_images/t2i_adapter_04.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_05.png b/_images/t2i_adapter_05.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_06.png b/_images/t2i_adapter_06.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_07.png b/_images/t2i_adapter_07.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_08.png b/_images/t2i_adapter_08.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_09.png b/_images/t2i_adapter_09.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_10.png b/_images/t2i_adapter_10.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_11.png b/_images/t2i_adapter_11.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_12.png b/_images/t2i_adapter_12.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_13.png b/_images/t2i_adapter_13.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_14.png b/_images/t2i_adapter_14.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_15.png b/_images/t2i_adapter_15.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_16.png b/_images/t2i_adapter_16.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_17.png b/_images/t2i_adapter_17.png
old mode 100644
new mode 100755
diff --git a/_images/t2i_adapter_18.png b/_images/t2i_adapter_18.png
old mode 100644
new mode 100755
diff --git a/_images/table1.png b/_images/table1.png
old mode 100644
new mode 100755
diff --git a/_images/table11.png b/_images/table11.png
old mode 100644
new mode 100755
diff --git a/_images/table12.png b/_images/table12.png
old mode 100644
new mode 100755
diff --git a/_images/table2.png b/_images/table2.png
old mode 100644
new mode 100755
diff --git a/_images/table21.png b/_images/table21.png
old mode 100644
new mode 100755
diff --git a/_images/table3.png b/_images/table3.png
old mode 100644
new mode 100755
diff --git a/_images/table4_5.png b/_images/table4_5.png
old mode 100644
new mode 100755
diff --git a/_images/table_1.png b/_images/table_1.png
old mode 100644
new mode 100755
diff --git a/_images/table_2.png b/_images/table_2.png
old mode 100644
new mode 100755
diff --git a/_images/table_3.png b/_images/table_3.png
old mode 100644
new mode 100755
diff --git a/_images/table_31.png b/_images/table_31.png
old mode 100644
new mode 100755
diff --git a/_images/table_6.png b/_images/table_6.png
old mode 100644
new mode 100755
diff --git a/_images/text_to_image.png b/_images/text_to_image.png
old mode 100644
new mode 100755
diff --git a/_images/title_fig.png b/_images/title_fig.png
old mode 100644
new mode 100755
diff --git a/_images/trade_off.png b/_images/trade_off.png
old mode 100644
new mode 100755
diff --git a/_images/training_pipeline.png b/_images/training_pipeline.png
old mode 100644
new mode 100755
diff --git a/_images/training_result.png b/_images/training_result.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_01.png b/_images/translation_turbo_01.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_02.png b/_images/translation_turbo_02.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_03.png b/_images/translation_turbo_03.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_04.png b/_images/translation_turbo_04.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_05.png b/_images/translation_turbo_05.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_06.png b/_images/translation_turbo_06.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_07.png b/_images/translation_turbo_07.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_08.png b/_images/translation_turbo_08.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_09.png b/_images/translation_turbo_09.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_10.png b/_images/translation_turbo_10.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_11.png b/_images/translation_turbo_11.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_12.png b/_images/translation_turbo_12.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_13.png b/_images/translation_turbo_13.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_14.png b/_images/translation_turbo_14.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_15.png b/_images/translation_turbo_15.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_16.png b/_images/translation_turbo_16.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_17.png b/_images/translation_turbo_17.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_18.png b/_images/translation_turbo_18.png
old mode 100644
new mode 100755
diff --git a/_images/translation_turbo_19.png b/_images/translation_turbo_19.png
old mode 100644
new mode 100755
diff --git a/_images/vae_01.png b/_images/vae_01.png
old mode 100644
new mode 100755
diff --git a/_images/vae_05.png b/_images/vae_05.png
old mode 100644
new mode 100755
diff --git a/_images/vae_07.png b/_images/vae_07.png
old mode 100644
new mode 100755
diff --git a/_images/vae_08.png b/_images/vae_08.png
old mode 100644
new mode 100755
diff --git a/_images/vae_09.png b/_images/vae_09.png
old mode 100644
new mode 100755
diff --git a/_images/vae_10.png b/_images/vae_10.png
old mode 100644
new mode 100755
diff --git a/_images/vae_11.png b/_images/vae_11.png
old mode 100644
new mode 100755
diff --git a/_images/visual_result.png b/_images/visual_result.png
old mode 100644
new mode 100755
diff --git a/_images/wallpaper.png b/_images/wallpaper.png
old mode 100644
new mode 100755
diff --git a/_images/zero123plus_01.png b/_images/zero123plus_01.png
new file mode 100755
index 00000000..e7ee4613
Binary files /dev/null and b/_images/zero123plus_01.png differ
diff --git a/_images/zero123plus_02.png b/_images/zero123plus_02.png
new file mode 100755
index 00000000..b50aa897
Binary files /dev/null and b/_images/zero123plus_02.png differ
diff --git a/_images/zero123plus_03.png b/_images/zero123plus_03.png
new file mode 100755
index 00000000..860b8c67
Binary files /dev/null and b/_images/zero123plus_03.png differ
diff --git a/_images/zero123plus_04.png b/_images/zero123plus_04.png
new file mode 100755
index 00000000..80b7321f
Binary files /dev/null and b/_images/zero123plus_04.png differ
diff --git a/_images/zero123plus_05.png b/_images/zero123plus_05.png
new file mode 100755
index 00000000..cf2a48f5
Binary files /dev/null and b/_images/zero123plus_05.png differ
diff --git a/_images/zero123plus_06.png b/_images/zero123plus_06.png
new file mode 100755
index 00000000..9d7ea46f
Binary files /dev/null and b/_images/zero123plus_06.png differ
diff --git a/_images/zero123plus_07.png b/_images/zero123plus_07.png
new file mode 100755
index 00000000..3758c408
Binary files /dev/null and b/_images/zero123plus_07.png differ
diff --git a/_images/zero123plus_08.png b/_images/zero123plus_08.png
new file mode 100755
index 00000000..667257f7
Binary files /dev/null and b/_images/zero123plus_08.png differ
diff --git a/_images/zero123plus_09.png b/_images/zero123plus_09.png
new file mode 100755
index 00000000..5f402a1c
Binary files /dev/null and b/_images/zero123plus_09.png differ
diff --git a/_images/zero123plus_10.png b/_images/zero123plus_10.png
new file mode 100755
index 00000000..e5afbcff
Binary files /dev/null and b/_images/zero123plus_10.png differ
diff --git a/_images/zero123plus_11.png b/_images/zero123plus_11.png
new file mode 100755
index 00000000..894c220f
Binary files /dev/null and b/_images/zero123plus_11.png differ
diff --git a/_images/zero123plus_12.png b/_images/zero123plus_12.png
new file mode 100755
index 00000000..1839d5be
Binary files /dev/null and b/_images/zero123plus_12.png differ
diff --git a/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css b/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css
old mode 100644
new mode 100755
diff --git a/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css b/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css
old mode 100644
new mode 100755
diff --git a/_sources/docs/experiments/js_exp.md b/_sources/docs/experiments/js_exp.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/experiments/swjo_exp.md b/_sources/docs/experiments/swjo_exp.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/markdown-example.md b/_sources/docs/markdown-example.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/notebook-example.ipynb b/_sources/docs/notebook-example.ipynb
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.md b/_sources/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/AnimateDiff.md b/_sources/docs/review/AnimateDiff.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Animate_Anyone.md b/_sources/docs/review/Animate_Anyone.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/BBDM.md b/_sources/docs/review/BBDM.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/CM3leon.md b/_sources/docs/review/CM3leon.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Coin3D.md b/_sources/docs/review/Coin3D.md
new file mode 100755
index 00000000..505f44e3
--- /dev/null
+++ b/_sources/docs/review/Coin3D.md
@@ -0,0 +1,273 @@
+``` {admonition} Information
+- **Title:** Controllable and Interactive 3D Assets Generation with Proxy-Guided Conditioning (SIGGRAPH 2024)
+
+- **Reference**
+    - Paper: [https://arxiv.org/pdf/2405.08054](https://arxiv.org/pdf/2405.08054)
+    - Code: [https://github.com/zju3dv/Coin3D](https://github.com/zju3dv/Coin3D)
+    - Project Page : [https://zju3dv.github.io/coin3d/](https://zju3dv.github.io/coin3d/)
+
+- **Author:** Donggeun Sean Ko
+
+- **Last updated on Jan. 07, 2025**
+```   
+
+# Coin3D
+
+## 1. Introduction
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_01.png" alt="main_fig" class="bg-primary mb-1" width="700px">
+
+Overview of Coin3D
+:::
+
+“사용자 친화적인” & “제어 가능”한 3D assets 생성 프레임워크는 3가지 특성을 가져야 한다고 주장함
+- **3D Controllable**: Basic shape를 이용해서 간단하고 쉽게 원하는 형태를 만들 수 있어야 됨
+- **Flexible**: Interactive하게 (UI 등)을 활용하여 다양한 결과물을 만들 수 있어야 됨 (간단)
+- **Responsive**: 중간 결과물 및 빠른 결과물을 만들 수 있게 해야 됨 (fast inference time)
+
+
+## 2. Related Works
+
+**3D Object Generation**
+- Polygon-mesh based
+- Point Cloud
+- Voxels, Implicit Fields
+- CAD-based
+- Multiview Image generation (Zero123++, Wonder3D, etc.)
+
+**Controllable and Interactive Generation**
+- Latent-NeRF
+- Fantasia3D 
+
+**이전 방법들의 문제점**
+- "다중 얼굴 야누스 문제" (하나의 객체가 다양한 각도에서 일관성 있는 모습을 유지하지 못하는 문제)
+- 텍스트 프롬프트(텍스트-3D)나 이미지(이미지-3D)에만 집중함
+- 3D 형태를 정확하게 제어할 수 없음
+
+## Overall
+- Coin3D Input Condition (전처리)
+- 3D-Aware Conditioned Generation
+- Preview and Reconstruction
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_02.png" alt="overall_coin3d_arch" class="bg-primary mb-1" width="700px">
+
+Overview of Coin3D Main Architecture
+:::
+
+## 3. Method
+
+## 3.1: Proxy-Guided 3D Conditioning for Diffusion
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_03.png" alt="3d_proxy" class="bg-primary mb-1" width="700px">
+
+Proxy-based Initial Condition Generation 
+:::
+
+**3D Proxy as an initial condition (Preprocessing)**
+- Coarse shape $P$와 prompt $y$로 다양한 camera pose에 대한 $N_v$개의 일관된 이미지를 예측
+
+$$
+\mathbf{X}_{(1:N_v)} = f(P, y, \mathbf{c}_{(1:N_v)})
+$$
+
+Where:
+- $N_v$: number of consistent multiview images  
+- $P$: coarse shape  
+- $f$: Multiview diffusion-based generator  
+- $y$: prompt  
+- $\mathbf{c}$: camera poses
+
+
+## 3.2 3D Aware Conditioned Generation
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_04.png" alt="3d_aware" class="bg-primary mb-1" width="700px">
+
+3D-Aware Conditioned Generation
+:::
+
+1. 3D Proxy Sample을 Voxelize를 통해 Voxel Grid $F_v$ 생성
+2. **Multiview Image 생성**
+   - 2-1. Image Candidates를 **Clip** + (Rotation, Translation)로 **Denoising U-Net** condition input으로 입력
+   - 2-2. Volume Projected Condition도 입력 (학습)
+3. MV Images → Project Fusion을 통해  
+   multiview feature volume, $F_l^t$ 생성
+4. 3D Convolution ($f_{VP}$)을 통해 intermediate feature를 MVConv  
+   (3DConv intermediate layer)에 계층적으로 추가
+5. $F_c^t = f_{VM(1 \dots N)}(F_l^t) + f_{VM(1 \dots N)}(f_{VP}(F_v)_{1 \dots N})$
+6. 3D control volume 완성!
+7. 3D Control Volume을 다시 $f_u$에 넣어 MV image 생성
+
+
+## 3.2.1 Training Pipeline of 3D Aware Conditioned Generation
+
+1. **(Preprocess)** 각 학습 데이터를 MV image와 균일하게 샘플링된 coarse proxies로 변환
+2. **(Training)** $B$개의 condition 및 target image를 무작위로 sampling하고, 대응하는 coarse proxy points를 샘플링함
+3. **(Training)** $B$개의 timestep과 Gaussian noise도 샘플링  
+   $\epsilon_{(1:B)} \sim \mathcal{N}(0, 1)$
+4. 아래의 loss를 이용하여 추가된 noise를 network $\epsilon_\theta$를 통해 예측
+
+$$
+\begin{aligned}
+\epsilon_\theta &: \text{model's predicted noise} \\
+c(I, F_c^t, \mathbf{c}_i) &: \text{conditioned embedding} \\
+\mathbf{c}_i &: \text{camera view} \\
+F_c^t &: \text{3D control volume} \\
+I &: \text{candidate image}
+\end{aligned}
+$$
+
+### Loss Equation
+$$
+\min_{\theta} \mathbb{E}_{t, \mathbf{x}_{(1:N_v)}, \epsilon_{(1:N_v)}} 
+\| \epsilon_i - \epsilon_\theta (\mathbf{x}_i^t, t, c(I, F_c^t, \mathbf{c}_i)) \|, \tag{2}
+$$
+
+## 3.3. Interactive Generation Workflow
+
+## 3.3.1. Proxy-bounded part editing
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_05.png" alt="proxy-bounded-editing" class="bg-primary mb-1" width="700px">
+
+Proxy-bounded Part Editing
+:::
+
+- MV diffusion은 3D volume & 2D image에 conditioning이 되어 있기 때문에,  
+  이런 condition을 고려해서 편집을 해야하는 게 간단하지 않음.
+  
+- 따라서, two-pathway condition editing scheme을 구성함:
+  - Projected 2D Mask → 2D Latent Diffusion Model
+  - 3D Volume Mask → Partial Update Volume
+  - 2D Image condition + 3D masked volume condition으로 "3D image editing"을 진행
+
+$$
+\hat{F}_C^t = (1 - M) F_C^t + M \tilde{F}_C^t, \tag{3}
+$$
+
+Where:
+- $\hat{F}_C^t$: updated volume  
+- $\tilde{F}_C^t$: predicted volume at $t$  
+- $F_C^t$: cached original volume  
+
+## 3.3.2. Interactive Preview with Progressive Volume Caching
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_06.png" alt="progressive_volume" class="bg-primary mb-1" width="700px">
+
+Interactive Preview with Progressive Volume Caching
+:::
+
+- 목표: Interactive preview를 통해 수정된 결과를 몇 초 내에 확인하고  
+  임의의 시점에서 효과를 검사 및 수정이 가능
+
+- Progressive Volume Caching
+  - 각 timestep $t$에서 최신 3D Control Volume, $F_C^t$를 캐싱함
+  - 이를 통해 $F_C^t$를 반복적으로 계산할 필요가 없음
+
+- Viewpoint Panning
+  - Preview 단계에서 user’s viewpoint poses $c'$를 MV diffusion viewpoint condition에 전달
+  - 이를 통해 원하는 시점 (arbitrary viewpoints)에서 프리뷰 이미지를 렌더링할 수 있음
+
+- 핵심
+  - Cache를 이용하여 불필요한 연산을 제거하고 Cache에서 저장된 3D adapter rendering output을 이용하여 여러 preview를 생성
+
+
+## 3.4 Volume-Conditioned Reconstruction
+
+## 3.4.1. Preview & Reconstruction 
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_07.png" alt="preview_recon" class="bg-primary mb-1" width="700px">
+
+Preview and Reconstruction 
+:::
+
+- 기존 Multiview images를 활용한 3D Reconstruction은  
+  viewpoint가 적어 unexpected geometry가 만들어져 결과물이 뭉개지거나 한계점이 보임
+
+- 3D-aware context from 3D control volume을 활용해 3D Reconstruction quality를 올림
+  - 개인적인 의견: 더 정교한 3D 물체 + Multiview가 있으니 더 정교한 결과물이 만들어진다? 라고 보여짐...
+
+- **Propose Volume SDS**  
+  - integrating 3D control prior from voxelized feature $F_C^t$ to the field’s backpropagation
+
+$$
+\nabla_x L_{\text{V-SDS}} = w(t) \left( \epsilon_\theta \left( \mathbf{x}_t, t, c(I, F_C^t, \mathbf{c}) \right) - \epsilon \right),
+$$
+
+where $w(t)$ is the weighting function from **DreamFusion**.
+
+## 4. Results
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_08.png" alt="results" class="bg-primary mb-1" width="700px">
+
+Qualitative Results
+:::
+
+### 프록시 기반 생성 방법 비교
+
+- **Wonder3D와 SyncDreamer 디테일 비교**:
+  1. **더 높은 품질의 멀티뷰 이미지**:
+     - Coin3D 결과물은 **다양한 시점에서 일관성 있는 이미지**를 생성하며, 왜곡이나 아티팩트가 최소화됨.
+     - 반면, Wonder3D와 SyncDreamer는 복잡한 객체(예: 거북이와 오리)에서 **기하학적 불일치** 또는 텍스처 불일치를 보임.
+
+  2. **더 나은 텍스처 메쉬**:
+     - Ours는 **더욱 현실적이고 세밀한 텍스처**를 재구성하며, 부드러운 전환과 정밀한 정렬을 유지.
+     - Wonder3D는 텍스처 불일치가 나타나고, SyncDreamer는 단순화된 텍스처를 생성하는 경향이 있음.
+
+  3. **객체 형태의 보존**:
+     - Ours는 입력된 **coarse shape**를 정확히 보존하면서 세부 정보를 강화함.
+     - 다른 방법론은 재구성 중 형태 왜곡(예: 의자가 휘거나 일그러짐)이 나타남.
+
+  4. **더 자연스러운 출력**:
+     - Ours의 출력은 **미적 품질이 높고 자연스러운 결과물**을 제공하며, 복잡한 텍스처(예: 도넛)에서도 특히 돋보임.
+     - Wonder3D와 SyncDreamer는 인공적이거나 세부 사항이 부족한 경우가 많음.
+
+### 주요 관찰점
+- **Coarse Shapes**: 모든 방법이 비슷한 코스 쉐이프에서 시작하지만, Ours는 이를 가장 잘 개선함.
+- **멀티뷰 이미지**: Ours는 명확하고 일관된 멀티뷰 이미지를 생성하여 정확한 3D 재구성을 가능하게 함.
+- **텍스처 메쉬**: Ours는 현실적인 텍스처를 생성하며, 경쟁 방법론보다 높은 수준의 사실성을 보여줌.
+
+### 요약:
+- **(a) Ours**: 멀티뷰 이미지의 일관성과 텍스처 메쉬 품질에서 우수함.
+- **(b) Wonder3D**: 기하학적 일관성과 텍스처 세부 정보에서 어려움.
+- **(c) SyncDreamer**: 텍스처가 단순화되고 형태가 불일치함.
+
+## 4.2 Quantitative Results
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_09.png" alt="quant_results" class="bg-primary mb-1" width="700px">
+
+Quantitative Results
+:::
+
+- **TEXTure** (Richardson et al., 2023) user study guideline 채택
+- 30명 사용자에게 35개의 test case를 무작위 순서로 제시한 후  
+  perceptual quality & content matching degree (w.r.t the given image or text prompts)를 기준으로 정렬 및 점수 배정
+- 3점 Best 기준으로 함.
+
+
+## 5. Ablation Study
+
+### Volume SDS
+
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_10.png" alt="vol_sds" class="bg-primary mb-1" width="700px">
+
+Ablation Study on Volume SDS
+:::
+- Volume SDS Loss를 추가시 렌더링에 artifacts가 없으며 더 스무스하고 자연스러운 텍스쳐를 바탕으로 생성함
+
+### Proxy Condition & 3D Mask Dilation
+:::{figure-md} 
+<img src="../../pics/Coin3D/coin3d_11.png" alt="proxy_cond" class="bg-primary mb-1" width="700px">
+
+Ablation Study on Proxy Condition and 3D Mask Dilation
+:::
+- Proxy 하고 Dilation이 없을 시 rendering이 고르게 안되는 현상이 생김. 
+- Full method는 proxy와 dilation을 둘 다 사용했으며, 이 경우 위와 같은 현상 없이 rendering이 고르게 됨.
+
+## 6. Conclusion
+- Basic block만 있으면 원하는 3D 생성을 할 수 있음
+- Flexible 하고 UI-friendly 함 (ComfyUI 등) 
+- 타 모델들에 비해 3D 결과물이 더 좋음
\ No newline at end of file
diff --git a/_sources/docs/review/ConceptLab.md b/_sources/docs/review/ConceptLab.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/ControlNet.md b/_sources/docs/review/ControlNet.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/CustomDiffusion.md b/_sources/docs/review/CustomDiffusion.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DALLE2.md b/_sources/docs/review/DALLE2.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DDIM.md b/_sources/docs/review/DDIM.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DDPM.md b/_sources/docs/review/DDPM.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DiT.md b/_sources/docs/review/DiT.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.md b/_sources/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DreaMoving.md b/_sources/docs/review/DreaMoving.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DreamBooth3D.md b/_sources/docs/review/DreamBooth3D.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DreamFusion.md b/_sources/docs/review/DreamFusion.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/DreamGaussian.md b/_sources/docs/review/DreamGaussian.md
new file mode 100755
index 00000000..b00a1a87
--- /dev/null
+++ b/_sources/docs/review/DreamGaussian.md
@@ -0,0 +1,623 @@
+``` {admonition} Information
+- **Title:** DreamGaussian: Generative Gaussian Splatting for Efficient 3D Content Creation (ICLR 2024)
+
+- **Reference**
+    - Paper: [https://arxiv.org/abs/2309.16653](https://arxiv.org/abs/2309.16653)
+    - Code: [https://github.com/dreamgaussian/dreamgaussian/tree/main](https://github.com/dreamgaussian/dreamgaussian/tree/main)
+    - Project Page: [https://dreamgaussian.github.io/](https://dreamgaussian.github.io/)
+    
+- **Author:** Kyeongmin Yu
+
+- **Last updated on Dec. 26, 2024**
+```
+
+# DreamGaussian
+
+
+# 1. Abstract
+
+<aside>
+
+- A novel 3D content generation framework that achieves both efficiency and quality simultaneously.
+- 3D gaussian을 3D generation task에 적용하고 3D gaussians 로 부터 textured mesh 를 추출하는 효율적인 알고리즘 제안.
+- DreamGaussian produces high-quality textured meshes in just 2 min from a single-view image, achieving approximately 10 times acceleration compared to existing methods.
+</aside>
+
+# 2. Related Work
+
+## 2.1 3D representations
+
+<details>
+<summary> <strong> Neural Radiance Fields (NeRF) </strong></summary>
+
+<table>
+<tr>
+<th align="center">
+<strong>original NeRF</strong>
+</th>
+<th align="center">
+NeRF와 <strong>Mip-NeRF</strong>의 차이점
+</th>
+<th align="center">
+<strong>Mip-NeRF 360</strong>의 contract($\cdot$) function
+</th>
+</tr>
+<tr>
+<td>
+<img src="/book/pics/DreamGaussian/image_1.png" alt="original nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_1.png" alt="mip-nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_2.png" alt="mip-nerf" width="400px">
+</td>
+</tr>
+<tr>
+<td>
+신경망(MLP)을 이용해 3d 물체에 대한 정보를 표현했다.
+신경망은 카메라의 위치와 방향을 입력으로 해당 지점의 density와 color를 리턴한다. 온전한 하나의 이미지를 렌더링 하기 위해서는 모든 camera ray를 따라 (이론상)적분이 필요하다.
+</td>
+<td >
+ray tracing이 아닌 cone tracing 방식으로, 렌더링 된 이미지의 품질을 향상시킴.
+</td>
+<td >
+파란 영역은 euclidean space, 노란 영역은 맵핑된 영역이다.
+이외에도 기존 방식(coarse-to-fine)과 다르게 n개의 신경망을 사용했으며 초기 신경망을 최종 결과물 출력시 사용하지 않았다.
+</td>
+</tr>
+</table>
+
+<table>
+<tr>
+<th align="center">
+<strong>Instant NGP</strong>
+</th>
+<th align="center">
+<strong>Block-NeRF</strong>
+</th>
+<th align="center">
+<strong>NeRF in the wild</strong>
+</th>
+</tr>
+<tr>
+<td>
+<img src="../../pics/DreamGaussian/image_3.png" alt="instant-ngp" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_4.png" alt="block-nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_5.png" alt="nerfinthewild" width="400px">
+</td>
+</tr>
+<tr>
+<td>
+voxel기반의 multiresolution hash encoding을 통해 기존 sin/cos을 이용한 positional encoding 방식을 발전시킴. 이러한 encoding 방식을 채택함으로써 encoding 자체의 속도도 빨라졌으며 다중 스케일 정보를 담은 encoding을 이용함으로써 original NeRF의 신경망보다 오히려 작은 구조를 채택할 수 있었다. 또한 cuda를 활용하여 처리속도를 향상시킴으로써 real-time에 가까운 속도를 낼 수 있었다.
+</td>
+<td >
+대규모 장면을 모델링하기 위해 여러 NeRF를 합쳐서 하나의 큰 장면을 구성하는 방법
+</td>
+<td >
+in the wild dataset에서 scene 재구성을 위해 scene을 
+"static 요소"와 "transient 요소"로 분리하여 모델링
+</td>
+</tr>
+</table>
+
+</details>
+
+<details>
+<summary> <strong>3D Gaussian Splatting (3DGS)</strong></summary>
+
+<table>
+<tr>
+<th align="center">
+<strong>point cloud 예시</strong>
+</th>
+<th align="center">
+<strong>Gaussian densification 과정</strong>
+</th>
+</tr>
+<tr>
+<td>
+<img src="../../pics/DreamGaussian/image_6.png" alt="point cloud" class="mb-1" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_7.png" alt="gaussian densification" class="mb-1" width="400px">
+</td>
+</tr>
+</table>
+
+
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_8.png" alt="GS optimization" class="mb-1" width="700px">
+
+Gaussian Splatting optimization
+colmap과 같은 SfM 을 이용해 3D keypoint를 찾아 해당 지점으로 3D gaussian을 초기화 한다. 이후 gaussian들을 합치거나 쪼개며 최적화를 진행한다. 렌더링 시에는 3D gaussian들을 2D projection하여 하나의 이미지를 만든다.
+:::
+
+
+
+---
+
+- **3D Gaussian representation [[참고1](https://towardsdatascience.com/a-comprehensive-overview-of-gaussian-splatting-e7d570081362#4cd8)][[참고2](https://patapom.com/blog/SHPortal/)]**
+    
+    각각의 3D Gaussians은 3D position(mean) $\mu$, opacity $\alpha$, anisotropic covariance $\Sigma$로 위치와 크기, 방향을 표현하며, 여기에 spherical harmonic (SH) coefficients를 추가해 view-dependent appearance(color)를 표현했다.
+    
+    - covariance matrix $\Sigma$ 를 rotation matrix $\mathbf R$ 와 scaling matrix $\mathbf S$로 분리가능.
+        - rotation matrix는 (real $r$+ imaginary $i,j,k$) 4차원의 quaternion으로 표현가능.
+        - scaling matrix는 ($x,y,z$) 3차원의 scale로 표현가능.
+    - SH 계수의 경우 일반적으로 각 색상 채널(R, G, B)당 9개의 계수, 총 27개의 계수를 사용합니다.
+        - 9개의 계수를 사용한다는 말은 $l\in[0,1,2]$ 라는 뜻이며, $m \in [-l,+l]$ 이다.
+        - SH를 이용하면 빛이 균일하게 퍼지는 표면(매트한 표면)외에도 입사각(보는 방향)에 따라 달라지는 Non-Lambertian effects도 표현할 수 있다.
+    
+    **Spherical Harmonics** $Y_l^m(\theta,\phi)$ **를 이용한 lighting** [[paper](https://3dvar.com/Green2003Spherical.pdf)] 
+    
+    Spherical Harmonics는 번역하면 구면조화함수로 구의 표면에서 정의되는 함수를 말한다. 구면좌표계 $(r,\theta,\phi)$ 에서 $r$ 을 고정하고 $\theta, \phi$에 따라 값을 출력하는 함수이다. 
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_9.png" alt="image9" class="mb-1" width="500px">
+
+    수학적으로는 라플라스 미분방정식의 해
+    :::
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_10.png" alt="image10" class="mb-1" width="500px">
+
+    Spherical Harmonics 시각화 [[eq](https://en.wikipedia.org/wiki/Table_of_spherical_harmonics)]
+    :::
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_11.png" alt="image11" class="mb-1" width="500px">
+
+    Spherical Harmonics 시각화
+    :::
+    
+    <img src="../../pics/DreamGaussian/Rotating_spherical_harmonics.gif" width="40" height="40"/>
+    
+    - cf. fourier series
+        
+        :::{figure-md} 
+        <img src="../../pics/DreamGaussian/image_12.png" alt="image12" class="mb-1" width="500px">
+
+        삼각함수들을 이용해 임의의 주기함수를 근사하는 푸리에 변환의 3D 확장판
+        :::
+        
+        :::{figure-md} 
+        <img src="../../pics/DreamGaussian/image_13.png" alt="image13" class="mb-1" width="500px">
+
+        SH를 이용한 근사 예시. SH는 구 표면에서의 분포의 basis에 해당.
+        :::
+        
+    
+    **Lambertian vs. Non-lambertian**
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_14.png" alt="image14" class="mb-1" width="500px">
+
+    Diffuse reflection (~ Lambertian effects)
+    :::
+    
+    Lambertian reflection은 어떤 각도에서 보든 같은 양 빛을 관찰 할 수 있는 이상적인 상태
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_15.png" alt="image15" class="mb-1" width="500px">
+
+    Non-lambertian effects -> 즉, 반사광 표현가능
+    :::
+
+---
+
+- **Pruning and Densification**
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_16.png" alt="image16" class="mb-1" width="500px">
+
+    3D Gaussian Splatting 
+    initialization, optimization, adaptive control of gaussians
+    :::
+    
+
+---
+    
+- **Volumetric Rendering**
+    
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_17.png" alt="image17" class="mb-1" width="500px">
+
+    3D Gaussian Splatting Volumetric Rendering
+    :::
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_18.png" alt="image18" class="mb-1" width="500px">
+
+    from nerfstudio
+    :::
+    
+    
+    
+    - frustum culling을 통해 보이지 않는 3D gaussian들을 제외하고 2D로 projection.
+    - 3D gaussian들을 sorting하고 각 픽셀의 ray에 겹치는 3D gaussian들만 color, opacity 값을 반영한다.
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_19.png" alt="image19" class="mb-1" width="500px">
+
+    original NeRF vs 3d Gaussian Splatting rendering
+    :::
+    
+---
+</details> 
+
+## 2.2 Text-to-3D Generation
+
+- data-driven 3D diffusion models
+- 3D native diffusion models
+- lift 2D image models for 3D generation
+
+## 2.3 Image-to-3D Generation
+
+- generating 3D assets from a reference image(~ single-view 3D reconstruction)
+- text-to-3D methods can also be image-to-3D methods
+- Zero-1-to-3
+- One-2-3-45
+
+### 2.4 추가 참고자료
+
+<details>
+<summary> <strong>DreamFusion</strong></summary>
+
+- Score Distillation Sampling (SDS) [[arXiv](https://arxiv.org/abs/2209.14988)]
+    - pretrained 2D diffusion model을 parametric image generator로서 사용하는 방식을 제안함. image generator로서 NeRF를 사용하여 differentiable 3D representation을 가능하도록 함.
+
+    $$
+    \mathbf x=g_\Theta(p)
+    $$
+    - $\mathbf x$ 는 카메라 포즈 $p$에서 렌더링된 이미지를 의미한다. $g_\Theta(\cdot)$는 differentiable rendering function으로 NeRF parameters $\Theta$를 parameter로 갖는다.
+
+    $$
+    \triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon}\Big[w(t)(\epsilon_\phi(\mathbf x;t,e)-\epsilon)\frac{\partial\mathbf x}{\partial\Theta} \Big]
+    $$
+    - $w(t)=\sigma_t^2$ 는 DDPM의 weighting function, $\epsilon_\phi(\cdot)$은 pretrained parameter $\phi$로  noise를 예측하는 함수이다.
+- 즉, SDS formulation은 사전학습된 2D 이미지 생성 디퓨전 모델을 이용해 NeRF parameter $\Theta$를 최적화하는 식이다. 이를 통해 text description에 맞는 NeRF 3D shape을 최적화 할 수 있는 것이다.
+</details>
+
+<details>
+<summary> <strong>Marching Cube</strong></summary>
+
+- 3d 모델에서 표면(mesh)을 추출하기 위한 알고리즘. 여기서 표면은 밀도가 특정 값을 넘는 지점을 의미한다. 
+
+- 주어진 3D 공간을 작은 <strong>큐브</strong>(cube/voxel)로 나누고, 각 큐브의 <strong>8개 코너</strong>에서 값(일반적으로 밀도값)을 보고 그 값을 바탕으로 표면을 추출한다. 
+
+- 8개의 코너의 밀도값에 따라 어떤 표면을 가지게 되는지는 미리 정해둔 정보($2^8$)를 이용한다. 
+
+1. 3D 공간을 작은 큐브로 분할
+2. 임계값을 기준으로 판단
+    - 해당 지점이 물체 내부에 속하는지 외부에 속하는지 판단함
+3. 표면 생성
+    - 각 큐브의 꼭짓점 값에 따라 표면이 어떻게 생길지에 대한 규칙을 미리 정의해두고, 
+    이를 바탕으로 표면을 추출
+        
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_21.png" alt="marching cube" class="mb-1" width="500px">
+
+    from wikipedia
+    :::        
+    - 오렌지 점들은 물체의 표면 혹은 가장자리에 위치한 점들을 의미함.
+</details>        
+
+
+<details>
+<summary> <strong>NVDiffrast</strong></summary>
+
+nvidia 2020 ACMTOG [[arXiv](https://arxiv.org/abs/2011.03277)][[github](https://github.com/NVlabs/nvdiffrast?tab=readme-ov-file)]
+
+- 미분가능한 렌더링 방법론으로 cuda를 이용해 가속화한 것이 특징.
+    - 주로 삼각형 메시를 효율적으로 렌더링하고 그래디언트를 계산하기 위해 사용됨.
+- FLAME, 3DMM, SMPL등은 미분가능한 3D 모델으로 렌더링 방법론은 아님.
+    - 3DMM (3D Morphable Models) 1999
+        
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_22.png" alt="3DMM" class="mb-1" width="500px">
+
+    3DMM
+    :::
+        
+    - SMPL (Skinned Multi-Person Linear Model) 2015
+        
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_23.png" alt="SMPL" class="mb-1" width="500px">
+
+    SMPL
+    :::
+        
+    - FLAME (Faces Learned with an Articulated Model and Expressions) 2017
+        
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_24.png" alt="FLAME" class="mb-1" width="500px">
+
+    FLAME
+    :::
+</details>       
+
+
+<details>
+<summary> <strong>Zero-1-to-3</strong></summary>
+
+[[project page](https://zero123.cs.columbia.edu/)] [[DDPM (NeurIPS 2020)](https://www.notion.so/DDPM-NeurIPS-2020-05eb365e0ece43c0bc55ef21a8d4c6f0?pvs=21)]
+
+- **Zero-1-to-3** control the camera perspective in large-scale diffusion models, 
+enabling zero-shot novel view synthesis and 3D reconstruction from a single image.
+
+- RGB image $x\in\Bbb R^{H\times W\times 3}$ , relative camera rotation $R\in \Bbb R^{3\times 3}$,relative camera translation $T\in\Bbb R^3$
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_24.png" alt="zero 1-to-3" class="mb-1" width="400px">
+
+zero 1-to-3
+:::
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_25.png" alt="zero 1-to-3" class="mb-1" width="400px">
+
+zero 1-to-3
+:::
+</details>
+
+
+# 3. Method
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_26.png" alt="method overview" class="mb-1" width="500px">
+
+Method Overview
+:::
+
+- 2-stage framework for 3D content generation 
+for both Image-to-3D and Text-to-3D tasks.
+    - SDS를 이용해 초기화 한 3D gaussian splatting을 이용하여 3D generation
+    - 3D gaussians에서 textured mesh 추출
+    - UV-space refinement를 통해 texture fine-tuning
+    
+
+## 3.1 Generative Gaussian Splatting
+
+개별 3D gaussian의 위치와 형태는 center($\mathbf x$), scaling factor($\mathbf s$), rotation quaternion($\mathbf q$)으로 표현되며, opacity value($\alpha$), color feature($\mathbf c$)를 저장하여 volumetric rendering시 사용한다.
+
+$\Theta_i=\{\mathbf x_i, \mathbf s_i,\mathbf q_i, \alpha_i, \mathbf c_i\}$,  $\mathbf x \in \Bbb R^3, \mathbf s\in \Bbb R^3, \mathbf q \in \Bbb R^4, \alpha\in \Bbb R, \mathbf c \in \Bbb R^3$ 
+original gaussian splatting에서는 spherical harmonics 계수를 이용하여 색을 표현하지만 
+simple diffuse color를 모델링 하기 위해 간략화 → 재질 표현이 어려울 수 있다.
+
+3D Gaussians은 random position, unit scaling, no rotation으로 initialization 한 후, SDS를 이용해 최적화 한다.
+
+### Image-to-3D
+
+- 사전학습된 Zero-1-to-3 XL을 사용했으며 image $\tilde I^r_\text{RGB}$ 와 foreground mask $\tilde I^r_A$ 를 입력으로 사용한다.
+    
+    $$
+    \triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon} \Big[w(t)(\epsilon_\phi(I^p_\text{RGB};t,\tilde I^r_\text{RGB},\Delta p)-\epsilon)\frac{\partial I^p_\text{RGB}}{\partial\Theta} \Big] \tag 1
+    $$
+    
+    $w(t)$는 weighting function이고, $\epsilon_\phi(\cdot)$ 는 사전학습된 $\phi$를 이용해 예측된 noise를 뜻한다. 
+    $\Delta p$ 는 relative camera pose, $r$은 reference camera이다.
+    
+- 추가적으로 reference view와 transparency 를 input에 align했다.
+    
+    $$
+    \mathcal L_\text{Ref}=\lambda_\text{RGB}\|I^r_\text{RGB}-\tilde I_\text{RGB}^r \|^2_2 + \lambda_A\|I^r_A-\tilde I^r_A\|^2_2 \tag 2
+    $$
+    
+
+### Text-to-3D
+
+Stable diffusion을 활용하여 text-to-3D task를 수행했다. 
+
+$$
+\triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon}\Big [ w(t)(\epsilon_\phi(I^p_\text{RGB};t,e)-\epsilon)\frac{\partial I^p_\text{RGB}}{\partial \Theta} \Big] \tag 3
+$$
+
+$e$는 주어진 text prompt의 CLIP embedding을 의미한다. 
+
+### Discussion
+
+하지만 논문의 저자들은 SDS loss의 ambiguity 때문에 길게 학습하더라도 생성된 3D gaussians이  blurry하고 디테일이 부족하다고 한다. 이를 개선하기 위해 다음 단계인 mesh extraction과 texture refinement를 수행한다.
+
+## 3.2 Efficient Mesh Extraction
+
+block-wise local density query와 back-projected color를 이용해 textured mesh를 추출하는 효과적인 알고리즘을 제안한다.
+
+### Local Density Query
+
+marching cube algorithm을 적용하기 위해서는 local density grid가 필요하다. gaussian splatting 알고리즘의 주요 특징은 over-sized Gaussian 들은 최적화 과정에서 split 및 pruning된다는 점이다. 이는 효과적인 rasterization을 위해 culling technique을 적용 할 수 있는 근거가 된다. 또한 이 점은 block-wise density queries를 perform 할 때도 사용할 수 있다.
+
+먼저 3D space를 $(-1,1)^3$ 으로 맵핑한다. (그냥 최대, 최소값을 이용해 정규화)  그리고 이 공간을 $16^3$의 overlapping blocks(multiscale voxels)으로 나눈다. 그리고 각 블록의 외부에 위치한 gaussian들은 제외한다. 이를 통해 계산해야 할 gaussian의 총 개수를 효과적으로 줄일 수 있다. 그리고 각 블록의 내부에 $8^3$ dense grid를 만들어 최종적으로는 $128^3$의 dense grid를 만든다. grid position $\mathbf x$의 각 query는 남아있는 3D gaussian들의 opacity의 weighted sum으로 local density grid를 얻는다.
+
+$$
+d(\mathbf x)=\sum_i\alpha_i\text{exp}(-\frac{1}{2}(\mathbf x-\mathbf x_i)^T\Sigma_i^{-1}(\mathbf x-\mathbf x_i)) \tag{4}
+$$
+
+$\Sigma$는 covariance matrix로 scaling $\mathbf s$, rotation $\mathbf q$로 이루어져 있다. 이후에는 empirical threshold를 marching cube 알고리즘에 적용하여 mesh surface를 추출한다. [decimation과 remeshing](https://www.meshlab.net)을 이용해 후처리하여 더욱 자연스럽고(smoother), 간결한(compact) mesh를 만들었다.
+
+### Color Back-projection
+
+앞선 단계에서 mesh를 얻었기 때문에 rendered RGB 이미지를 mesh surface로 back-project하여 texture map으로 만들 수 있다. 
+
+<details>
+<summary>UV mapping from wikipedia</summary>
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_27.png" alt="uv mapping" class="mb-1" width="400px">
+
+uv mapping
+:::
+
+먼저 mesh의 UV coordinate를 unwrap하고 빈 texture image로 초기화 한다. 그리고 8개의 azimuth, 3개의 elevation을 균일하게 선택하고 top, bottom view까지 포함하여 corresponding RGB image를 렌더링 할 수 있게 한다. 이러한 RGB 이미지들의 각 픽셀은 UV coordinate를 기반으로 texture image로 맵핑할 수 있다. 
+
+이렇게 back-project된 texture image는 다음의 texture fine-tuning 단계의 초기 설정으로 사용된다.
+</details>
+
+## 3.3 UV-space Texture Refinement
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_28.png" alt="uv-space texture refinement" class="mb-1" width="400px">
+
+UV-space Texture Refinement
+:::
+
+앞선 back-projection된 coarse texture를 시작으로 texture 품질을 올리고자 했으나, SDS loss를 이용해 
+UV-space를 직접 fine-tuning 하면 위의 그림과 같은 artifact가 발생하게된다. 이는 differentiable rasterization시 사용되는 mipmap texture sampling 기법때문이다. SDS와 같이 모호한 guidance를 이용하면 각 mipmap level에 따라 over-saturation된 color block으로 gradient가 전파 된다.
+
+- mipmap in rasterization
+    
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_29.png" alt="rasterization" class="mb-1" width="400px">
+
+    from wikipedia
+    :::
+    
+    :::{figure-md} 
+    <img src="../../pics/DreamGaussian/image_30.png" alt="rasterization" class="mb-1" width="400px">
+
+    from unity document
+    :::   
+    
+    위와 같은 고품질 렌더링과 렌더링 속도 향상을 위해 mipmap이라는 기법을 활용한다. 본 논문에서 texture mapping, rendering시 사용한 NVdiffrast도 mipmap을 활용하고 있다.
+    mipmap은 texture를 여러 레벨의 화질로 저장하는 방식으로 mipmap level은 특정 화질의 texture version을 의미한다. 카메라로 부터 멀리 떨어진 object는 저레벨의 mipmap을 사용해 렌더링 한다. 저레벨의 mipmap은 이미 정보손실이 일어난 상태이고 이를 이용해 렌더링 되었다면 gradient가 흐르는 방향이 왜곡될 수 있다.
+    
+    ---
+    
+
+$$
+I^p_\text{fine}=f_\phi(I^p_\text{coarse}+\epsilon(t_\text{start});t_\text{start},c) \tag 5
+$$
+
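+stage 2 의 학습은 image-to-image synthesis와 같은 방식으로 진행된다. initialization texture가 있으므로 임의의 camera view $p$에서 coarse image $I^p_\text{coarse}$를 렌더링할 수 있고, 여기에 랜덤 노이즈를 더한 뒤 denoising하여 refined image $I^p_\text{fine}$을 얻는다.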
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_31.png" alt="from SDEdit" class="mb-1" width="400px">
+
+from SDEdit
+:::
+
+$f_\phi(\cdot)$ 는 사전학습된 2D diffusion 을 통해 refined image를 얻는 multi-step denoising process를 의미한다. $\epsilon(t_\text{start})$ 는 timestep $t_\text{start}$의 랜덤 노이즈를 말한다. $c$ 는 condition으로, image-to-3D에서는 카메라 포즈 변화량 $\Delta p$, text-to-3D에서는 text embedding $e$ 에 해당한다. 시작 timestep $t_\text{start}$는 noise 강도를 제한하기 위해 신중히 선택되어야 refined image의 original content를 유지하며 detail을 향상시킬 수 있다고 한다. refined image는 이후 pixel-wise MSE loss에 texture 최적화를 위해 사용된다. 
+
+$$
+\mathcal L_\text{MSE}=\|I^p_\text{fine}-I^p_\text{coarse}\|^2_2 \tag 6
+$$
+
+image-to-3D task에서는 reference view RGBA loss $\mathcal L_\text{Ref}$ 를 적용했다. 
+
+실험 결과에 따르면 50 step 정도 만에 대부분 detail이 좋아졌다고 하며 반복횟수를 늘릴수록 texture의 detail이 향상되었다고 합니다.
+
+# 4. Experiments
+
+## 4.1 Implementation Details
+
+- Number of iterations
+    - first stage - 500 steps
+    - second stage - 50 steps
+- 3D Gaussian initialization
+    - number - 5000 for image-to-3D, 1000 for text-to-3D
+    - opacity - 0.1
+    - color - grey
+    - radius - 0.5
+- Rendering resolution - 64 to 512 for gaussian splatting, 128 to 1024 for mesh
+- Loss weights in eq(2) - RGB, transparency 가중치($\lambda_\text{RGB}, \lambda_A$)는 0에서 부터 각각 10000, 1000로 linearly increasing
+- Camera pose sampling - fixed radius 2 for image-to-3D / 2.5 for text-to-3D,
+                                           y-axis FOV 49 degree, 
+                                           azimuth in $[-180,180]$ degree, elevation in $[-30,30]$.
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_32.png" alt="Horizontal coordinates from wikipedia" class="mb-1" width="300px">
+
+Horizontal coordinates from wikipedia
+:::
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_33.png" alt="Horizontal coordinates from wikipedia" class="mb-1" width="300px">
+
+[출처](https://www.epd.gov.hk/eia/register/report/eiareport/eia_2522017/EIA/html/Appendix/Appendix%2011.1.pdf)
+:::
+
+- Background color - white or black randomly for gaussian splatting
+- Run-time - 1min/stage for image-to-3D 
+                   2min/stage for text-to-3D w. Stable Diffusion $512\times512$ resolution
+- Marching cube threshold - 1
+- GPU - NVIDIA V100(16GB), less than 8GB for this experiments
+
+## 4.2 Qualitative Comparison
+
+**Image-to-3D comparison**
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_34.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+**Text-to-3D comparison**
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_35.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+최적화를 진행하는 방법론 뿐만 아니라 inference-only 방법론들과 비교해도 매우 빠른 생성 속도를 보였다고 함.
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_36.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+이렇게 뽑아낸 mesh는 blender와 같은 리깅 툴을 이용해 애니메이팅 가능.
+
+## 4.3 Quantitative Comparison
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_37.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_38.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+## 4.4 Ablation Study
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_39.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+논문에서 제안하는 파이프라인의 모든 과정이 필요함을 보여줌.
+
+- Periodical densification of 3D Gaussians
+- Linear annealing of timestep t for SDS loss
+- Effect of the reference view loss $\mathcal L_{\text{Ref}}$
+
+# 5. Limitations and Conclusion
+
+- 3D content generation framework인 DreamGaussian을 통해 3D content 생성의 효율성을 증대.
+- 3D Gaussian으로 부터 mesh를 추출하는 알고리즘 제안.
+- texture fine-tuning stage를 통해 image나 text로 부터 고품질의 polygonal mesh생성 가능.
+
+:::{figure-md} 
+<img src="../../pics/DreamGaussian/image_40.png" alt="실험결과" class="mb-1" width="400px">
+
+실험결과
+:::
+
+아래와 같은 기존 방법론들의 문제점들을 여전히 가지고 있으나 score debiasing/ camera-conditioned 2D diffusion models/ BRDF auto-encoder와 같은 방법을 도입하면, 개선가능할 것으로 기대함.
+
+- Janus prob
+- over saturated texture
+- baked lighting
+
+덧붙여 texture refinement를 진행하는 stage 2에서 blurry한 결과를 얻을 수 있으나 학습을 더 진행하면 개선된다고 함.
\ No newline at end of file
diff --git a/_sources/docs/review/DreamPose.md b/_sources/docs/review/DreamPose.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/GIGAGAN.md b/_sources/docs/review/GIGAGAN.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/GLIDE.md b/_sources/docs/review/GLIDE.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/HyperDreamBooth.md b/_sources/docs/review/HyperDreamBooth.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/I-DDPM.md b/_sources/docs/review/I-DDPM.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/IP_Adapter.md b/_sources/docs/review/IP_Adapter.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/LCM-LoRA.md b/_sources/docs/review/LCM-LoRA.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/LLM_grounded_Diffusion.md b/_sources/docs/review/LLM_grounded_Diffusion.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Latent_Diffusion_Model.md b/_sources/docs/review/Latent_Diffusion_Model.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/LoRA.md b/_sources/docs/review/LoRA.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Make_A_Video.md b/_sources/docs/review/Make_A_Video.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/MimicBrush.md b/_sources/docs/review/MimicBrush.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Muse.md b/_sources/docs/review/Muse.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/NeRF.md b/_sources/docs/review/NeRF.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Point_E.md b/_sources/docs/review/Point_E.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/ProlificDreamer.md b/_sources/docs/review/ProlificDreamer.md
new file mode 100755
index 00000000..e4b83ef2
--- /dev/null
+++ b/_sources/docs/review/ProlificDreamer.md
@@ -0,0 +1,398 @@
+```{admonition} Information
+- **Title:** ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation (NeurIPS 2023 Spotlight)
+
+- **Reference**
+    - Paper: [https://arxiv.org/abs/2305.16213](https://arxiv.org/abs/2305.16213)
+    - Code: [https://github.com/thu-ml/prolificdreamer?tab=readme-ov-file](https://github.com/thu-ml/prolificdreamer?tab=readme-ov-file)
+
+- **Author:** Kyeongmin Yu
+
+- **Last updated on Dec. 26, 2024**
+```
+
+
+# ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation
+
+- view generation이 가능한 DM의 특성을 3D rendering 모델로 전달하여 pretrained 된 DM이 생성하는 이미지 분포와 3D representation의 분포를 맞춰가는 것으로 Dream Fusion(SDS)과 유사하지만 개선된 아이디어(VSD)를 제안하고자한 논문이다.
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image.png" alt="prolificdreamer_1" class="mb-1" width="700px">
+
+Prolific Dreamer Overview
+:::
+
+📌  **Prolific Dreamer 2-stage approach**
+
+1. optimize a high-resolution NeRF by **VSD**
+2. geometry optimization of mesh from NeRF with **SDS** (optional)
+
+appendix를 참고하면, triangle 크기가 비교적 클때 VSD와 SDS의 차이가 크지 않으므로 SDS를 사용했다고 하며, 더 섬세한 mesh의 경우 VSD가 SDS에 비해 표현력이 좋을것으로 믿는다고 함.
+
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_1.png" alt="prolificdreamer_2" class="mb-1" width="400px">
+
+ProlificDreamer vs. DreamFusion 정성적 결과
+:::
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_2.png" alt="prolificdreamer_3" class="mb-1" width="400px">
+
+ProlificDreamer vs. DreamFusion 정성적 결과 
+:::
+
+
+# 1 Introduction
+
+고품질의 3D content를 생산하는 것은 품이 많이 드는 일이다. 이러한 어려움을 해결하기 위해 text-to-3D 방식이 발전하고 있다. text description을 기반으로 3D content 생성을 자동화하는 방식은 유망하며 다양한 분야(architecture, animation, gaming, AR/VR)에서 paradigm의 변화를 일으킬 수 있을 것이다. 
+
+Diffusion model의 text-to-image 생성력을 바탕으로 3D content를 생성하려는 DreamFusion과 같은 시도가 있었다. DreamFusion은 **Score Distillation Sampling(SDS)** 알고리즘을 이용해 단일 3D representation을 최적화 했다. 이는 어떤 방향에서 렌더링된 이미지라도 주어진 text에 대해 높은 likelihood를 갖도록 한것으로써 diffusion model에 의해 evaluate되었다. (diffusion model을 loss에 활용) 하지만 over-saturation, over-smoothing, low-diversity 문제가 발생했다. 또한 text-to-3D의 design space에서 orthogonal 한 요소(rendering resolution - distillation time schedule)들에 대한 파악이 아직 부족하다. 
+
+본 논문에서는 섬세한 3D representation을 얻기 위해 이러한 모든 요소에 대해 systematic study를 진행한다. 먼저 **Variational Score Distillation(VSD)** 를 제안한다. 이는 주어진 textual prompt에 해당하는 3D scene을 하나의 random variable로 취급하며, 하나의 점(single point)으로 취급한 SDS와는 다르다. VSD는 3D scene의 분포를 최적화하는데, 모든 시점에서 렌더링된 이미지들의 분포가 pretrained 2D diffusion model이 정의하는 분포와 KL divergence 관점에서 최대한 가까워지도록 한다. 이러한 variational formulation 덕분에 VSD는 여러 개의 3D scene이 하나의 prompt에 대응될 수 있다는 특성을 자연스럽게 반영한다.
+
+- 원문참고
+    
+    VSD optimizes a distribution of 3D scenes such that the distribution induced on images rendered from all views aligns as closely as possible, in terms of KL divergence, with the one defined by the pretrained 2D diffusion model (see Sec. 3.1). Under this variational formulation, VSD naturally characterizes the phenomenon that multiple 3D scenes can potentially align with one prompt.
+    
+
+해당 variational formulation을 효율적으로 풀기 위해 VSD는 **particle-based variational inference**를 도입하고, 3D representation을 표현하기 위해 하나의 3D parameters 집합을 particles로 표현하였다. **Wasserstein gradient flow**를 이용해 이러한 particles로 부터 새로운 gradient-based update rule을 이끌어 냈다. 이는 최적화 수렴 후, 해당 particles가 desired distribution으로 부터 sample된 것임을 보장한다. update 시에는 diffused rendered images의 분포의 score function이 필요한데 이는 **pretrained diffusion model + low-rank adaptation(LoRA)** 로 얻을 수 있었다. 최종적으로 particles과 score function을 업데이트 하는 형태가 된다.
+
+Sec 3.3에서 SDS는 variational distribution에 single-point Dirac distribution을 사용하는 VSD라고 볼 수 있음을 보인다. 이를 통해 SDS의 diversity와 fidelity가 낮은 이유를 알 수 있다. single particle만으로도 VSD는 parametric score model을 학습할 수 있고 잠재적으로 SDS보다 더 나은 생성 결과를 제공할 수 있다. 또한 동일한 렌더링 함수를 이용해 2D space에서 SDS와 VSD를 비교하여 3D 요소만 분리하여 비교한 결과를 담았다. diffusion model의 고전 샘플링 방식과 같이 VSD는 CFG의 가중치 조절을 통해 보다 사실적인 sample을 생성할 수 있다. 반면 SDS는 이전 text-to-3D 연구와 유사한 over-saturation, over-smoothing 문제를 보이는 부족한 결과를 보였다.
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_3.png" alt="prolificdreamer_3" class="mb-1" width="300px">
+
+Prolific Dreamer 
+:::
+
+$$
+\delta(x)=\begin{cases}0, &{x\neq 0} \\ \infty, &{x=0} \end{cases}
+$$
+
+$$
+\int_{-\infty} ^\infty \delta(x) dx=1
+$$
+
+Sec 4는 text-to-3D 알고리즘의 orthogonal 요소들에 대한 추가적인 **systematic study**와 clear **design space**를 담고 있다. 특히 훈련과정 중 고화질 렌더링과 시각적 품질 개선을 위한 **annealed distilling time schedule**을 제안한다. 또한 복잡한 scene을 표현하기 위한 **scene initialization**을 제안한다. 요소들에 대한 ablation study는 Sec 5에서 볼 수 있으며, 앞서 언급한 요소들은 VSD에 효과적임을 보인다. 결론적으로 high-fidelity, diverse 3D 결과를 얻을 수 있으며 이를 **ProlificDreamer**라고 한다.
+
+Sec 5에서 ProlificDreamer의 고화질(512x512) rendering 능력과 rich structure와 complex effects를 Neural Radiance Fields(NeRF)상에서 표현할 수 있음을 보인다. ProlificDreamer는 다중 물체가 포함된 복잡한 scene의 360도 전방향을 성공적으로 표현하는 것에 처음으로 성공했다. 게다가 NeRF로 초기화 한 후 ProlificDreamer로 세세하고 photorealistic한 3D texture mesh들을 생성할 수있다.
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_4.png" alt="prolificdreamer_5" class="mb-1" width="700px">
+
+Prolific Dreamer  생성 결과
+:::
+
+# 2. Background
+
+### Diffusion models (DMs)
+
+**Score-Based Generative Modeling through Stochastic Differential Equations**
+
+- **forward process** $\{q_t\}_{t\in[0,1]}$ - gradually add noise to a data point $x_0\sim p_0(x_0)$
+    
+    $$
+    \begin{aligned} q_t(x_t|x_0)&:=\mathcal N(\alpha_tx_0,\sigma_t^2 I)\\ q_t(x_t)&:=\int q_t(x_t|x_0)q_0(x_0) dx_0 \end{aligned}
+    $$
+    
+    $\alpha_t, \sigma_t >0$ 는 hyperparameter로 $\alpha_0\approx 1,\sigma_0\approx 0,\alpha_1\approx 0, \sigma_1\approx 1$ 이다.
+    
+- **reverse process** $p_t$ - denoising from $p_1(x_1):=\mathcal N(0,I)$ by predicting the noise added to a clean data $x_0$
+    
+    noise prediction network $\epsilon_\phi(x_t,t)$을 학습하는 과정은 아래와 같다.
+    
+    $$
+    \mathcal L_\text{Diff}(\phi):=\Bbb E_{x_0\sim q_0(x_0),t\sim\mathcal U(0,1),\epsilon \sim \mathcal N(0,I)}\Big[ \omega(t)\|\epsilon_\phi(\alpha_t x_0+\sigma_t\epsilon,t)-\epsilon\|_2^2\Big], \tag 1
+    $$
+    
+    $\omega(t)$는 time dependent weighting function이다. 훈련이 끝나면 $p_t\approx q_t$ 가 되며 따라서 $p_0\approx q_0$ 으로 sample들을 그릴 수 있게 된다. 덧붙여 noise prediction network는 $p_t, q_t$ 의 score function을 approximating하는 데에도 사용가능하다. $\triangledown_{x_t}\text{log}q_t(x_t)\approx\triangledown_{x_t}\text{log}p_t(x_t)\approx-\epsilon_\phi(x_t,t)/\sigma_t$
+    
+
+diffusion model이 가장 활발히 활용되고 있는 분야 중 하나는 text-to-image generation으로 text prompt $y$를 조건으로 noise를 예측한다. 또한 Classifier-free Guidance를 통해 샘플 품질과 다양성을 조절한다. guidance scale이 커질 수록 품질이 올라가지만 다양성이 감소하는 경향을 보인다.
+
+### **Text-to-3D by score distillation sampling**
+
+- **Score Distillation Sampling (SDS)** from DreamFusion
+- **Score Jacobian Chaining (SJC)** 라고도 불리며 Zero-1-to-3, Magic3d, Fantasia3d, Latent NeRF 등 다양한 연구에 활용되고 있다.
+
+사전학습된 T2I diffusion model $p_t(x_t|y)$과 noise prediction network $\epsilon_\text{pretrained}(x_t,t,y)$ 을 이용해 SDS는 single 3D representation의 parameter $\theta \in \Theta$를 최적화 한다. 이때 사용하는 metric은 **Euclidean metric**으로 $\Theta$는 Euclidean space이다.
+
+camera parameter $c$가 분포 $p(c)$를 따르고, 
+differentiable rendering mapping $g(\cdot,c):\Theta \rightarrow \Bbb R^d$ 이 주어진다고 하자. 
+$y^c$를 view dependent prompt라고 하면, rendering image $g(\theta,c)$에서 시작하는 forward diffusion process는 $q_t^\theta(x_t|c)$로 표현할 수 있다. 
+
+SDS는 parameter $\theta$를 아래와 같이 최적화한다.
+
+$$
+\mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [\frac{\sigma_t}{\alpha_t}\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+$$
+
+$$
+\nabla_\theta\mathcal L_{\text{SDS}}(\theta)\approx\Bbb E_{t,\epsilon,c}\Big [\omega(t)\space \big(\epsilon_\text{pretrained}(x_t,t,y^c)-\epsilon\big)\frac{\partial g(\theta,c)}{\partial\theta}\Big] \tag{3}
+$$
+
+### 3D representations
+
+- **NeRF** → MLP
+    - multilayer perceptron을 이용해 3D 객체를 표현한다. 3차원 공간상의 위치 정보를 입력하면 해당하는 색과 밀도를 얻을 수 있다. 이때 $\theta$는 MLP의 내부 파라미터를 의미한다.
+    - 카메라 위치 $c$가 주어질때, rendering process $g(\theta,c)$는 casting rays로 정의되며 각 ray의 sampling points의 색을 가중합하여 각 픽셀의 값을 결정한다.
+    - NeRF는 최적화 측면에서 유연하고 복잡한 장면도 표현가능하다. (매우 상대적인 표현으로 사료됨.)
+- **Textured mesh** → triangle mesh + texture
+    - triangle mesh와 해당 mesh 표면의 texture, color로 3D 객체를 표현한다. 여기서 3D parameter $\theta$는 triangle meshes의 좌표와 texture parameter를 의미한다.
+    - 카메라 위치 $c$가 주어질때, rendering process $g(\theta,c)$는 casting rays로 정의되며 각 ray가 지나는 mesh의 intersection의 색을 계산함으로써 각 픽셀의 값을 결정한다.
+    - Textured mesh는 고화질 렌더링이 가능하고 differentiable rasterization을 이용하면 렌더링 속도가 빠르다.
+
+# 3. Variational Score Distillation
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_5.png" alt="prolificdreamer_5" class="mb-1" width="700px">
+
+Prolific Dreamer 
+:::
+
+### 3.1 Sampling from 3D Distribution via Variational Inference
+
+3D representation에 사용되는 parameter $\theta$들은 확률밀도 $\mu(\theta|y)$로 모델링 할 수 있다.
+$q_0^\mu(x_0|c,y)$는 rendered image $x_0:=g(\theta,c)$의 분포, $p_0(x_0|y^c)$는 $t=0$ 일때 marginal distribution이다. 
+
+고품질의 3D representation을 얻기 위해서 distribution $\mu$를 최적화 하는 방법을 제안한다. 사전학습된 DM을 이용해 모든 view에 대한 rendered image sample과 distribution $\mu$를 정렬(align)하는 것은 아래와 같이 두 분포의 거리를 좁히는 것이라고 할 수 있다. 
+
+$$
+\text{min}_\mu D_\text{KL}\big(q_0^\mu(x_0|c,y)\|p_0(x_0|y^c)\big) \tag{4}
+$$
+
+- SDS eq.2
+    
+    $$
+    \mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [({\sigma_t}/{\alpha_t})\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+    $$
+    
+
+위의 식은 일반적인 variational inference problem으로 variational distribution $q_0^\mu(x_0|c,y)$을 target distribution $p_0(x_0|y^c)$으로 근사(distill)하는 방식을 사용한다.
+
+위의 식 4의 prob을 직접 푸는것은 복잡하고 비효율적이기 때문에 diffusion model을 이용해 series of optimization problem을 통해 해결하고자 한다. $t$가 $T$를 향해 점점 커질때, 위의 최적화 문제는 diffused distribution이 gaussian distribution에 가까워 지며 점점 쉬워진다.
+
+(기존 SDS 최적화 식의 parameter $\theta$가 distribution $\mu$로 바뀐 형태)
+
+$$
+\mu^*:=\text{argmin}_\mu\Bbb E_{t,c}\Big[ (\sigma_t/\alpha_t)\omega(t)D_{KL}(q_t^\mu(x_t|c,y)\|p_t(x_t|y^c))\Big] \tag5
+$$
+
+- SDS eq.2
+    
+    $$
+    \mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [({\sigma_t}/{\alpha_t})\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+    $$
+    
+
+### 3.2 Update Rule for Variational Score Distillation
+
+식 5의 prob을 풀기위해 또 다른 생성모델을 훈련하여 풀 수 있는데 이는 resource가 많이 필요하고 최적화 과정이 복잡해진다. 앞선 particle-based variational inference 연구와 유사하게,  n개의 3D particles를 유지하고 해당 particles을 위한 새로운 update rule을 제안한다. 즉, $\{\theta^{(i)}\}^n_{i=1}$을 현재 distribution $\mu$를 표현하기 위해 사용하는 것이다. $\theta^{(i)}$는 최적화 과정이 수렴하고 나면 최적 분포 $\mu^*$에서 샘플링된 것이 된다. 
+
+$$
+\frac{d\theta_\tau}{d\tau}=-\Bbb E_{t,\epsilon,c}\Big[\omega(t)\big(-\sigma_t\triangledown_{x_t} \text{log}p_t(x_t|y^c)-(-\sigma_t\triangledown_{x_t}\text{log}q_t^{\mu_\tau}(x_t|c,y))\big)\frac{\partial g(\theta_\tau,c)}{\partial\theta_\tau}\Big] \tag 7
+$$
+
+$$
+\text{min}_{\phi}\sum^n_{i=1}\Bbb E_{t\sim\mathcal U(0,1),\epsilon\sim\mathcal N(0,I),c\sim p(c)}\Big[\|\epsilon_\phi(\alpha_tg(\theta^{(i)},c)+\sigma_t\epsilon,t,c,y)-\epsilon\|^2_2\Big] \tag 8
+$$
+
+최종적으로는 아래와 같은 objective function을 얻는다.
+
+$$
+\triangledown_\theta\mathcal L_{VSD}(\theta)\triangleq\Bbb E_{t,\epsilon,c}\Big[\omega(t
+)(\epsilon_{\text{pretrain}}(x_t,t,y^c)-\epsilon_\phi(x_t,t,c,y))\frac{\partial g(\theta,c)}{\partial\theta}\Big]\tag {9}
+$$
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_6.png" alt="prolificdreamer_6" class="mb-1" width="700px">
+
+Prolific Dreamer 
+:::
+
+### 3.3 Comparison with SDS
+
+**SDS as a special case of VSD**
+
+$$
+\triangledown_\theta\mathcal L_{\text{SDS}}(\theta)\approx\Bbb E_{t,\epsilon,c}\Big [\omega(t)\space \big(\epsilon_\text{pretrained}(x_t,t,y^c)-\epsilon\big)\frac{\partial g(\theta,c)}{\partial\theta}\Big] \tag{SDS}
+$$
+
+$$
+\triangledown_\theta\mathcal L_{VSD}(\theta)\triangleq\Bbb E_{t,\epsilon,c}\Big[\omega(t
+)(\epsilon_{\text{pretrain}}(x_t,t,y^c)-\epsilon_\phi(x_t,t,c,y))\frac{\partial g(\theta,c)}{\partial\theta}\Big]\tag {VSD}
+$$
+
+SDS는 $\mu(\theta|y)\approx \delta(\theta-\theta^{(1)})$ 인 VSD의 special case에 해당한다. VSD는 multiple particles를 사용할 수 있을 뿐 아니라 parametric score function $\epsilon_\phi$도 학습하기 때문에 SDS와 동일하게 single particle을 사용해도 성능이 좋다. 또한 LoRA를 사용해 text prompt 로 부터 추가적으로 뽑아낸 정보를 estimation $\epsilon_\phi(x_t,t,c,y)$에 반영할 수 있다.
+    
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_7.png" alt="prolificdreamer_7" class="mb-1" width="300px">
+
+particle이 뭘까? 느낌적인 느낌을 받아보자.
+:::
+    
+
+**VSD is friendly to CFG**
+
+VSD는 사전학습된 diffusion model을 이용해 optimal $\mu^*$에서 sample $\theta$를 추출하고자 한다. 때문에 3D sampling에서 CFG를 tuning한 효과가 기존 2D이미지 생성시 DPM-solver에서 CFG 값을 조절하는 것과 유사하다. 그래서 CFG 값을 조절하면서 더 다양한 실험결과를 얻을 수 있게 된다. SDS도 이점은 마찬가지 이나, CFG 값이 클때만 유효한 3D content를 만들어 낼 수 있었다.
+
+**VSD vs. SDS in 2D experiments that isolate 3D representations**
+
+동일한 rendering 모델을 이용해 VSD와 SDS의 3D 표현력만 비교한 결과이다. 
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_8.png" alt="prolificdreamer_8" class="mb-1" width="700px">
+
+2D 실험에서의 VSD와 SDS 비교 결과
+:::
+
+<details>
+<summary>Appendix의 실험결과</summary>
+    
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_9.png" alt="prolificdreamer_9" class="mb-1" width="700px">
+
+particle 개수에 따른 생성 퀄리티 비교 (single particle을 사용할 때도 SDS보다 성능이 좋다고 함)
+:::
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_10.png" alt="prolificdreamer_10" class="mb-1" width="700px">
+
+2D 이미지 생성으로 비교한 VSD와 SDS의 생성 품질  차이 / SDS는 VSD에 비해 부드럽고 세부표현이 부족하다.
+:::
+
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_11.png" alt="prolificdreamer_11" class="mb-1" width="700px">
+
+gradient visualization에서도 SDS와 VSD의 차이점을 확인 할 수 있다.
+:::
+</details>
+
+# 4. Prolific Dreamer
+
+### 4.1 Design Space of Text-to-3D Generation
+
+**two-stage approach**를 이용해 text-to-3D 생성의 design space를 개선하고자 했다.
+
+1. **First Stage** - optimize a high-resolution NeRF by VSD
+2. **Second Stage** - DMTet to extract textured mesh from NeRF
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_12.png" alt="prolificdreamer_12" class="mb-1" width="700px">
+
+Prolific Dreamer와 다른 모델의 특성 비교
+:::
+
+### 4.2 3D Representation and Training
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_13.png" alt="prolificdreamer_13" class="mb-1" width="700px">
+
+Prolific Dreamer 수행 결과
+:::
+
+**High-resolution rendering for NeRF training** (in 1st stage)
+
+ProlificDreamer에서는 고화질 렌더링을 위해 **Instant NGP**를 사용했으며 VSD를 이용해 512 resolution 까지 NeRF를 최적화 했다. VSD를 사용함으로써 high-fidelity 결과를 얻을 수 있었다.
+
+- Instant NGP
+
+**Scene initialization for NeRF training** (in 1st stage)
+
+NeRF의 초기 density는 $\sigma_\text{init}(\mu)=\lambda_\sigma(1-\frac{\|\mu\|_2}{r})$로 초기화 한다. $\lambda_\sigma$ 는 density strength, $r$ 는 density radius, $\mu$는 3d coordinate이다. 
+
+object-centric scene에서는 Magic3D의 방식을 따랐으며($\lambda_\sigma=10, r=0.5$), 
+복잡한 scene의 경우  $\lambda_\sigma=-10$ 로 하여 density가 거의 비어있도록 하고, $r$을 2.5로 하여 camera를 둘러 싸도록 했다.
+
+**Annealed time schedule for score distillation** (in 1st stage)
+
+단순한 2단계 annealing을 score distillation objective에 적용했다. 이는 SDS나 VSD 모두에 적용가능하다. 초기 몇 스텝에서는 $t\sim \mathcal U(0.02,0.98)$로 하고 이후에는 $t\sim \mathcal U(0.02,0.50)$로 설정했다.
+
+여기서 핵심은 $q_0^\mu(x_0|c,y)$와 $p_0(x_0|y^c)$를 맞추는 것인데 t가 커지면 KL divergence가 학습초기에 더 적당한 최적화 방향으로 갈 수 있다. t가 작으면 더 세부적인 조정이 가능하므로 $p_t(x^*|y^c)$와  $p_0(x^*|y^c)$의 차를 더 줄일 수 있다.
+
+**Mesh representation and fine-tuning** (in 2nd stage)
+
+coordinate-based hash grid encoder의 특성을 이용해 NeRF에서 mesh를 추출했다. Fantasia3D의 방법론을 따랐는데 여기서는 geometry와 texture를 분리하여 최적화했다. 첫번째로는 normal map을 이용해 geometry를 최적화하고 두번째로 texture를 최적화하는 식이다. 실험결과에서 이단계에서는 SDS와 VSD의 품질 차이가 크지않아 효율성을 위해 SDS를 사용했다. 하지만 Fantasia3D와 비교했을때 VSD 및 앞선 방법론을 이용해 최적화한 NeRF에서 뽑아낸 mesh는 SDS를 이용한 것보다 뛰어났다.
+
+# 5. Experiments
+
+### 5.1 Results of Prolific Dreamer
+
+<details>
+<summary>Appendix의 실험결과</summary>
+    
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_9.png" alt="prolificdreamer_9" class="mb-1" width="700px">
+
+particle 개수에 따른 생성 퀄리티 비교 (single particle을 사용할 때도 SDS보다 성능이 좋다고 함)
+:::
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_10.png" alt="prolificdreamer_10" class="mb-1" width="700px">
+
+2D 이미지 생성으로 비교한 VSD와 SDS의 생성 품질  차이 / SDS는 VSD에 비해 부드럽고 세부표현이 부족하다.
+:::
+
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_11.png" alt="prolificdreamer_11" class="mb-1" width="700px">
+
+gradient visualization에서도 SDS와 VSD의 차이점을 확인 할 수 있다.
+:::
+</details>
+    
+
+### 5.2 Ablation Study
+
+**Ablation on NeRF Training**
+
+64x64 rendering + SDS에서 시작하여 요소들을 추가하며 실험한 결과는 아래와 같다. 
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_14.png" alt="prolificdreamer_14" class="mb-1" width="700px">
+
+Prolific Dreamer 실험 결과
+:::
+
+**Ablation on mesh fine-tuning**
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_15.png" alt="prolificdreamer_15" class="mb-1" width="700px">
+
+Prolific Dreamer 실험 결과
+:::
+
+**Ablation on CFG**
+
+CFG 값이 작으면 diversity 상승, CFG 값이 크면 비교적 diversity가 하락하는 실험결과를 얻음. VSD의 경우 CFG 값이 작을때에도 좋은 품질의 3D content를 생성할 수있기 때문에 충분한 diversity의 결과를 얻을 수 있지만 SDS의 경우 CFG 값이 커야만 괜찮은 3D content를 생성하기 때문에 diversity가 하락할 수밖에 없음.
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_16.png" alt="prolificdreamer_16" class="mb-1" width="700px">
+
+Prolific Dreamer 실험 결과
+:::
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_17.png" alt="prolificdreamer_17" class="mb-1" width="700px">
+
+Prolific Dreamer 실험 결과
+:::
+
+:::{figure-md}
+<img src="../../pics/ProlificDreamer/image_18.png" alt="prolificdreamer_18" class="mb-1" width="700px">
+
+Prolific Dreamer 실험 결과
+:::
+
+
+    📌개인적 감상
+
+    GAN : WGAN = DreamFusion : ProlificDreamer
+
+    GAN : Diffusion = DreamFusion : ProlificDreamer
+
+
+# Future Work
+
+- GECO
+    - 고정된 수의 particle을 이용하는 prolific dreamer의 단점을 보완해 새로운 샘플을 생성할 수 있도록 함.
\ No newline at end of file
diff --git a/_sources/docs/review/SDEdit.md b/_sources/docs/review/SDEdit.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/SDXL.md b/_sources/docs/review/SDXL.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Shap-E.md b/_sources/docs/review/Shap-E.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/StyO.md b/_sources/docs/review/StyO.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/StyleGAN.md b/_sources/docs/review/StyleGAN.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md b/_sources/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Textual_Inversion.md b/_sources/docs/review/Textual_Inversion.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/VideoLDM.md b/_sources/docs/review/VideoLDM.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md b/_sources/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/consistency_models.md b/_sources/docs/review/consistency_models.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/cycleGAN.md b/_sources/docs/review/cycleGAN.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/dalle.md b/_sources/docs/review/dalle.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/diffusion_beats_GANs.md b/_sources/docs/review/diffusion_beats_GANs.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/dreambooth.md b/_sources/docs/review/dreambooth.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/gan.md b/_sources/docs/review/gan.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/imagen.md b/_sources/docs/review/imagen.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/imagen_editor.md b/_sources/docs/review/imagen_editor.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/latent_consistency_models.md b/_sources/docs/review/latent_consistency_models.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/magic-3d.md b/_sources/docs/review/magic-3d.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/one-step-image-translation.md b/_sources/docs/review/one-step-image-translation.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/one_step_diffusion_with_distribution_matching_distillation.md b/_sources/docs/review/one_step_diffusion_with_distribution_matching_distillation.md
new file mode 100755
index 00000000..9733e0a4
--- /dev/null
+++ b/_sources/docs/review/one_step_diffusion_with_distribution_matching_distillation.md
@@ -0,0 +1,209 @@
+``` {admonition} Information
+- **Title:** One-step Diffusion with Distribution Matching Distillation
+
+- **Reference**
+    - Paper: [https://arxiv.org/abs/2311.18828](https://arxiv.org/abs/2311.18828)
+    - Code: [https://github.com/tianweiy/DMD2](https://github.com/tianweiy/DMD2)
+    
+- **Author:** Joongwon Lee
+
+- **Last updated on Oct. 16, 2024**
+```
+
+# One-step Diffusion with Distribution Matching Distillation
+
+# Introduction and Preliminaries
+
+## Overview
+
+- Diffusion model has revolutionized image generation, 하지만 sampling speed 가 느린 문제점이 있음
+- Accelerating sampling speed 를 목적으로 하는 많은 연구가 이루어져 왔음
+    - ODE solving: diffusion model 의 큰 틀은 유지한 채 sampling step 의 수를 줄이면서 유사한 수준의 sample 생성 (DDIM, InstaFlow, CFM)
+
+    - 하지만 여전히 50 ~ 100 step 이하로 step 을 줄이게 되면 sample quality 가 크게 감소하여 diffusion distillation 을 통한 one-step generation 방법이 연구되어지고 있음
+    - Single step distillation: Diffusion model 을 teacher 삼아 one-step generation model 학습
+    - 직관적으로 생각해보면, diffusion model 을 학습 시킨후 학습된 모델을 통해 다수의 (noise, image) pair 얻은 후 one-step VAE 를 학습시키는 것을 생각 해 볼 수 있음.
+    - 그러나, multi step 으로 학습된 diffusion model 을 one-step generation model 에 distillation 을 하는 것은 어려움이 있음
+        - Noise level 을 점진적으로 증가 시키며, one-step generation 을 학습시키는 방법
+- GAN 에서 영감을 받아, (noise, image) 의 대응을 강제하는 대신 (such as autoencoder),  student model 이 생성하는 이미지를 teacher model (original diffusion model) 이 생성하는 이미지와 indistinguishable 하게 학습시키는 distribution matching 전략을 생각해 볼 수 있음
+    
+- Diffusion model 의 score ($\nabla_{\mathbf{x}} \log p(\mathbf{x})$) 을 사용해서 student model ($p(x)$) 을 학습시킬 수 있음 (real image score 가 증가하는 방향으로 parameter 를 업데이트 하는 것이 desired 방향 + fake image 를 생성하는 diffusion model 의 score 을 감소시키는 방향으로 parameter update)
+    - Diffusion model and score base model
+        
+        엄밀하게 본다면, 이 논문은 diffusion model (DDPM style) 보다는 score matching 의 철학과 논리전개를 바탕으로 두고 있음. 그러나, diffusion model 과 score based model 은 궁극적으로 같은 objective 를 다른 방식으로 학습하고 있을 뿐이고, 그 score 과 diffusion model 이 예측하는 one-step denoised 분포 ($\mu_{base}$) 는 쉽게 변환 가능함.
+        
+        $$
+        s_{\text{real}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{base}}(x_t, t)}{\sigma_t^2}
+        $$
+
+        :::{figure-md} 
+        <img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%202.png"  alt ="NCSN" class="bg-primary mb-1" width="700px">
+
+        NCSN
+        :::
+
+        :::{figure-md} 
+        <img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/langevin.gif" alt ="Langevin" class="bg-primary mb-1" width="300px">
+
+        Langevin sampling of score models
+        :::
+        
+
+# Method
+
+## Overview
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%203.png" class="bg-primary mb-1" width="700px">
+
+Overall scheme
+:::
+
+- 학습된 diffusion model (real data score function)  이 주어진 상황에서 one-step generator ($G_{\theta}$) 를 학습시키기 위해, 두개의 loss 1) distribution matching gradient (엄밀하게는 loss 보다는 parameter update gradient) 2) regression loss 를 사용
+- Adversarial AutoEncoder 가 연상되는 architecture 를 가지고 있음
+    - Adversarial AutoEncoder
+        
+        :::{figure-md} 
+        <img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%204.png" class="bg-primary mb-1" width="700px">
+
+        Adversarial AE architecture
+        :::
+        
+        AAE 는 VAE 가 생성하는 이미지에 대한 1) regression loss 와 2) implicit distribution matching loss 를 가지고 있는데, 여기서 implicit distribution matching 을 teacher diffusion model 의 distribution matching gradient 로 대체한 형태
+        
+- 총  네 부분으로 나뉘어져 있음
+    - Paired dataset construction
+    - Pretrained base model (= real data score function, freezed)
+    - One-step generator (main objective)
+    - Fake data generator (= fake data score function, on-line training)
+
+## Distribution Matching Loss
+
+우선, 생성모델의 training objective를 생각해보면, $p_\text{fake}$ (one-step generator가 생성하는 분포) 와 $p_\text{real}$ (실제 데이터의 분포) 를 matching 시키도록 학습을 시켜야하는 것이 one-step generator의 학습 objective이고 아래와 같이 쓰일 수 있음:
+
+$$
+\begin{align*}
+D_{KL}(p_{\text{fake}} \parallel p_{\text{real}}) &= \mathbb{E}_{x \sim p_{\text{fake}}} \left( \log \frac{p_{\text{fake}}(x)}{p_{\text{real}}(x)} \right) \\
+&= \mathbb{E}_{\substack{z \sim \mathcal{N}(0; I) \\ x = G_{\theta}(z)}} \left( - \log p_{\text{real}}(x) + \log p_{\text{fake}}(x)\right)
+\end{align*}
+$$
+
+하지만, $p_\text{real}(x)$ 를 바로 구하는 것이 어려움 (이것이 곧 생성모델의 objective). 그러나, 모델을 학습시키기 위해서는 $D_{KL}$ 을 직접 구할 필요는 없고, $D_{KL}$ 을 minimize 하는 (fake 과 real 의 분포사이의 거리를 최소화 시키는 방향으로) parameter update 를 하기 위한 미분값만 알면 충분함. 위 식을 one-step generator 의 learnable parameter ($\theta$) 에 대해 미분 해주면, 
+
+$$
+\nabla_{\theta} D_{KL} = \mathbb{E}_{\substack{z \sim \mathcal{N}(0; I) \\ x = G_{\theta}(z)}} \left[ - \left( s_{\text{real}}(x) - s_{\text{fake}}(x) \right) \nabla_{\theta} G_{\theta}(z) \right]
+$$
+
+$$
+s_{\text{real}}(x) = \nabla_x \log p_{\text{real}}(x), \quad s_{\text{fake}}(x) = \nabla_x \log p_{\text{fake}}(x)
+$$
+
+이 유도되는데, 여기서 score $s_{\text{real}}(x)$ 와  $s_{\text{fake}}(x)$ 를 정확히 알 수 있다면, one-step generator 을 학습시킬 수 있음. 단, score 이 $x$ 가 존재하는 전체 space 에 대해서 잘 작동하는 score 이여야함 (= Score-SDE).
+
+이제, 그러면 우리의 objective 는 real score 와 fake score 을 어떻게 구할지가 되는데, $s_{\text{real}}(x)$ 은 pretrained diffusion model 에서: 
+
+$$
+s_{\text{real}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{base}}(x_t, t)}{\sigma_t^2}
+$$
+
+와 같이 유도됨.  $s_{\text{fake}}(x)$ 의 경우가 복잡해지는데, $s_{\text{fake}}(x)$ 는 one-step generator 가 생성하는 이미지의 score function 라서 one-step generator 로 생성 된 이미지가 있어야 해당 이미지를 생성하는 diffusion 모델을 학습시켜서 구할 수 있음. 
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/f637f3b3-9e18-48d1-946a-784830e6fb98.png" class="bg-primary mb-1" width="700px">
+
+Distribution matching gradient computation
+:::
+
+여기서 저자들은 fake data score function (initialized to real data score function) 을 동시에 학습시키는 방법으로 해결 
+
+$$
+s_{\text{fake}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{fake}}^{\phi}(x_t, t)}{\sigma_t^2}
+$$
+
+$$
+\mathcal{L}_{\text{denoise}}^{\phi} = \left\lVert \mu_{\text{fake}}^{\phi}(x_t, t) - x_0 \right\rVert_2^2
+$$
+
+정리하자면, real score 은 real distribution (data distribution) 방향으로 parameter update를 하면서 fake distribution (one-step generation 의 output) 을 real distribution 에 가깝게 일치시키는 역할을 하며, fake score 의 반대방향으로 parameter update 를 하는 것은 fake data generator (one-step generator) 의 반대방향으로 distribution 을 밀어내서 most probable 한 한개의 점으로 모든 fake image 가 collapse 하는것을 방지하는 regularizer 역할을 한다.
+
+
+## Regression Loss
+
+그런데, score 만을 사용하여 one-step generator 을 학습시키는 것은 충분하지 않음. 두가지 측면에서 생각 해 볼 수 있는데 1) Practically, 매우 작은 noise level 에서는 score 이 reliable 하지 않아짐 2) Theoretically, $\nabla_x \log p(x)$ 는 $p(x)$ 의 scale 에 영향을 받지 않아, 데이터의 높고 낮음에 대한 정보를 줄 수 없음.
+
+따라서, real + fake score 로 학습이 진행된다면,  낮은 real score 을 보이는 영역은 커버하지 못하는 부분으로 one-step generation 모델이 수렴하게 될 것 이고, high dimension 에서는 generated image 의 pixel level accuracy 에 문제가 생길 수 있음. 
+
+여기서 저자들은 pixel-wise MSE (regression loss) 를 사용하여 간단히 이 문제를 해결함.
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png" class="bg-primary mb-1" width="500px">
+
+Regression loss
+:::
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2011.png" class="bg-primary mb-1" width="500px">
+
+The effect of real and fake scores and regression loss
+:::
+
+- 그렇다면, Regression loss 하나만으로는 학습이 왜 불가능한가? (개인적 생각)
+    
+    이론상 regression loss 만을 사용해도 충분히 one-step generator 을 학습시킬 수 있어 보인다. 그러나, regression 의 근본적 문제점은 distribution to distribution matching 이 아니라는 점이다. e.g. 
+    
+     
+    
+
+Regression loss 를 얻기 위해서는 (noise, real image) pair 가 필요하게 되는데, 저자들은 학습된 diffusion model 에서부터 ODE solver 을 사용하여 gaussian noise와 real image 사이에 쌍을 얻어서 데이터셋을 학습 이전에 구축, 해당 pair들을 바탕으로 regression loss 를 구함 (Learned Perceptual Image Patch Similarity).
+
+$$
+\mathcal{L}_{\text{reg}} = \mathbb{E}_{(z,y) \sim \mathcal{D}} \, \ell(G_\theta(z), y)
+$$
+
+## Full algorithm
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2012.png" class="bg-primary mb-1" width="400px">
+
+Training algorithm
+:::
+
+
+# Results
+
+## Main comparison
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2013.png" class="bg-primary mb-1" width="300px">
+
+Image generation benchmarks
+:::
+
+## Ablation Study
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2014.png" class="bg-primary mb-1" width="300px">
+
+Ablation on distribution matching
+:::
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2015.png" class="bg-primary mb-1" width="500px">
+
+Ablation on regression loss
+:::
+
+
+## Comparison with Unaccelerated Models
+
+:::{figure-md} 
+<img src="../../pics/one_step_diffusion_with_distribution_matching_distillation/image%2017.png" class="bg-primary mb-1" width="500px">
+
+Comparison with Unaccelerated Models
+:::
+
+# Conclusion and Limitations
+
+- Score model 을 사용한 distribution matching loss 와 regularizing term 인 regression loss 를 통해, teacher model 에 준하는 성능을 낼 수 있었다
+- One step generator 와 multi-step generation 사이에는 근본적인 성능 tradeoff 가 존재함
+- one-step generator 의 성능은 teacher diffusion model 의 성능에 종속된다
\ No newline at end of file
diff --git a/_sources/docs/review/progressive_distillation.md b/_sources/docs/review/progressive_distillation.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/t2i_adapter.md b/_sources/docs/review/t2i_adapter.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/vae.md b/_sources/docs/review/vae.md
old mode 100644
new mode 100755
diff --git a/_sources/docs/review/zero123plus.md b/_sources/docs/review/zero123plus.md
new file mode 100755
index 00000000..3ffde840
--- /dev/null
+++ b/_sources/docs/review/zero123plus.md
@@ -0,0 +1,160 @@
+``` {admonition} Information
+- **Title:** Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model 
+
+- **Reference**
+    - Paper: [https://arxiv.org/abs/2310.15110](https://arxiv.org/abs/2310.15110)
+    - Code: [https://github.com/SUDO-AI-3D/zero123plus](https://github.com/SUDO-AI-3D/zero123plus)
+    
+- **Author:** Sangwoo Jo
+
+- **Last updated on Dec. 16, 2024**
+```
+
+# Zero123++
+
+## 1. Introduction
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_01.png" alt="zero123plus_01" class="bg-primary mb-1" width="700px">
+
+High-quality, consistent multi-view 3D images from Zero123++
+:::
+
+Zero-1-to-3(Zero123) 논문이 zero-shot 형태로 single-image-to-3D conversion 을 하는 기법을 처음으로 소개하였습니다. 하지만 해당 방식으로는 주어진 view 에 대해서 독립적으로 객체를 생성하게 됨으로써 multi-view consistency 에서 부족한 부분을 보여주게 된다고 설명합니다. Zero123++ 논문에서 이를 해결하기 위해 여섯 개의 view 를 하나의 이미지로 tiling 하여 생성함으로써 multi-view 에 대한 joint distribution 을 학습할 수 있도록 설정합니다. 
+
+또한, Zero-1-to-3 논문에서 다음과 같은 한계점이 있다고 제시합니다.
+
+a) 첫번째로 global 및 local conditioning mechanism 을 비롯한 Stable Diffusion model prior 를 효율적으로 사용하지 않았고,
+
+b) 두번째로 Zero-1-to-3 논문에서 512x512 이미지 해상도로 학습 시 불안정하게 수렴하게 되어 256x256 해상도로 줄인 부분에 대해 논문 저자는 원인을 분석하며 새로운 scheduling 기법을 소개합니다. 
+
+## 2. Improving Consistency and Conditioning
+
+### 2.1. Multi-view Generation
+
+Zero-1-to-3 모델은 단일 이미지를 독립적으로 생성하며 multi-view 이미지에 대한 상관관계를 학습 혹은 생성 시에 고려하지 않습니다. 따라서, Zero123++에서는 3×2 layout 의 6개 이미지를 단일 프레임으로 tiling 하여 multiple image 에 대한 joint distribution 을 학습하게 됩니다. 
+
+Objaverse 데이터셋은 기본적으로 gravity axis 은 동일하지만 객체들이 일관된 canonical pose 를 가지고 있지 않습니다. 따라서 절대적인 camera pose 를 기반으로 해당 데이터셋을 학습하게 되면 객체의 orientation 을 학습하는데 어려움이 있다고 주장합니다. 
+
+반면에 Zero-1-to-3 는 input view 에 대한 상대적인 camera pose(elevation/azimuth angle) 을 입력받아 학습하였습니다. 그러나 해당 방식을 활용한다면 novel view 에 대한 relative pose 를 구하기 위해서는 input view 에 대한 elevation angle 을 사전에 알아야 한다는 단점이 있습니다. 후속적으로 One-2-3-45 그리고 DreamGaussian 논문에서 elevation angle 을 추가적으로 예측하는 모듈을 정의하고, 이에 따라 오차율도 증가하게 됩니다. 
+
+- Elevation/Azimuth angle 이란?
+    
+    :::{figure-md} 
+    <img src="../../pics/zero123plus/zero123plus_02.png" alt="zero123plus_02" class="bg-primary mb-1" width="700px">
+
+    Elevation/Azimuth angle
+    :::
+    
+
+이러한 문제를 해결하기 위해 elevation angle 을 고정시킨 상태에서 상대적인 azimuth angle 을 통한 novel view pose 를 정의합니다. 더 자세하게는 6개의 pose 를 아래 사진과 같이 정의하게 됩니다.  
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_03.png" alt="zero123plus_03" class="bg-primary mb-1" width="700px">
+
+3x2 layout of Zero123++ prediction
+:::
+
+### 2.2. Consistency and Stability: Noise Schedule
+
+Stable Diffusion 모델에서 사용되었던 scaled-linear schedule 은 local detail 을 학습하는데 초점을 두고 Signal-to-Noise Ratio (SNR) 가 낮은 timestep 이 극히 드뭅니다. SNR 이 낮은 구간에서 global low frequency 정보들을 학습하게 되며 해당 단계에서 step 수가 적으면 구조적인 변형이 클 수가 있습니다. 따라서, 이러한 scheduling 은 단일 이미지를 생성하는데는 유용하지만 multi-view consistent 한 이미지를 생성하는데 한계가 있다고 주장합니다.   
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_04.png" alt="zero123plus_04" class="bg-primary mb-1" width="700px">
+
+Linear vs Scaled linear schedule
+:::
+
+또한, 동일한 noise 가 주입되었을때 고해상도 이미지가 저해상도 이미지에 비해 noise level 이 적기 때문에, Zero-1-to-3 모델에서 고해상도 이미지를 학습하였을 때 불안정한 모습을 보여주었던 것도 동일한 원인 때문이라고 설명합니다.
+
+Zero123++ 에서는 scaled-linear schedule 대신에 linear schedule 를 사용하게 되고, 변화된 schedule 에 따라 $x$-prediction, $\epsilon$-prediction 모델보다 $v$-prediction 모델이 더 안정적으로 학습되었다고 합니다. 따라서, Stable Diffusion 2 $v$-prediction 모델로 fine-tuning 을 진행하였다고 합니다. 
+
+### 2.3. Local Condition: Scaled Reference Attention
+
+기존에 Zero-1-to-3 논문에서 noisy input 과 conditioned image(single-view input) 가 feature-wise concatenate 하는데 해당 방식으로는 pixel-wise spatial correspondence 가 정확하지 않다고 합니다. 
+
+Zero123++ 에서는 이 부분을 보완하여 Reference Attention 이라는 기법을 소개합니다. Reference Attention 이란, 아래 그림과 같이 noisy latent 와 conditioned latent 간에 self-attention 모듈에서의 key, value 값을 추가하여 연산 작업을 진행합니다. 이때, noisy input 에 주입된 noise 를 동일하게 conditioned image 에 적용하였다고 합니다.
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_05.png" alt="zero123plus_05" class="bg-primary mb-1" width="700px">
+
+Reference Attention
+:::
+
+Reference Attention 기법을 적용한 결과, fine-tuning 작업을 진행하지 않아도 reference image 에서의 semantic content 와 texture 가 잘 반영되었습니다. 또한, fine-tuning 시 reference latent 을 5x scaling 하였을 때 reference image 와의 일관성을 가장 잘 보여주었다고 합니다. 
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_06.png" alt="zero123plus_06" class="bg-primary mb-1" width="700px">
+
+Comparison on local conditioning
+:::
+
+### 2.4. Global Condition: FlexDiffuse
+
+Zero123++ 논문에서 추가적으로 FlexDiffuse 에서 소개한 linear guidance mechanism 을 활용하여 fine-tuning 범위를 최소화하는 선에서 global image conditioning 하였습니다. 
+
+더 자세하게는, $L \times D$ 차원의 prompt embedding $T$ 와 $D$ 차원의 CLIP global image embedding $I$ 에 global weight $w_i$ 를 곱한 값을 더하여 모델에 입력합니다. 이때, $L$ 은 token length 이고 $D$ 는 token embedding 의 차원 크기입니다. 이때, $w_i = \frac{i}{L}$ 로 초기 가중치 값을 설정하였습니다. Text condition 이 없을 경우에는 empty prompt 를 encoding 하여 $T$ 를 얻게 됩니다. 
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_07.png" alt="zero123plus_07" class="bg-primary mb-1" width="700px">
+
+FlexDiffuse’s linear guidance
+:::
+
+위와 같은 global conditioning 을 하였을때, 보이지 않은 unseen region 에서도 semantic 한 정보들을 유지한채 이미지를 잘 생성하는 부분을 확인할 수 있습니다.
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_08.png" alt="zero123plus_08" class="bg-primary mb-1" width="700px">
+
+Ablation on global conditioning
+:::
+
+### 2.5. Putting Everything Together
+
+정리하자면 해당 논문은 Stable Diffusion 2 $v$-model 을 사용하였고, Objaverse 데이터를 random HDRI environment lighting 를 적용하여 렌더링한 데이터에 학습하였습니다. 그리고 Stable Diffusion Image Variations model 의 학습 방식을 도입하여 two-stage 로 학습을 진행하였습니다. 
+
+첫번째 phase 에서는 self-attention layer 와 cross-attention layer 의 KV 행렬만 fine-tuning 을 하였고, AdamW optimizer 와 cosine annealing schedule 을 사용하였습니다. 두번째 phase 에서는 UNet 모델 전체를 학습하고 $5 \times 10^{-6}$ 값의 constant learning rate 를 사용하였습니다. 그리고 학습 과정을 더 효율적으로 하기 위해 Min-SNR weighting 기법도 활용하였습니다.
+
+## 3. Comparison to the State of the Art
+
+### 3.1. Image to Multi-view
+
+**Qualitative Comparison** 
+
+논문에서 Zero-1-to-3 XL 그리고 SyncDreamer 모델과의 성능을 비교합니다. Zero123++ 모델이 unseen view 에 대해서 가장 월등하게 이미지를 생성하는 것을 확인할 수 있습니다.
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_09.png" alt="zero123plus_09" class="bg-primary mb-1" width="700px">
+
+Qualitative comparison on image to multi-view task
+:::
+
+**Quantitative Comparison**
+
+정량적으로 LPIPS 지표를 기준으로 비교하였을 때에도 Zero123++ 모델이 가장 좋은 성능을 보여주고 있습니다. 이때, 모델이 생성한 6개의 이미지와 Objaverse 데이터셋을 렌더링한 6개의 이미지를 각각 결합하여 LPIPS 를 측정하였다고 합니다.
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_10.png" alt="zero123plus_10" class="bg-primary mb-1" width="700px">
+
+Quantitative Comparison on image to multi-view task
+:::
+
+### 3.2. Text to Multi-view
+
+Text 를 입력받아 우선적으로 SDXL 모델을 통해 단일 이미지를 생성한 후, Zero123++ 모델을 적용한 결과입니다. MVDream 과 Zero-1-to-3 XL 모델과 비교하였을 때, Zero123++ 모델이 가장 realistic 하고 multi-view consistent 한 이미지를 생성하는 부분을 확인할 수 있습니다.
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_11.png" alt="zero123plus_11" class="bg-primary mb-1" width="700px">
+
+Qualitative comparison on text to multi-view task
+:::
+
+## 4. Depth ControlNet for Zero123++
+
+아래 사진은 추가적으로 렌더링한 depth map 를 기반으로 ControlNet 을 학습한 결과입니다. 
+
+:::{figure-md} 
+<img src="../../pics/zero123plus/zero123plus_12.png" alt="zero123plus_12" class="bg-primary mb-1" width="700px">
+
+Depth-controlled Zero123++
+:::
\ No newline at end of file
diff --git a/_sources/intro.md b/_sources/intro.md
old mode 100644
new mode 100755
diff --git a/_sphinx_design_static/design-style.4045f2051d55cab465a707391d5b2007.min.css b/_sphinx_design_static/design-style.4045f2051d55cab465a707391d5b2007.min.css
old mode 100644
new mode 100755
diff --git a/_sphinx_design_static/design-tabs.js b/_sphinx_design_static/design-tabs.js
old mode 100644
new mode 100755
diff --git a/_static/PseudoLab_logo.png b/_static/PseudoLab_logo.png
old mode 100644
new mode 100755
diff --git a/_static/__init__.py b/_static/__init__.py
old mode 100644
new mode 100755
diff --git a/_static/__pycache__/__init__.cpython-36.pyc b/_static/__pycache__/__init__.cpython-36.pyc
old mode 100644
new mode 100755
diff --git a/_static/__pycache__/__init__.cpython-37.pyc b/_static/__pycache__/__init__.cpython-37.pyc
old mode 100644
new mode 100755
diff --git a/_static/_sphinx_javascript_frameworks_compat.js b/_static/_sphinx_javascript_frameworks_compat.js
old mode 100644
new mode 100755
diff --git a/_static/basic.css b/_static/basic.css
old mode 100644
new mode 100755
index 9e364ed3..61778181
--- a/_static/basic.css
+++ b/_static/basic.css
@@ -1,930 +1,928 @@
-/*
- * basic.css
- * ~~~~~~~~~
- *
- * Sphinx stylesheet -- basic theme.
- *
- * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
- * :license: BSD, see LICENSE for details.
- *
- */
-
-/* -- main layout ----------------------------------------------------------- */
-
-div.clearer {
-    clear: both;
-}
-
-div.section::after {
-    display: block;
-    content: '';
-    clear: left;
-}
-
-/* -- relbar ---------------------------------------------------------------- */
-
-div.related {
-    width: 100%;
-    font-size: 90%;
-}
-
-div.related h3 {
-    display: none;
-}
-
-div.related ul {
-    margin: 0;
-    padding: 0 0 0 10px;
-    list-style: none;
-}
-
-div.related li {
-    display: inline;
-}
-
-div.related li.right {
-    float: right;
-    margin-right: 5px;
-}
-
-/* -- sidebar --------------------------------------------------------------- */
-
-div.sphinxsidebarwrapper {
-    padding: 10px 5px 0 10px;
-}
-
-div.sphinxsidebar {
-    float: left;
-    width: 270px;
-    margin-left: -100%;
-    font-size: 90%;
-    word-wrap: break-word;
-    overflow-wrap : break-word;
-}
-
-div.sphinxsidebar ul {
-    list-style: none;
-}
-
-div.sphinxsidebar ul ul,
-div.sphinxsidebar ul.want-points {
-    margin-left: 20px;
-    list-style: square;
-}
-
-div.sphinxsidebar ul ul {
-    margin-top: 0;
-    margin-bottom: 0;
-}
-
-div.sphinxsidebar form {
-    margin-top: 10px;
-}
-
-div.sphinxsidebar input {
-    border: 1px solid #98dbcc;
-    font-family: sans-serif;
-    font-size: 1em;
-}
-
-div.sphinxsidebar #searchbox form.search {
-    overflow: hidden;
-}
-
-div.sphinxsidebar #searchbox input[type="text"] {
-    float: left;
-    width: 80%;
-    padding: 0.25em;
-    box-sizing: border-box;
-}
-
-div.sphinxsidebar #searchbox input[type="submit"] {
-    float: left;
-    width: 20%;
-    border-left: none;
-    padding: 0.25em;
-    box-sizing: border-box;
-}
-
-
-img {
-    border: 0;
-    max-width: 100%;
-}
-
-/* -- search page ----------------------------------------------------------- */
-
-ul.search {
-    margin: 10px 0 0 20px;
-    padding: 0;
-}
-
-ul.search li {
-    padding: 5px 0 5px 20px;
-    background-image: url(file.png);
-    background-repeat: no-repeat;
-    background-position: 0 7px;
-}
-
-ul.search li a {
-    font-weight: bold;
-}
-
-ul.search li p.context {
-    color: #888;
-    margin: 2px 0 0 30px;
-    text-align: left;
-}
-
-ul.keywordmatches li.goodmatch a {
-    font-weight: bold;
-}
-
-/* -- index page ------------------------------------------------------------ */
-
-table.contentstable {
-    width: 90%;
-    margin-left: auto;
-    margin-right: auto;
-}
-
-table.contentstable p.biglink {
-    line-height: 150%;
-}
-
-a.biglink {
-    font-size: 1.3em;
-}
-
-span.linkdescr {
-    font-style: italic;
-    padding-top: 5px;
-    font-size: 90%;
-}
-
-/* -- general index --------------------------------------------------------- */
-
-table.indextable {
-    width: 100%;
-}
-
-table.indextable td {
-    text-align: left;
-    vertical-align: top;
-}
-
-table.indextable ul {
-    margin-top: 0;
-    margin-bottom: 0;
-    list-style-type: none;
-}
-
-table.indextable > tbody > tr > td > ul {
-    padding-left: 0em;
-}
-
-table.indextable tr.pcap {
-    height: 10px;
-}
-
-table.indextable tr.cap {
-    margin-top: 10px;
-    background-color: #f2f2f2;
-}
-
-img.toggler {
-    margin-right: 3px;
-    margin-top: 3px;
-    cursor: pointer;
-}
-
-div.modindex-jumpbox {
-    border-top: 1px solid #ddd;
-    border-bottom: 1px solid #ddd;
-    margin: 1em 0 1em 0;
-    padding: 0.4em;
-}
-
-div.genindex-jumpbox {
-    border-top: 1px solid #ddd;
-    border-bottom: 1px solid #ddd;
-    margin: 1em 0 1em 0;
-    padding: 0.4em;
-}
-
-/* -- domain module index --------------------------------------------------- */
-
-table.modindextable td {
-    padding: 2px;
-    border-collapse: collapse;
-}
-
-/* -- general body styles --------------------------------------------------- */
-
-div.body {
-    min-width: 360px;
-    max-width: 800px;
-}
-
-div.body p, div.body dd, div.body li, div.body blockquote {
-    -moz-hyphens: auto;
-    -ms-hyphens: auto;
-    -webkit-hyphens: auto;
-    hyphens: auto;
-}
-
-a.headerlink {
-    visibility: hidden;
-}
-
-h1:hover > a.headerlink,
-h2:hover > a.headerlink,
-h3:hover > a.headerlink,
-h4:hover > a.headerlink,
-h5:hover > a.headerlink,
-h6:hover > a.headerlink,
-dt:hover > a.headerlink,
-caption:hover > a.headerlink,
-p.caption:hover > a.headerlink,
-div.code-block-caption:hover > a.headerlink {
-    visibility: visible;
-}
-
-div.body p.caption {
-    text-align: inherit;
-}
-
-div.body td {
-    text-align: left;
-}
-
-.first {
-    margin-top: 0 !important;
-}
-
-p.rubric {
-    margin-top: 30px;
-    font-weight: bold;
-}
-
-img.align-left, figure.align-left, .figure.align-left, object.align-left {
-    clear: left;
-    float: left;
-    margin-right: 1em;
-}
-
-img.align-right, figure.align-right, .figure.align-right, object.align-right {
-    clear: right;
-    float: right;
-    margin-left: 1em;
-}
-
-img.align-center, figure.align-center, .figure.align-center, object.align-center {
-  display: block;
-  margin-left: auto;
-  margin-right: auto;
-}
-
-img.align-default, figure.align-default, .figure.align-default {
-  display: block;
-  margin-left: auto;
-  margin-right: auto;
-}
-
-.align-left {
-    text-align: left;
-}
-
-.align-center {
-    text-align: center;
-}
-
-.align-default {
-    text-align: center;
-}
-
-.align-right {
-    text-align: right;
-}
-
-/* -- sidebars -------------------------------------------------------------- */
-
-div.sidebar,
-aside.sidebar {
-    margin: 0 0 0.5em 1em;
-    border: 1px solid #ddb;
-    padding: 7px;
-    background-color: #ffe;
-    width: 40%;
-    float: right;
-    clear: right;
-    overflow-x: auto;
-}
-
-p.sidebar-title {
-    font-weight: bold;
-}
-nav.contents,
-aside.topic,
-
-div.admonition, div.topic, blockquote {
-    clear: left;
-}
-
-/* -- topics ---------------------------------------------------------------- */
-nav.contents,
-aside.topic,
-
-div.topic {
-    border: 1px solid #ccc;
-    padding: 7px;
-    margin: 10px 0 10px 0;
-}
-
-p.topic-title {
-    font-size: 1.1em;
-    font-weight: bold;
-    margin-top: 10px;
-}
-
-/* -- admonitions ----------------------------------------------------------- */
-
-div.admonition {
-    margin-top: 10px;
-    margin-bottom: 10px;
-    padding: 7px;
-}
-
-div.admonition dt {
-    font-weight: bold;
-}
-
-p.admonition-title {
-    margin: 0px 10px 5px 0px;
-    font-weight: bold;
-}
-
-div.body p.centered {
-    text-align: center;
-    margin-top: 25px;
-}
-
-/* -- content of sidebars/topics/admonitions -------------------------------- */
-
-div.sidebar > :last-child,
-aside.sidebar > :last-child,
-nav.contents > :last-child,
-aside.topic > :last-child,
-
-div.topic > :last-child,
-div.admonition > :last-child {
-    margin-bottom: 0;
-}
-
-div.sidebar::after,
-aside.sidebar::after,
-nav.contents::after,
-aside.topic::after,
-
-div.topic::after,
-div.admonition::after,
-blockquote::after {
-    display: block;
-    content: '';
-    clear: both;
-}
-
-/* -- tables ---------------------------------------------------------------- */
-
-table.docutils {
-    margin-top: 10px;
-    margin-bottom: 10px;
-    border: 0;
-    border-collapse: collapse;
-}
-
-table.align-center {
-    margin-left: auto;
-    margin-right: auto;
-}
-
-table.align-default {
-    margin-left: auto;
-    margin-right: auto;
-}
-
-table caption span.caption-number {
-    font-style: italic;
-}
-
-table caption span.caption-text {
-}
-
-table.docutils td, table.docutils th {
-    padding: 1px 8px 1px 5px;
-    border-top: 0;
-    border-left: 0;
-    border-right: 0;
-    border-bottom: 1px solid #aaa;
-}
-
-th {
-    text-align: left;
-    padding-right: 5px;
-}
-
-table.citation {
-    border-left: solid 1px gray;
-    margin-left: 1px;
-}
-
-table.citation td {
-    border-bottom: none;
-}
-
-th > :first-child,
-td > :first-child {
-    margin-top: 0px;
-}
-
-th > :last-child,
-td > :last-child {
-    margin-bottom: 0px;
-}
-
-/* -- figures --------------------------------------------------------------- */
-
-div.figure, figure {
-    margin: 0.5em;
-    padding: 0.5em;
-}
-
-div.figure p.caption, figcaption {
-    padding: 0.3em;
-}
-
-div.figure p.caption span.caption-number,
-figcaption span.caption-number {
-    font-style: italic;
-}
-
-div.figure p.caption span.caption-text,
-figcaption span.caption-text {
-}
-
-/* -- field list styles ----------------------------------------------------- */
-
-table.field-list td, table.field-list th {
-    border: 0 !important;
-}
-
-.field-list ul {
-    margin: 0;
-    padding-left: 1em;
-}
-
-.field-list p {
-    margin: 0;
-}
-
-.field-name {
-    -moz-hyphens: manual;
-    -ms-hyphens: manual;
-    -webkit-hyphens: manual;
-    hyphens: manual;
-}
-
-/* -- hlist styles ---------------------------------------------------------- */
-
-table.hlist {
-    margin: 1em 0;
-}
-
-table.hlist td {
-    vertical-align: top;
-}
-
-/* -- object description styles --------------------------------------------- */
-
-.sig {
-	font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
-}
-
-.sig-name, code.descname {
-    background-color: transparent;
-    font-weight: bold;
-}
-
-.sig-name {
-	font-size: 1.1em;
-}
-
-code.descname {
-    font-size: 1.2em;
-}
-
-.sig-prename, code.descclassname {
-    background-color: transparent;
-}
-
-.optional {
-    font-size: 1.3em;
-}
-
-.sig-paren {
-    font-size: larger;
-}
-
-.sig-param.n {
-	font-style: italic;
-}
-
-/* C++ specific styling */
-
-.sig-inline.c-texpr,
-.sig-inline.cpp-texpr {
-	font-family: unset;
-}
-
-.sig.c   .k, .sig.c   .kt,
-.sig.cpp .k, .sig.cpp .kt {
-	color: #0033B3;
-}
-
-.sig.c   .m,
-.sig.cpp .m {
-	color: #1750EB;
-}
-
-.sig.c   .s, .sig.c   .sc,
-.sig.cpp .s, .sig.cpp .sc {
-	color: #067D17;
-}
-
-
-/* -- other body styles ----------------------------------------------------- */
-
-ol.arabic {
-    list-style: decimal;
-}
-
-ol.loweralpha {
-    list-style: lower-alpha;
-}
-
-ol.upperalpha {
-    list-style: upper-alpha;
-}
-
-ol.lowerroman {
-    list-style: lower-roman;
-}
-
-ol.upperroman {
-    list-style: upper-roman;
-}
-
-:not(li) > ol > li:first-child > :first-child,
-:not(li) > ul > li:first-child > :first-child {
-    margin-top: 0px;
-}
-
-:not(li) > ol > li:last-child > :last-child,
-:not(li) > ul > li:last-child > :last-child {
-    margin-bottom: 0px;
-}
-
-ol.simple ol p,
-ol.simple ul p,
-ul.simple ol p,
-ul.simple ul p {
-    margin-top: 0;
-}
-
-ol.simple > li:not(:first-child) > p,
-ul.simple > li:not(:first-child) > p {
-    margin-top: 0;
-}
-
-ol.simple p,
-ul.simple p {
-    margin-bottom: 0;
-}
-
-/* Docutils 0.17 and older (footnotes & citations) */
-dl.footnote > dt,
-dl.citation > dt {
-    float: left;
-    margin-right: 0.5em;
-}
-
-dl.footnote > dd,
-dl.citation > dd {
-    margin-bottom: 0em;
-}
-
-dl.footnote > dd:after,
-dl.citation > dd:after {
-    content: "";
-    clear: both;
-}
-
-/* Docutils 0.18+ (footnotes & citations) */
-aside.footnote > span,
-div.citation > span {
-    float: left;
-}
-aside.footnote > span:last-of-type,
-div.citation > span:last-of-type {
-  padding-right: 0.5em;
-}
-aside.footnote > p {
-  margin-left: 2em;
-}
-div.citation > p {
-  margin-left: 4em;
-}
-aside.footnote > p:last-of-type,
-div.citation > p:last-of-type {
-    margin-bottom: 0em;
-}
-aside.footnote > p:last-of-type:after,
-div.citation > p:last-of-type:after {
-    content: "";
-    clear: both;
-}
-
-/* Footnotes & citations ends */
-
-dl.field-list {
-    display: grid;
-    grid-template-columns: fit-content(30%) auto;
-}
-
-dl.field-list > dt {
-    font-weight: bold;
-    word-break: break-word;
-    padding-left: 0.5em;
-    padding-right: 5px;
-}
-
-dl.field-list > dt:after {
-    content: ":";
-}
-
-dl.field-list > dd {
-    padding-left: 0.5em;
-    margin-top: 0em;
-    margin-left: 0em;
-    margin-bottom: 0em;
-}
-
-dl {
-    margin-bottom: 15px;
-}
-
-dd > :first-child {
-    margin-top: 0px;
-}
-
-dd ul, dd table {
-    margin-bottom: 10px;
-}
-
-dd {
-    margin-top: 3px;
-    margin-bottom: 10px;
-    margin-left: 30px;
-}
-
-dl > dd:last-child,
-dl > dd:last-child > :last-child {
-    margin-bottom: 0;
-}
-
-dt:target, span.highlighted {
-    background-color: #fbe54e;
-}
-
-rect.highlighted {
-    fill: #fbe54e;
-}
-
-dl.glossary dt {
-    font-weight: bold;
-    font-size: 1.1em;
-}
-
-.versionmodified {
-    font-style: italic;
-}
-
-.system-message {
-    background-color: #fda;
-    padding: 5px;
-    border: 3px solid red;
-}
-
-.footnote:target  {
-    background-color: #ffa;
-}
-
-.line-block {
-    display: block;
-    margin-top: 1em;
-    margin-bottom: 1em;
-}
-
-.line-block .line-block {
-    margin-top: 0;
-    margin-bottom: 0;
-    margin-left: 1.5em;
-}
-
-.guilabel, .menuselection {
-    font-family: sans-serif;
-}
-
-.accelerator {
-    text-decoration: underline;
-}
-
-.classifier {
-    font-style: oblique;
-}
-
-.classifier:before {
-    font-style: normal;
-    margin: 0 0.5em;
-    content: ":";
-    display: inline-block;
-}
-
-abbr, acronym {
-    border-bottom: dotted 1px;
-    cursor: help;
-}
-
-/* -- code displays --------------------------------------------------------- */
-
-pre {
-    overflow: auto;
-    overflow-y: hidden;  /* fixes display issues on Chrome browsers */
-}
-
-pre, div[class*="highlight-"] {
-    clear: both;
-}
-
-span.pre {
-    -moz-hyphens: none;
-    -ms-hyphens: none;
-    -webkit-hyphens: none;
-    hyphens: none;
-    white-space: nowrap;
-}
-
-div[class*="highlight-"] {
-    margin: 1em 0;
-}
-
-td.linenos pre {
-    border: 0;
-    background-color: transparent;
-    color: #aaa;
-}
-
-table.highlighttable {
-    display: block;
-}
-
-table.highlighttable tbody {
-    display: block;
-}
-
-table.highlighttable tr {
-    display: flex;
-}
-
-table.highlighttable td {
-    margin: 0;
-    padding: 0;
-}
-
-table.highlighttable td.linenos {
-    padding-right: 0.5em;
-}
-
-table.highlighttable td.code {
-    flex: 1;
-    overflow: hidden;
-}
-
-.highlight .hll {
-    display: block;
-}
-
-div.highlight pre,
-table.highlighttable pre {
-    margin: 0;
-}
-
-div.code-block-caption + div {
-    margin-top: 0;
-}
-
-div.code-block-caption {
-    margin-top: 1em;
-    padding: 2px 5px;
-    font-size: small;
-}
-
-div.code-block-caption code {
-    background-color: transparent;
-}
-
-table.highlighttable td.linenos,
-span.linenos,
-div.highlight span.gp {  /* gp: Generic.Prompt */
-  user-select: none;
-  -webkit-user-select: text; /* Safari fallback only */
-  -webkit-user-select: none; /* Chrome/Safari */
-  -moz-user-select: none; /* Firefox */
-  -ms-user-select: none; /* IE10+ */
-}
-
-div.code-block-caption span.caption-number {
-    padding: 0.1em 0.3em;
-    font-style: italic;
-}
-
-div.code-block-caption span.caption-text {
-}
-
-div.literal-block-wrapper {
-    margin: 1em 0;
-}
-
-code.xref, a code {
-    background-color: transparent;
-    font-weight: bold;
-}
-
-h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
-    background-color: transparent;
-}
-
-.viewcode-link {
-    float: right;
-}
-
-.viewcode-back {
-    float: right;
-    font-family: sans-serif;
-}
-
-div.viewcode-block:target {
-    margin: -1px -10px;
-    padding: 0 10px;
-}
-
-/* -- math display ---------------------------------------------------------- */
-
-img.math {
-    vertical-align: middle;
-}
-
-div.body div.math p {
-    text-align: center;
-}
-
-span.eqno {
-    float: right;
-}
-
-span.eqno a.headerlink {
-    position: absolute;
-    z-index: 1;
-}
-
-div.math:hover a.headerlink {
-    visibility: visible;
-}
-
-/* -- printout stylesheet --------------------------------------------------- */
-
-@media print {
-    div.document,
-    div.documentwrapper,
-    div.bodywrapper {
-        margin: 0 !important;
-        width: 100%;
-    }
-
-    div.sphinxsidebar,
-    div.related,
-    div.footer,
-    #top-link {
-        display: none;
-    }
+/*
+ * basic.css
+ * ~~~~~~~~~
+ *
+ * Sphinx stylesheet -- basic theme.
+ *
+ * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/* -- main layout ----------------------------------------------------------- */
+
+div.clearer {
+    clear: both;
+}
+
+div.section::after {
+    display: block;
+    content: '';
+    clear: left;
+}
+
+/* -- relbar ---------------------------------------------------------------- */
+
+div.related {
+    width: 100%;
+    font-size: 90%;
+}
+
+div.related h3 {
+    display: none;
+}
+
+div.related ul {
+    margin: 0;
+    padding: 0 0 0 10px;
+    list-style: none;
+}
+
+div.related li {
+    display: inline;
+}
+
+div.related li.right {
+    float: right;
+    margin-right: 5px;
+}
+
+/* -- sidebar --------------------------------------------------------------- */
+
+div.sphinxsidebarwrapper {
+    padding: 10px 5px 0 10px;
+}
+
+div.sphinxsidebar {
+    float: left;
+    width: 270px;
+    margin-left: -100%;
+    font-size: 90%;
+    word-wrap: break-word;
+    overflow-wrap : break-word;
+}
+
+div.sphinxsidebar ul {
+    list-style: none;
+}
+
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points {
+    margin-left: 20px;
+    list-style: square;
+}
+
+div.sphinxsidebar ul ul {
+    margin-top: 0;
+    margin-bottom: 0;
+}
+
+div.sphinxsidebar form {
+    margin-top: 10px;
+}
+
+div.sphinxsidebar input {
+    border: 1px solid #98dbcc;
+    font-family: sans-serif;
+    font-size: 1em;
+}
+
+div.sphinxsidebar #searchbox form.search {
+    overflow: hidden;
+}
+
+div.sphinxsidebar #searchbox input[type="text"] {
+    float: left;
+    width: 80%;
+    padding: 0.25em;
+    box-sizing: border-box;
+}
+
+div.sphinxsidebar #searchbox input[type="submit"] {
+    float: left;
+    width: 20%;
+    border-left: none;
+    padding: 0.25em;
+    box-sizing: border-box;
+}
+
+
+img {
+    border: 0;
+    max-width: 100%;
+}
+
+/* -- search page ----------------------------------------------------------- */
+
+ul.search {
+    margin: 10px 0 0 20px;
+    padding: 0;
+}
+
+ul.search li {
+    padding: 5px 0 5px 20px;
+    background-image: url(file.png);
+    background-repeat: no-repeat;
+    background-position: 0 7px;
+}
+
+ul.search li a {
+    font-weight: bold;
+}
+
+ul.search li p.context {
+    color: #888;
+    margin: 2px 0 0 30px;
+    text-align: left;
+}
+
+ul.keywordmatches li.goodmatch a {
+    font-weight: bold;
+}
+
+/* -- index page ------------------------------------------------------------ */
+
+table.contentstable {
+    width: 90%;
+    margin-left: auto;
+    margin-right: auto;
+}
+
+table.contentstable p.biglink {
+    line-height: 150%;
+}
+
+a.biglink {
+    font-size: 1.3em;
+}
+
+span.linkdescr {
+    font-style: italic;
+    padding-top: 5px;
+    font-size: 90%;
+}
+
+/* -- general index --------------------------------------------------------- */
+
+table.indextable {
+    width: 100%;
+}
+
+table.indextable td {
+    text-align: left;
+    vertical-align: top;
+}
+
+table.indextable ul {
+    margin-top: 0;
+    margin-bottom: 0;
+    list-style-type: none;
+}
+
+table.indextable > tbody > tr > td > ul {
+    padding-left: 0em;
+}
+
+table.indextable tr.pcap {
+    height: 10px;
+}
+
+table.indextable tr.cap {
+    margin-top: 10px;
+    background-color: #f2f2f2;
+}
+
+img.toggler {
+    margin-right: 3px;
+    margin-top: 3px;
+    cursor: pointer;
+}
+
+div.modindex-jumpbox {
+    border-top: 1px solid #ddd;
+    border-bottom: 1px solid #ddd;
+    margin: 1em 0 1em 0;
+    padding: 0.4em;
+}
+
+div.genindex-jumpbox {
+    border-top: 1px solid #ddd;
+    border-bottom: 1px solid #ddd;
+    margin: 1em 0 1em 0;
+    padding: 0.4em;
+}
+
+/* -- domain module index --------------------------------------------------- */
+
+table.modindextable td {
+    padding: 2px;
+    border-collapse: collapse;
+}
+
+/* -- general body styles --------------------------------------------------- */
+
+div.body {
+    min-width: 360px;
+    max-width: 800px;
+}
+
+div.body p, div.body dd, div.body li, div.body blockquote {
+    -moz-hyphens: auto;
+    -ms-hyphens: auto;
+    -webkit-hyphens: auto;
+    hyphens: auto;
+}
+
+a.headerlink {
+    visibility: hidden;
+}
+a.brackets:before,
+span.brackets > a:before{
+    content: "[";
+}
+
+a.brackets:after,
+span.brackets > a:after {
+    content: "]";
+}
+
+
+h1:hover > a.headerlink,
+h2:hover > a.headerlink,
+h3:hover > a.headerlink,
+h4:hover > a.headerlink,
+h5:hover > a.headerlink,
+h6:hover > a.headerlink,
+dt:hover > a.headerlink,
+caption:hover > a.headerlink,
+p.caption:hover > a.headerlink,
+div.code-block-caption:hover > a.headerlink {
+    visibility: visible;
+}
+
+div.body p.caption {
+    text-align: inherit;
+}
+
+div.body td {
+    text-align: left;
+}
+
+.first {
+    margin-top: 0 !important;
+}
+
+p.rubric {
+    margin-top: 30px;
+    font-weight: bold;
+}
+
+img.align-left, figure.align-left, .figure.align-left, object.align-left {
+    clear: left;
+    float: left;
+    margin-right: 1em;
+}
+
+img.align-right, figure.align-right, .figure.align-right, object.align-right {
+    clear: right;
+    float: right;
+    margin-left: 1em;
+}
+
+img.align-center, figure.align-center, .figure.align-center, object.align-center {
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
+
+img.align-default, figure.align-default, .figure.align-default {
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
+
+.align-left {
+    text-align: left;
+}
+
+.align-center {
+    text-align: center;
+}
+
+.align-default {
+    text-align: center;
+}
+
+.align-right {
+    text-align: right;
+}
+
+/* -- sidebars -------------------------------------------------------------- */
+
+div.sidebar,
+aside.sidebar {
+    margin: 0 0 0.5em 1em;
+    border: 1px solid #ddb;
+    padding: 7px;
+    background-color: #ffe;
+    width: 40%;
+    float: right;
+    clear: right;
+    overflow-x: auto;
+}
+
+p.sidebar-title {
+    font-weight: bold;
+}
+div.admonition, div.topic, blockquote {
+    clear: left;
+}
+
+/* -- topics ---------------------------------------------------------------- */
+div.topic {
+    border: 1px solid #ccc;
+    padding: 7px;
+    margin: 10px 0 10px 0;
+}
+
+p.topic-title {
+    font-size: 1.1em;
+    font-weight: bold;
+    margin-top: 10px;
+}
+
+/* -- admonitions ----------------------------------------------------------- */
+
+div.admonition {
+    margin-top: 10px;
+    margin-bottom: 10px;
+    padding: 7px;
+}
+
+div.admonition dt {
+    font-weight: bold;
+}
+
+p.admonition-title {
+    margin: 0px 10px 5px 0px;
+    font-weight: bold;
+}
+
+div.body p.centered {
+    text-align: center;
+    margin-top: 25px;
+}
+
+/* -- content of sidebars/topics/admonitions -------------------------------- */
+
+div.sidebar > :last-child,
+aside.sidebar > :last-child,
+div.topic > :last-child,
+div.admonition > :last-child {
+    margin-bottom: 0;
+}
+
+div.sidebar::after,
+aside.sidebar::after,
+div.topic::after,
+div.admonition::after,
+blockquote::after {
+    display: block;
+    content: '';
+    clear: both;
+}
+
+/* -- tables ---------------------------------------------------------------- */
+
+table.docutils {
+    margin-top: 10px;
+    margin-bottom: 10px;
+    border: 0;
+    border-collapse: collapse;
+}
+
+table.align-center {
+    margin-left: auto;
+    margin-right: auto;
+}
+
+table.align-default {
+    margin-left: auto;
+    margin-right: auto;
+}
+
+table caption span.caption-number {
+    font-style: italic;
+}
+
+table caption span.caption-text {
+}
+
+table.docutils td, table.docutils th {
+    padding: 1px 8px 1px 5px;
+    border-top: 0;
+    border-left: 0;
+    border-right: 0;
+    border-bottom: 1px solid #aaa;
+}
+
+th {
+    text-align: left;
+    padding-right: 5px;
+}
+
+table.citation {
+    border-left: solid 1px gray;
+    margin-left: 1px;
+}
+
+table.citation td {
+    border-bottom: none;
+}
+
+th > :first-child,
+td > :first-child {
+    margin-top: 0px;
+}
+
+th > :last-child,
+td > :last-child {
+    margin-bottom: 0px;
+}
+
+/* -- figures --------------------------------------------------------------- */
+
+div.figure, figure {
+    margin: 0.5em;
+    padding: 0.5em;
+}
+
+div.figure p.caption, figcaption {
+    padding: 0.3em;
+}
+
+div.figure p.caption span.caption-number,
+figcaption span.caption-number {
+    font-style: italic;
+}
+
+div.figure p.caption span.caption-text,
+figcaption span.caption-text {
+}
+
+/* -- field list styles ----------------------------------------------------- */
+
+table.field-list td, table.field-list th {
+    border: 0 !important;
+}
+
+.field-list ul {
+    margin: 0;
+    padding-left: 1em;
+}
+
+.field-list p {
+    margin: 0;
+}
+
+.field-name {
+    -moz-hyphens: manual;
+    -ms-hyphens: manual;
+    -webkit-hyphens: manual;
+    hyphens: manual;
+}
+
+/* -- hlist styles ---------------------------------------------------------- */
+
+table.hlist {
+    margin: 1em 0;
+}
+
+table.hlist td {
+    vertical-align: top;
+}
+
+/* -- object description styles --------------------------------------------- */
+
+.sig {
+	font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
+}
+
+.sig-name, code.descname {
+    background-color: transparent;
+    font-weight: bold;
+}
+
+.sig-name {
+	font-size: 1.1em;
+}
+
+code.descname {
+    font-size: 1.2em;
+}
+
+.sig-prename, code.descclassname {
+    background-color: transparent;
+}
+
+.optional {
+    font-size: 1.3em;
+}
+
+.sig-paren {
+    font-size: larger;
+}
+
+.sig-param.n {
+	font-style: italic;
+}
+
+/* C++ specific styling */
+
+.sig-inline.c-texpr,
+.sig-inline.cpp-texpr {
+	font-family: unset;
+}
+
+.sig.c   .k, .sig.c   .kt,
+.sig.cpp .k, .sig.cpp .kt {
+	color: #0033B3;
+}
+
+.sig.c   .m,
+.sig.cpp .m {
+	color: #1750EB;
+}
+
+.sig.c   .s, .sig.c   .sc,
+.sig.cpp .s, .sig.cpp .sc {
+	color: #067D17;
+}
+
+
+/* -- other body styles ----------------------------------------------------- */
+
+ol.arabic {
+    list-style: decimal;
+}
+
+ol.loweralpha {
+    list-style: lower-alpha;
+}
+
+ol.upperalpha {
+    list-style: upper-alpha;
+}
+
+ol.lowerroman {
+    list-style: lower-roman;
+}
+
+ol.upperroman {
+    list-style: upper-roman;
+}
+
+:not(li) > ol > li:first-child > :first-child,
+:not(li) > ul > li:first-child > :first-child {
+    margin-top: 0px;
+}
+
+:not(li) > ol > li:last-child > :last-child,
+:not(li) > ul > li:last-child > :last-child {
+    margin-bottom: 0px;
+}
+
+ol.simple ol p,
+ol.simple ul p,
+ul.simple ol p,
+ul.simple ul p {
+    margin-top: 0;
+}
+
+ol.simple > li:not(:first-child) > p,
+ul.simple > li:not(:first-child) > p {
+    margin-top: 0;
+}
+
+ol.simple p,
+ul.simple p {
+    margin-bottom: 0;
+}
+
+/* Docutils 0.17 and older (footnotes & citations) */
+dl.footnote > dt,
+dl.citation > dt {
+    float: left;
+    margin-right: 0.5em;
+}
+
+dl.footnote > dd,
+dl.citation > dd {
+    margin-bottom: 0em;
+}
+
+dl.footnote > dd:after,
+dl.citation > dd:after {
+    content: "";
+    clear: both;
+}
+
+/* Docutils 0.18+ (footnotes & citations) */
+aside.footnote > span,
+div.citation > span {
+    float: left;
+}
+aside.footnote > span:last-of-type,
+div.citation > span:last-of-type {
+  padding-right: 0.5em;
+}
+aside.footnote > p {
+  margin-left: 2em;
+}
+div.citation > p {
+  margin-left: 4em;
+}
+aside.footnote > p:last-of-type,
+div.citation > p:last-of-type {
+    margin-bottom: 0em;
+}
+aside.footnote > p:last-of-type:after,
+div.citation > p:last-of-type:after {
+    content: "";
+    clear: both;
+}
+
+/* Footnotes & citations ends */
+
+dl.field-list {
+    display: grid;
+    grid-template-columns: fit-content(30%) auto;
+}
+
+dl.field-list > dt {
+    font-weight: bold;
+    word-break: break-word;
+    padding-left: 0.5em;
+    padding-right: 5px;
+}
+
+dl.field-list > dt:after {
+    content: ":";
+}
+
+dl.field-list > dd {
+    padding-left: 0.5em;
+    margin-top: 0em;
+    margin-left: 0em;
+    margin-bottom: 0em;
+}
+
+dl {
+    margin-bottom: 15px;
+}
+
+dd > :first-child {
+    margin-top: 0px;
+}
+
+dd ul, dd table {
+    margin-bottom: 10px;
+}
+
+dd {
+    margin-top: 3px;
+    margin-bottom: 10px;
+    margin-left: 30px;
+}
+
+dl > dd:last-child,
+dl > dd:last-child > :last-child {
+    margin-bottom: 0;
+}
+
+dt:target, span.highlighted {
+    background-color: #fbe54e;
+}
+
+rect.highlighted {
+    fill: #fbe54e;
+}
+
+dl.glossary dt {
+    font-weight: bold;
+    font-size: 1.1em;
+}
+
+.versionmodified {
+    font-style: italic;
+}
+
+.system-message {
+    background-color: #fda;
+    padding: 5px;
+    border: 3px solid red;
+}
+
+.footnote:target  {
+    background-color: #ffa;
+}
+
+.line-block {
+    display: block;
+    margin-top: 1em;
+    margin-bottom: 1em;
+}
+
+.line-block .line-block {
+    margin-top: 0;
+    margin-bottom: 0;
+    margin-left: 1.5em;
+}
+
+.guilabel, .menuselection {
+    font-family: sans-serif;
+}
+
+.accelerator {
+    text-decoration: underline;
+}
+
+.classifier {
+    font-style: oblique;
+}
+
+.classifier:before {
+    font-style: normal;
+    margin: 0 0.5em;
+    content: ":";
+    display: inline-block;
+}
+
+abbr, acronym {
+    border-bottom: dotted 1px;
+    cursor: help;
+}
+
+/* -- code displays --------------------------------------------------------- */
+
+pre {
+    overflow: auto;
+    overflow-y: hidden;  /* fixes display issues on Chrome browsers */
+}
+
+pre, div[class*="highlight-"] {
+    clear: both;
+}
+
+span.pre {
+    -moz-hyphens: none;
+    -ms-hyphens: none;
+    -webkit-hyphens: none;
+    hyphens: none;
+    white-space: nowrap;
+}
+
+div[class*="highlight-"] {
+    margin: 1em 0;
+}
+
+td.linenos pre {
+    border: 0;
+    background-color: transparent;
+    color: #aaa;
+}
+
+table.highlighttable {
+    display: block;
+}
+
+table.highlighttable tbody {
+    display: block;
+}
+
+table.highlighttable tr {
+    display: flex;
+}
+
+table.highlighttable td {
+    margin: 0;
+    padding: 0;
+}
+
+table.highlighttable td.linenos {
+    padding-right: 0.5em;
+}
+
+table.highlighttable td.code {
+    flex: 1;
+    overflow: hidden;
+}
+
+.highlight .hll {
+    display: block;
+}
+
+div.highlight pre,
+table.highlighttable pre {
+    margin: 0;
+}
+
+div.code-block-caption + div {
+    margin-top: 0;
+}
+
+div.code-block-caption {
+    margin-top: 1em;
+    padding: 2px 5px;
+    font-size: small;
+}
+
+div.code-block-caption code {
+    background-color: transparent;
+}
+
+table.highlighttable td.linenos,
+span.linenos,
+div.highlight span.gp {  /* gp: Generic.Prompt */
+  user-select: none;
+  -webkit-user-select: text; /* Safari fallback only */
+  -webkit-user-select: none; /* Chrome/Safari */
+  -moz-user-select: none; /* Firefox */
+  -ms-user-select: none; /* IE10+ */
+}
+
+div.code-block-caption span.caption-number {
+    padding: 0.1em 0.3em;
+    font-style: italic;
+}
+
+div.code-block-caption span.caption-text {
+}
+
+div.literal-block-wrapper {
+    margin: 1em 0;
+}
+
+code.xref, a code {
+    background-color: transparent;
+    font-weight: bold;
+}
+
+h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
+    background-color: transparent;
+}
+
+.viewcode-link {
+    float: right;
+}
+
+.viewcode-back {
+    float: right;
+    font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+    margin: -1px -10px;
+    padding: 0 10px;
+}
+
+/* -- math display ---------------------------------------------------------- */
+
+img.math {
+    vertical-align: middle;
+}
+
+div.body div.math p {
+    text-align: center;
+}
+
+span.eqno {
+    float: right;
+}
+
+span.eqno a.headerlink {
+    position: absolute;
+    z-index: 1;
+}
+
+div.math:hover a.headerlink {
+    visibility: visible;
+}
+
+/* -- printout stylesheet --------------------------------------------------- */
+
+@media print {
+    div.document,
+    div.documentwrapper,
+    div.bodywrapper {
+        margin: 0 !important;
+        width: 100%;
+    }
+
+    div.sphinxsidebar,
+    div.related,
+    div.footer,
+    #top-link {
+        display: none;
+    }
 }
\ No newline at end of file
diff --git a/_static/check-solid.svg b/_static/check-solid.svg
old mode 100644
new mode 100755
diff --git a/_static/clipboard.min.js b/_static/clipboard.min.js
old mode 100644
new mode 100755
diff --git a/_static/copy-button.svg b/_static/copy-button.svg
old mode 100644
new mode 100755
diff --git a/_static/copybutton.css b/_static/copybutton.css
old mode 100644
new mode 100755
diff --git a/_static/copybutton.js b/_static/copybutton.js
old mode 100644
new mode 100755
index 2ea7ff3e..f4ec4edc
--- a/_static/copybutton.js
+++ b/_static/copybutton.js
@@ -1,248 +1,248 @@
-// Localization support
-const messages = {
-  'en': {
-    'copy': 'Copy',
-    'copy_to_clipboard': 'Copy to clipboard',
-    'copy_success': 'Copied!',
-    'copy_failure': 'Failed to copy',
-  },
-  'es' : {
-    'copy': 'Copiar',
-    'copy_to_clipboard': 'Copiar al portapapeles',
-    'copy_success': '¡Copiado!',
-    'copy_failure': 'Error al copiar',
-  },
-  'de' : {
-    'copy': 'Kopieren',
-    'copy_to_clipboard': 'In die Zwischenablage kopieren',
-    'copy_success': 'Kopiert!',
-    'copy_failure': 'Fehler beim Kopieren',
-  },
-  'fr' : {
-    'copy': 'Copier',
-    'copy_to_clipboard': 'Copier dans le presse-papier',
-    'copy_success': 'Copié !',
-    'copy_failure': 'Échec de la copie',
-  },
-  'ru': {
-    'copy': 'Скопировать',
-    'copy_to_clipboard': 'Скопировать в буфер',
-    'copy_success': 'Скопировано!',
-    'copy_failure': 'Не удалось скопировать',
-  },
-  'zh-CN': {
-    'copy': '复制',
-    'copy_to_clipboard': '复制到剪贴板',
-    'copy_success': '复制成功!',
-    'copy_failure': '复制失败',
-  },
-  'it' : {
-    'copy': 'Copiare',
-    'copy_to_clipboard': 'Copiato negli appunti',
-    'copy_success': 'Copiato!',
-    'copy_failure': 'Errore durante la copia',
-  }
-}
-
-let locale = 'en'
-if( document.documentElement.lang !== undefined
-    && messages[document.documentElement.lang] !== undefined ) {
-  locale = document.documentElement.lang
-}
-
-let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT;
-if (doc_url_root == '#') {
-    doc_url_root = '';
-}
-
-/**
- * SVG files for our copy buttons
- */
-let iconCheck = `<svg xmlns="http://www.w3.org/2000/svg" class="icon icon-tabler icon-tabler-check" width="44" height="44" viewBox="0 0 24 24" stroke-width="2" stroke="#22863a" fill="none" stroke-linecap="round" stroke-linejoin="round">
-  <title>${messages[locale]['copy_success']}</title>
-  <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-  <path d="M5 12l5 5l10 -10" />
-</svg>`
-
-// If the user specified their own SVG use that, otherwise use the default
-let iconCopy = ``;
-if (!iconCopy) {
-  iconCopy = `<svg xmlns="http://www.w3.org/2000/svg" class="icon icon-tabler icon-tabler-copy" width="44" height="44" viewBox="0 0 24 24" stroke-width="1.5" stroke="#000000" fill="none" stroke-linecap="round" stroke-linejoin="round">
-  <title>${messages[locale]['copy_to_clipboard']}</title>
-  <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-  <rect x="8" y="8" width="12" height="12" rx="2" />
-  <path d="M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2" />
-</svg>`
-}
-
-/**
- * Set up copy/paste for code blocks
- */
-
-const runWhenDOMLoaded = cb => {
-  if (document.readyState != 'loading') {
-    cb()
-  } else if (document.addEventListener) {
-    document.addEventListener('DOMContentLoaded', cb)
-  } else {
-    document.attachEvent('onreadystatechange', function() {
-      if (document.readyState == 'complete') cb()
-    })
-  }
-}
-
-const codeCellId = index => `codecell${index}`
-
-// Clears selected text since ClipboardJS will select the text when copying
-const clearSelection = () => {
-  if (window.getSelection) {
-    window.getSelection().removeAllRanges()
-  } else if (document.selection) {
-    document.selection.empty()
-  }
-}
-
-// Changes tooltip text for a moment, then changes it back
-// We want the timeout of our `success` class to be a bit shorter than the
-// tooltip and icon change, so that we can hide the icon before changing back.
-var timeoutIcon = 2000;
-var timeoutSuccessClass = 1500;
-
-const temporarilyChangeTooltip = (el, oldText, newText) => {
-  el.setAttribute('data-tooltip', newText)
-  el.classList.add('success')
-  // Remove success a little bit sooner than we change the tooltip
-  // So that we can use CSS to hide the copybutton first
-  setTimeout(() => el.classList.remove('success'), timeoutSuccessClass)
-  setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon)
-}
-
-// Changes the copy button icon for two seconds, then changes it back
-const temporarilyChangeIcon = (el) => {
-  el.innerHTML = iconCheck;
-  setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon)
-}
-
-const addCopyButtonToCodeCells = () => {
-  // If ClipboardJS hasn't loaded, wait a bit and try again. This
-  // happens because we load ClipboardJS asynchronously.
-  if (window.ClipboardJS === undefined) {
-    setTimeout(addCopyButtonToCodeCells, 250)
-    return
-  }
-
-  // Add copybuttons to all of our code cells
-  const COPYBUTTON_SELECTOR = 'div.highlight pre';
-  const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR)
-  codeCells.forEach((codeCell, index) => {
-    const id = codeCellId(index)
-    codeCell.setAttribute('id', id)
-
-    const clipboardButton = id =>
-    `<button class="copybtn o-tooltip--left" data-tooltip="${messages[locale]['copy']}" data-clipboard-target="#${id}">
-      ${iconCopy}
-    </button>`
-    codeCell.insertAdjacentHTML('afterend', clipboardButton(id))
-  })
-
-function escapeRegExp(string) {
-    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
-}
-
-/**
- * Removes excluded text from a Node.
- *
- * @param {Node} target Node to filter.
- * @param {string} exclude CSS selector of nodes to exclude.
- * @returns {DOMString} Text from `target` with text removed.
- */
-function filterText(target, exclude) {
-    const clone = target.cloneNode(true);  // clone as to not modify the live DOM
-    if (exclude) {
-        // remove excluded nodes
-        clone.querySelectorAll(exclude).forEach(node => node.remove());
-    }
-    return clone.innerText;
-}
-
-// Callback when a copy button is clicked. Will be passed the node that was clicked
-// should then grab the text and replace pieces of text that shouldn't be used in output
-function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") {
-    var regexp;
-    var match;
-
-    // Do we check for line continuation characters and "HERE-documents"?
-    var useLineCont = !!lineContinuationChar
-    var useHereDoc = !!hereDocDelim
-
-    // create regexp to capture prompt and remaining line
-    if (isRegexp) {
-        regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)')
-    } else {
-        regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)')
-    }
-
-    const outputLines = [];
-    var promptFound = false;
-    var gotLineCont = false;
-    var gotHereDoc = false;
-    const lineGotPrompt = [];
-    for (const line of textContent.split('\n')) {
-        match = line.match(regexp)
-        if (match || gotLineCont || gotHereDoc) {
-            promptFound = regexp.test(line)
-            lineGotPrompt.push(promptFound)
-            if (removePrompts && promptFound) {
-                outputLines.push(match[2])
-            } else {
-                outputLines.push(line)
-            }
-            gotLineCont = line.endsWith(lineContinuationChar) & useLineCont
-            if (line.includes(hereDocDelim) & useHereDoc)
-                gotHereDoc = !gotHereDoc
-        } else if (!onlyCopyPromptLines) {
-            outputLines.push(line)
-        } else if (copyEmptyLines && line.trim() === '') {
-            outputLines.push(line)
-        }
-    }
-
-    // If no lines with the prompt were found then just use original lines
-    if (lineGotPrompt.some(v => v === true)) {
-        textContent = outputLines.join('\n');
-    }
-
-    // Remove a trailing newline to avoid auto-running when pasting
-    if (textContent.endsWith("\n")) {
-        textContent = textContent.slice(0, -1)
-    }
-    return textContent
-}
-
-
-var copyTargetText = (trigger) => {
-  var target = document.querySelector(trigger.attributes['data-clipboard-target'].value);
-
-  // get filtered text
-  let exclude = '.linenos';
-
-  let text = filterText(target, exclude);
-  return formatCopyText(text, '', false, true, true, true, '', '')
-}
-
-  // Initialize with a callback so we can modify the text before copy
-  const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText})
-
-  // Update UI with error/success messages
-  clipboard.on('success', event => {
-    clearSelection()
-    temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success'])
-    temporarilyChangeIcon(event.trigger)
-  })
-
-  clipboard.on('error', event => {
-    temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure'])
-  })
-}
-
+// Localization support
+const messages = {
+  'en': {
+    'copy': 'Copy',
+    'copy_to_clipboard': 'Copy to clipboard',
+    'copy_success': 'Copied!',
+    'copy_failure': 'Failed to copy',
+  },
+  'es' : {
+    'copy': 'Copiar',
+    'copy_to_clipboard': 'Copiar al portapapeles',
+    'copy_success': '¡Copiado!',
+    'copy_failure': 'Error al copiar',
+  },
+  'de' : {
+    'copy': 'Kopieren',
+    'copy_to_clipboard': 'In die Zwischenablage kopieren',
+    'copy_success': 'Kopiert!',
+    'copy_failure': 'Fehler beim Kopieren',
+  },
+  'fr' : {
+    'copy': 'Copier',
+    'copy_to_clipboard': 'Copier dans le presse-papier',
+    'copy_success': 'Copié !',
+    'copy_failure': 'Échec de la copie',
+  },
+  'ru': {
+    'copy': 'Скопировать',
+    'copy_to_clipboard': 'Скопировать в буфер',
+    'copy_success': 'Скопировано!',
+    'copy_failure': 'Не удалось скопировать',
+  },
+  'zh-CN': {
+    'copy': '复制',
+    'copy_to_clipboard': '复制到剪贴板',
+    'copy_success': '复制成功!',
+    'copy_failure': '复制失败',
+  },
+  'it' : {
+    'copy': 'Copiare',
+    'copy_to_clipboard': 'Copiato negli appunti',
+    'copy_success': 'Copiato!',
+    'copy_failure': 'Errore durante la copia',
+  }
+}
+
+let locale = 'en'
+if( document.documentElement.lang !== undefined
+    && messages[document.documentElement.lang] !== undefined ) {
+  locale = document.documentElement.lang
+}
+
+let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT;
+if (doc_url_root == '#') {
+    doc_url_root = '';
+}
+
+/**
+ * SVG files for our copy buttons
+ */
+let iconCheck = `<svg xmlns="http://www.w3.org/2000/svg" class="icon icon-tabler icon-tabler-check" width="44" height="44" viewBox="0 0 24 24" stroke-width="2" stroke="#22863a" fill="none" stroke-linecap="round" stroke-linejoin="round">
+  <title>${messages[locale]['copy_success']}</title>
+  <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
+  <path d="M5 12l5 5l10 -10" />
+</svg>`
+
+// If the user specified their own SVG use that, otherwise use the default
+let iconCopy = ``;
+if (!iconCopy) {
+  iconCopy = `<svg xmlns="http://www.w3.org/2000/svg" class="icon icon-tabler icon-tabler-copy" width="44" height="44" viewBox="0 0 24 24" stroke-width="1.5" stroke="#000000" fill="none" stroke-linecap="round" stroke-linejoin="round">
+  <title>${messages[locale]['copy_to_clipboard']}</title>
+  <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
+  <rect x="8" y="8" width="12" height="12" rx="2" />
+  <path d="M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2" />
+</svg>`
+}
+
+/**
+ * Set up copy/paste for code blocks
+ */
+
+const runWhenDOMLoaded = cb => {
+  if (document.readyState != 'loading') {
+    cb()
+  } else if (document.addEventListener) {
+    document.addEventListener('DOMContentLoaded', cb)
+  } else {
+    document.attachEvent('onreadystatechange', function() {
+      if (document.readyState == 'complete') cb()
+    })
+  }
+}
+
+const codeCellId = index => `codecell${index}`
+
+// Clears selected text since ClipboardJS will select the text when copying
+const clearSelection = () => {
+  if (window.getSelection) {
+    window.getSelection().removeAllRanges()
+  } else if (document.selection) {
+    document.selection.empty()
+  }
+}
+
+// Changes tooltip text for a moment, then changes it back
+// We want the timeout of our `success` class to be a bit shorter than the
+// tooltip and icon change, so that we can hide the icon before changing back.
+var timeoutIcon = 2000;
+var timeoutSuccessClass = 1500;
+
+const temporarilyChangeTooltip = (el, oldText, newText) => {
+  el.setAttribute('data-tooltip', newText)
+  el.classList.add('success')
+  // Remove success a little bit sooner than we change the tooltip
+  // So that we can use CSS to hide the copybutton first
+  setTimeout(() => el.classList.remove('success'), timeoutSuccessClass)
+  setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon)
+}
+
+// Changes the copy button icon for two seconds, then changes it back
+const temporarilyChangeIcon = (el) => {
+  el.innerHTML = iconCheck;
+  setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon)
+}
+
+const addCopyButtonToCodeCells = () => {
+  // If ClipboardJS hasn't loaded, wait a bit and try again. This
+  // happens because we load ClipboardJS asynchronously.
+  if (window.ClipboardJS === undefined) {
+    setTimeout(addCopyButtonToCodeCells, 250)
+    return
+  }
+
+  // Add copybuttons to all of our code cells
+  const COPYBUTTON_SELECTOR = 'div.highlight pre';
+  const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR)
+  codeCells.forEach((codeCell, index) => {
+    const id = codeCellId(index)
+    codeCell.setAttribute('id', id)
+
+    const clipboardButton = id =>
+    `<button class="copybtn o-tooltip--left" data-tooltip="${messages[locale]['copy']}" data-clipboard-target="#${id}">
+      ${iconCopy}
+    </button>`
+    codeCell.insertAdjacentHTML('afterend', clipboardButton(id))
+  })
+
+function escapeRegExp(string) {
+    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
+}
+
+/**
+ * Removes excluded text from a Node.
+ *
+ * @param {Node} target Node to filter.
+ * @param {string} exclude CSS selector of nodes to exclude.
+ * @returns {DOMString} Text from `target` with text removed.
+ */
+function filterText(target, exclude) {
+    const clone = target.cloneNode(true);  // clone as to not modify the live DOM
+    if (exclude) {
+        // remove excluded nodes
+        clone.querySelectorAll(exclude).forEach(node => node.remove());
+    }
+    return clone.innerText;
+}
+
+// Callback when a copy button is clicked. Will be passed the node that was clicked
+// should then grab the text and replace pieces of text that shouldn't be used in output
+function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") {
+    var regexp;
+    var match;
+
+    // Do we check for line continuation characters and "HERE-documents"?
+    var useLineCont = !!lineContinuationChar
+    var useHereDoc = !!hereDocDelim
+
+    // create regexp to capture prompt and remaining line
+    if (isRegexp) {
+        regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)')
+    } else {
+        regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)')
+    }
+
+    const outputLines = [];
+    var promptFound = false;
+    var gotLineCont = false;
+    var gotHereDoc = false;
+    const lineGotPrompt = [];
+    for (const line of textContent.split('\n')) {
+        match = line.match(regexp)
+        if (match || gotLineCont || gotHereDoc) {
+            promptFound = regexp.test(line)
+            lineGotPrompt.push(promptFound)
+            if (removePrompts && promptFound) {
+                outputLines.push(match[2])
+            } else {
+                outputLines.push(line)
+            }
+            gotLineCont = line.endsWith(lineContinuationChar) & useLineCont
+            if (line.includes(hereDocDelim) & useHereDoc)
+                gotHereDoc = !gotHereDoc
+        } else if (!onlyCopyPromptLines) {
+            outputLines.push(line)
+        } else if (copyEmptyLines && line.trim() === '') {
+            outputLines.push(line)
+        }
+    }
+
+    // If no lines with the prompt were found then just use original lines
+    if (lineGotPrompt.some(v => v === true)) {
+        textContent = outputLines.join('\n');
+    }
+
+    // Remove a trailing newline to avoid auto-running when pasting
+    if (textContent.endsWith("\n")) {
+        textContent = textContent.slice(0, -1)
+    }
+    return textContent
+}
+
+
+var copyTargetText = (trigger) => {
+  var target = document.querySelector(trigger.attributes['data-clipboard-target'].value);
+
+  // get filtered text
+  let exclude = '.linenos';
+
+  let text = filterText(target, exclude);
+  return formatCopyText(text, '', false, true, true, true, '', '')
+}
+
+  // Initialize with a callback so we can modify the text before copy
+  const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText})
+
+  // Update UI with error/success messages
+  clipboard.on('success', event => {
+    clearSelection()
+    temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success'])
+    temporarilyChangeIcon(event.trigger)
+  })
+
+  clipboard.on('error', event => {
+    temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure'])
+  })
+}
+
 runWhenDOMLoaded(addCopyButtonToCodeCells)
\ No newline at end of file
diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js
old mode 100644
new mode 100755
diff --git a/_static/css/blank.css b/_static/css/blank.css
old mode 100644
new mode 100755
diff --git a/_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css b/_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css
old mode 100644
new mode 100755
diff --git a/_static/css/index.ff1ffe594081f20da1ef19478df9384b.css b/_static/css/index.ff1ffe594081f20da1ef19478df9384b.css
old mode 100644
new mode 100755
diff --git a/_static/css/theme.css b/_static/css/theme.css
old mode 100644
new mode 100755
diff --git a/_static/design-style.4045f2051d55cab465a707391d5b2007.min.css b/_static/design-style.4045f2051d55cab465a707391d5b2007.min.css
old mode 100644
new mode 100755
diff --git a/_static/design-tabs.js b/_static/design-tabs.js
old mode 100644
new mode 100755
diff --git a/_static/doctools.js b/_static/doctools.js
old mode 100644
new mode 100755
diff --git a/_static/documentation_options.js b/_static/documentation_options.js
old mode 100644
new mode 100755
index 162a6ba8..f48413d2
--- a/_static/documentation_options.js
+++ b/_static/documentation_options.js
@@ -1,14 +1,14 @@
-var DOCUMENTATION_OPTIONS = {
-    URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
-    VERSION: '',
-    LANGUAGE: 'en',
-    COLLAPSE_INDEX: false,
-    BUILDER: 'html',
-    FILE_SUFFIX: '.html',
-    LINK_SUFFIX: '.html',
-    HAS_SOURCE: true,
-    SOURCELINK_SUFFIX: '',
-    NAVIGATION_WITH_KEYS: false,
-    SHOW_SEARCH_SUMMARY: true,
-    ENABLE_SEARCH_SHORTCUTS: false,
+var DOCUMENTATION_OPTIONS = {
+    URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
+    VERSION: '',
+    LANGUAGE: 'en',
+    COLLAPSE_INDEX: false,
+    BUILDER: 'html',
+    FILE_SUFFIX: '.html',
+    LINK_SUFFIX: '.html',
+    HAS_SOURCE: true,
+    SOURCELINK_SUFFIX: '',
+    NAVIGATION_WITH_KEYS: false,
+    SHOW_SEARCH_SUMMARY: true,
+    ENABLE_SEARCH_SHORTCUTS: false,
 };
\ No newline at end of file
diff --git a/_static/file.png b/_static/file.png
old mode 100644
new mode 100755
diff --git a/_static/images/logo_binder.svg b/_static/images/logo_binder.svg
old mode 100644
new mode 100755
diff --git a/_static/images/logo_colab.png b/_static/images/logo_colab.png
old mode 100644
new mode 100755
diff --git a/_static/images/logo_deepnote.svg b/_static/images/logo_deepnote.svg
old mode 100644
new mode 100755
diff --git a/_static/images/logo_jupyterhub.svg b/_static/images/logo_jupyterhub.svg
old mode 100644
new mode 100755
diff --git a/_static/jquery-3.5.1.js b/_static/jquery-3.5.1.js
old mode 100644
new mode 100755
diff --git a/_static/jquery-3.6.0.js b/_static/jquery-3.6.0.js
old mode 100644
new mode 100755
diff --git a/_static/jquery.js b/_static/jquery.js
old mode 100644
new mode 100755
diff --git a/_static/js/index.3da636dd464baa7582d2.js b/_static/js/index.3da636dd464baa7582d2.js
old mode 100644
new mode 100755
diff --git a/_static/js/index.be7d3bbb2ef33a8344ce.js b/_static/js/index.be7d3bbb2ef33a8344ce.js
old mode 100644
new mode 100755
diff --git a/_static/language_data.js b/_static/language_data.js
old mode 100644
new mode 100755
index 2e22b06a..29455f02
--- a/_static/language_data.js
+++ b/_static/language_data.js
@@ -1,199 +1,199 @@
-/*
- * language_data.js
- * ~~~~~~~~~~~~~~~~
- *
- * This script contains the language-specific data used by searchtools.js,
- * namely the list of stopwords, stemmer, scorer and splitter.
- *
- * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
- * :license: BSD, see LICENSE for details.
- *
- */
-
-var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
-
-
-/* Non-minified version is copied as a separate JS file, is available */
-
-/**
- * Porter Stemmer
- */
-var Stemmer = function() {
-
-  var step2list = {
-    ational: 'ate',
-    tional: 'tion',
-    enci: 'ence',
-    anci: 'ance',
-    izer: 'ize',
-    bli: 'ble',
-    alli: 'al',
-    entli: 'ent',
-    eli: 'e',
-    ousli: 'ous',
-    ization: 'ize',
-    ation: 'ate',
-    ator: 'ate',
-    alism: 'al',
-    iveness: 'ive',
-    fulness: 'ful',
-    ousness: 'ous',
-    aliti: 'al',
-    iviti: 'ive',
-    biliti: 'ble',
-    logi: 'log'
-  };
-
-  var step3list = {
-    icate: 'ic',
-    ative: '',
-    alize: 'al',
-    iciti: 'ic',
-    ical: 'ic',
-    ful: '',
-    ness: ''
-  };
-
-  var c = "[^aeiou]";          // consonant
-  var v = "[aeiouy]";          // vowel
-  var C = c + "[^aeiouy]*";    // consonant sequence
-  var V = v + "[aeiou]*";      // vowel sequence
-
-  var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
-  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
-  var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
-  var s_v   = "^(" + C + ")?" + v;                         // vowel in stem
-
-  this.stemWord = function (w) {
-    var stem;
-    var suffix;
-    var firstch;
-    var origword = w;
-
-    if (w.length < 3)
-      return w;
-
-    var re;
-    var re2;
-    var re3;
-    var re4;
-
-    firstch = w.substr(0,1);
-    if (firstch == "y")
-      w = firstch.toUpperCase() + w.substr(1);
-
-    // Step 1a
-    re = /^(.+?)(ss|i)es$/;
-    re2 = /^(.+?)([^s])s$/;
-
-    if (re.test(w))
-      w = w.replace(re,"$1$2");
-    else if (re2.test(w))
-      w = w.replace(re2,"$1$2");
-
-    // Step 1b
-    re = /^(.+?)eed$/;
-    re2 = /^(.+?)(ed|ing)$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      re = new RegExp(mgr0);
-      if (re.test(fp[1])) {
-        re = /.$/;
-        w = w.replace(re,"");
-      }
-    }
-    else if (re2.test(w)) {
-      var fp = re2.exec(w);
-      stem = fp[1];
-      re2 = new RegExp(s_v);
-      if (re2.test(stem)) {
-        w = stem;
-        re2 = /(at|bl|iz)$/;
-        re3 = new RegExp("([^aeiouylsz])\\1$");
-        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
-        if (re2.test(w))
-          w = w + "e";
-        else if (re3.test(w)) {
-          re = /.$/;
-          w = w.replace(re,"");
-        }
-        else if (re4.test(w))
-          w = w + "e";
-      }
-    }
-
-    // Step 1c
-    re = /^(.+?)y$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      stem = fp[1];
-      re = new RegExp(s_v);
-      if (re.test(stem))
-        w = stem + "i";
-    }
-
-    // Step 2
-    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      stem = fp[1];
-      suffix = fp[2];
-      re = new RegExp(mgr0);
-      if (re.test(stem))
-        w = stem + step2list[suffix];
-    }
-
-    // Step 3
-    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      stem = fp[1];
-      suffix = fp[2];
-      re = new RegExp(mgr0);
-      if (re.test(stem))
-        w = stem + step3list[suffix];
-    }
-
-    // Step 4
-    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
-    re2 = /^(.+?)(s|t)(ion)$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      stem = fp[1];
-      re = new RegExp(mgr1);
-      if (re.test(stem))
-        w = stem;
-    }
-    else if (re2.test(w)) {
-      var fp = re2.exec(w);
-      stem = fp[1] + fp[2];
-      re2 = new RegExp(mgr1);
-      if (re2.test(stem))
-        w = stem;
-    }
-
-    // Step 5
-    re = /^(.+?)e$/;
-    if (re.test(w)) {
-      var fp = re.exec(w);
-      stem = fp[1];
-      re = new RegExp(mgr1);
-      re2 = new RegExp(meq1);
-      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
-      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
-        w = stem;
-    }
-    re = /ll$/;
-    re2 = new RegExp(mgr1);
-    if (re.test(w) && re2.test(w)) {
-      re = /.$/;
-      w = w.replace(re,"");
-    }
-
-    // and turn initial Y back to y
-    if (firstch == "y")
-      w = firstch.toLowerCase() + w.substr(1);
-    return w;
-  }
-}
-
+/*
+ * language_data.js
+ * ~~~~~~~~~~~~~~~~
+ *
+ * This script contains the language-specific data used by searchtools.js,
+ * namely the list of stopwords, stemmer, scorer and splitter.
+ *
+ * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
+
+
+/* Non-minified version is copied as a separate JS file, is available */
+
+/**
+ * Porter Stemmer
+ */
+var Stemmer = function() {
+
+  var step2list = {
+    ational: 'ate',
+    tional: 'tion',
+    enci: 'ence',
+    anci: 'ance',
+    izer: 'ize',
+    bli: 'ble',
+    alli: 'al',
+    entli: 'ent',
+    eli: 'e',
+    ousli: 'ous',
+    ization: 'ize',
+    ation: 'ate',
+    ator: 'ate',
+    alism: 'al',
+    iveness: 'ive',
+    fulness: 'ful',
+    ousness: 'ous',
+    aliti: 'al',
+    iviti: 'ive',
+    biliti: 'ble',
+    logi: 'log'
+  };
+
+  var step3list = {
+    icate: 'ic',
+    ative: '',
+    alize: 'al',
+    iciti: 'ic',
+    ical: 'ic',
+    ful: '',
+    ness: ''
+  };
+
+  var c = "[^aeiou]";          // consonant
+  var v = "[aeiouy]";          // vowel
+  var C = c + "[^aeiouy]*";    // consonant sequence
+  var V = v + "[aeiou]*";      // vowel sequence
+
+  var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
+  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
+  var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
+  var s_v   = "^(" + C + ")?" + v;                         // vowel in stem
+
+  this.stemWord = function (w) {
+    var stem;
+    var suffix;
+    var firstch;
+    var origword = w;
+
+    if (w.length < 3)
+      return w;
+
+    var re;
+    var re2;
+    var re3;
+    var re4;
+
+    firstch = w.substr(0,1);
+    if (firstch == "y")
+      w = firstch.toUpperCase() + w.substr(1);
+
+    // Step 1a
+    re = /^(.+?)(ss|i)es$/;
+    re2 = /^(.+?)([^s])s$/;
+
+    if (re.test(w))
+      w = w.replace(re,"$1$2");
+    else if (re2.test(w))
+      w = w.replace(re2,"$1$2");
+
+    // Step 1b
+    re = /^(.+?)eed$/;
+    re2 = /^(.+?)(ed|ing)$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      re = new RegExp(mgr0);
+      if (re.test(fp[1])) {
+        re = /.$/;
+        w = w.replace(re,"");
+      }
+    }
+    else if (re2.test(w)) {
+      var fp = re2.exec(w);
+      stem = fp[1];
+      re2 = new RegExp(s_v);
+      if (re2.test(stem)) {
+        w = stem;
+        re2 = /(at|bl|iz)$/;
+        re3 = new RegExp("([^aeiouylsz])\\1$");
+        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+        if (re2.test(w))
+          w = w + "e";
+        else if (re3.test(w)) {
+          re = /.$/;
+          w = w.replace(re,"");
+        }
+        else if (re4.test(w))
+          w = w + "e";
+      }
+    }
+
+    // Step 1c
+    re = /^(.+?)y$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      stem = fp[1];
+      re = new RegExp(s_v);
+      if (re.test(stem))
+        w = stem + "i";
+    }
+
+    // Step 2
+    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      stem = fp[1];
+      suffix = fp[2];
+      re = new RegExp(mgr0);
+      if (re.test(stem))
+        w = stem + step2list[suffix];
+    }
+
+    // Step 3
+    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      stem = fp[1];
+      suffix = fp[2];
+      re = new RegExp(mgr0);
+      if (re.test(stem))
+        w = stem + step3list[suffix];
+    }
+
+    // Step 4
+    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
+    re2 = /^(.+?)(s|t)(ion)$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      stem = fp[1];
+      re = new RegExp(mgr1);
+      if (re.test(stem))
+        w = stem;
+    }
+    else if (re2.test(w)) {
+      var fp = re2.exec(w);
+      stem = fp[1] + fp[2];
+      re2 = new RegExp(mgr1);
+      if (re2.test(stem))
+        w = stem;
+    }
+
+    // Step 5
+    re = /^(.+?)e$/;
+    if (re.test(w)) {
+      var fp = re.exec(w);
+      stem = fp[1];
+      re = new RegExp(mgr1);
+      re2 = new RegExp(meq1);
+      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
+        w = stem;
+    }
+    re = /ll$/;
+    re2 = new RegExp(mgr1);
+    if (re.test(w) && re2.test(w)) {
+      re = /.$/;
+      w = w.replace(re,"");
+    }
+
+    // and turn initial Y back to y
+    if (firstch == "y")
+      w = firstch.toLowerCase() + w.substr(1);
+    return w;
+  }
+}
+
diff --git a/_static/locales/ar/LC_MESSAGES/booktheme.mo b/_static/locales/ar/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ar/LC_MESSAGES/booktheme.po b/_static/locales/ar/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/bg/LC_MESSAGES/booktheme.mo b/_static/locales/bg/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/bg/LC_MESSAGES/booktheme.po b/_static/locales/bg/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/bn/LC_MESSAGES/booktheme.mo b/_static/locales/bn/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/bn/LC_MESSAGES/booktheme.po b/_static/locales/bn/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ca/LC_MESSAGES/booktheme.mo b/_static/locales/ca/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ca/LC_MESSAGES/booktheme.po b/_static/locales/ca/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/cs/LC_MESSAGES/booktheme.mo b/_static/locales/cs/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/cs/LC_MESSAGES/booktheme.po b/_static/locales/cs/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/da/LC_MESSAGES/booktheme.mo b/_static/locales/da/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/da/LC_MESSAGES/booktheme.po b/_static/locales/da/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/de/LC_MESSAGES/booktheme.mo b/_static/locales/de/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/de/LC_MESSAGES/booktheme.po b/_static/locales/de/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/el/LC_MESSAGES/booktheme.mo b/_static/locales/el/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/el/LC_MESSAGES/booktheme.po b/_static/locales/el/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/eo/LC_MESSAGES/booktheme.mo b/_static/locales/eo/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/eo/LC_MESSAGES/booktheme.po b/_static/locales/eo/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/es/LC_MESSAGES/booktheme.mo b/_static/locales/es/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/es/LC_MESSAGES/booktheme.po b/_static/locales/es/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/et/LC_MESSAGES/booktheme.mo b/_static/locales/et/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/et/LC_MESSAGES/booktheme.po b/_static/locales/et/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/fi/LC_MESSAGES/booktheme.mo b/_static/locales/fi/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/fi/LC_MESSAGES/booktheme.po b/_static/locales/fi/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/fr/LC_MESSAGES/booktheme.mo b/_static/locales/fr/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/fr/LC_MESSAGES/booktheme.po b/_static/locales/fr/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/hr/LC_MESSAGES/booktheme.mo b/_static/locales/hr/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/hr/LC_MESSAGES/booktheme.po b/_static/locales/hr/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/id/LC_MESSAGES/booktheme.mo b/_static/locales/id/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/id/LC_MESSAGES/booktheme.po b/_static/locales/id/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/it/LC_MESSAGES/booktheme.mo b/_static/locales/it/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/it/LC_MESSAGES/booktheme.po b/_static/locales/it/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/iw/LC_MESSAGES/booktheme.mo b/_static/locales/iw/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/iw/LC_MESSAGES/booktheme.po b/_static/locales/iw/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ja/LC_MESSAGES/booktheme.mo b/_static/locales/ja/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ja/LC_MESSAGES/booktheme.po b/_static/locales/ja/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ko/LC_MESSAGES/booktheme.mo b/_static/locales/ko/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ko/LC_MESSAGES/booktheme.po b/_static/locales/ko/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/lt/LC_MESSAGES/booktheme.mo b/_static/locales/lt/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/lt/LC_MESSAGES/booktheme.po b/_static/locales/lt/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/lv/LC_MESSAGES/booktheme.mo b/_static/locales/lv/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/lv/LC_MESSAGES/booktheme.po b/_static/locales/lv/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ml/LC_MESSAGES/booktheme.mo b/_static/locales/ml/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ml/LC_MESSAGES/booktheme.po b/_static/locales/ml/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/mr/LC_MESSAGES/booktheme.mo b/_static/locales/mr/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/mr/LC_MESSAGES/booktheme.po b/_static/locales/mr/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ms/LC_MESSAGES/booktheme.mo b/_static/locales/ms/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ms/LC_MESSAGES/booktheme.po b/_static/locales/ms/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/nl/LC_MESSAGES/booktheme.mo b/_static/locales/nl/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/nl/LC_MESSAGES/booktheme.po b/_static/locales/nl/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/no/LC_MESSAGES/booktheme.mo b/_static/locales/no/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/no/LC_MESSAGES/booktheme.po b/_static/locales/no/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/pl/LC_MESSAGES/booktheme.mo b/_static/locales/pl/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/pl/LC_MESSAGES/booktheme.po b/_static/locales/pl/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/pt/LC_MESSAGES/booktheme.mo b/_static/locales/pt/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/pt/LC_MESSAGES/booktheme.po b/_static/locales/pt/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ro/LC_MESSAGES/booktheme.mo b/_static/locales/ro/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ro/LC_MESSAGES/booktheme.po b/_static/locales/ro/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ru/LC_MESSAGES/booktheme.mo b/_static/locales/ru/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ru/LC_MESSAGES/booktheme.po b/_static/locales/ru/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/sk/LC_MESSAGES/booktheme.mo b/_static/locales/sk/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/sk/LC_MESSAGES/booktheme.po b/_static/locales/sk/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/sl/LC_MESSAGES/booktheme.mo b/_static/locales/sl/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/sl/LC_MESSAGES/booktheme.po b/_static/locales/sl/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/sr/LC_MESSAGES/booktheme.mo b/_static/locales/sr/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/sr/LC_MESSAGES/booktheme.po b/_static/locales/sr/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/sv/LC_MESSAGES/booktheme.mo b/_static/locales/sv/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/sv/LC_MESSAGES/booktheme.po b/_static/locales/sv/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ta/LC_MESSAGES/booktheme.mo b/_static/locales/ta/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ta/LC_MESSAGES/booktheme.po b/_static/locales/ta/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/te/LC_MESSAGES/booktheme.mo b/_static/locales/te/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/te/LC_MESSAGES/booktheme.po b/_static/locales/te/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/tg/LC_MESSAGES/booktheme.mo b/_static/locales/tg/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/tg/LC_MESSAGES/booktheme.po b/_static/locales/tg/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/th/LC_MESSAGES/booktheme.mo b/_static/locales/th/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/th/LC_MESSAGES/booktheme.po b/_static/locales/th/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/tl/LC_MESSAGES/booktheme.mo b/_static/locales/tl/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/tl/LC_MESSAGES/booktheme.po b/_static/locales/tl/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/tr/LC_MESSAGES/booktheme.mo b/_static/locales/tr/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/tr/LC_MESSAGES/booktheme.po b/_static/locales/tr/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/uk/LC_MESSAGES/booktheme.mo b/_static/locales/uk/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/uk/LC_MESSAGES/booktheme.po b/_static/locales/uk/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/ur/LC_MESSAGES/booktheme.mo b/_static/locales/ur/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/ur/LC_MESSAGES/booktheme.po b/_static/locales/ur/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/vi/LC_MESSAGES/booktheme.mo b/_static/locales/vi/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/vi/LC_MESSAGES/booktheme.po b/_static/locales/vi/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/zh_CN/LC_MESSAGES/booktheme.mo b/_static/locales/zh_CN/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/zh_CN/LC_MESSAGES/booktheme.po b/_static/locales/zh_CN/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/locales/zh_TW/LC_MESSAGES/booktheme.mo b/_static/locales/zh_TW/LC_MESSAGES/booktheme.mo
old mode 100644
new mode 100755
diff --git a/_static/locales/zh_TW/LC_MESSAGES/booktheme.po b/_static/locales/zh_TW/LC_MESSAGES/booktheme.po
old mode 100644
new mode 100755
diff --git a/_static/minus.png b/_static/minus.png
old mode 100644
new mode 100755
diff --git a/_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css b/_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css
old mode 100644
new mode 100755
diff --git a/_static/mystnb.css b/_static/mystnb.css
old mode 100644
new mode 100755
diff --git a/_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css b/_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css
old mode 100644
new mode 100755
diff --git a/_static/panels-variables.06eb56fa6e07937060861dad626602ad.css b/_static/panels-variables.06eb56fa6e07937060861dad626602ad.css
old mode 100644
new mode 100755
diff --git a/_static/play-solid.svg b/_static/play-solid.svg
old mode 100644
new mode 100755
diff --git a/_static/plus.png b/_static/plus.png
old mode 100644
new mode 100755
diff --git a/_static/pygments.css b/_static/pygments.css
old mode 100644
new mode 100755
index 012e6a00..1bf3c248
--- a/_static/pygments.css
+++ b/_static/pygments.css
@@ -1,152 +1,152 @@
-html[data-theme="light"] .highlight pre { line-height: 125%; }
-html[data-theme="light"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-html[data-theme="light"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-html[data-theme="light"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-html[data-theme="light"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-html[data-theme="light"] .highlight .hll { background-color: #fae4c2 }
-html[data-theme="light"] .highlight { background: #fefefe; color: #080808 }
-html[data-theme="light"] .highlight .c { color: #515151 } /* Comment */
-html[data-theme="light"] .highlight .err { color: #a12236 } /* Error */
-html[data-theme="light"] .highlight .k { color: #6730c5 } /* Keyword */
-html[data-theme="light"] .highlight .l { color: #7f4707 } /* Literal */
-html[data-theme="light"] .highlight .n { color: #080808 } /* Name */
-html[data-theme="light"] .highlight .o { color: #00622f } /* Operator */
-html[data-theme="light"] .highlight .p { color: #080808 } /* Punctuation */
-html[data-theme="light"] .highlight .ch { color: #515151 } /* Comment.Hashbang */
-html[data-theme="light"] .highlight .cm { color: #515151 } /* Comment.Multiline */
-html[data-theme="light"] .highlight .cp { color: #515151 } /* Comment.Preproc */
-html[data-theme="light"] .highlight .cpf { color: #515151 } /* Comment.PreprocFile */
-html[data-theme="light"] .highlight .c1 { color: #515151 } /* Comment.Single */
-html[data-theme="light"] .highlight .cs { color: #515151 } /* Comment.Special */
-html[data-theme="light"] .highlight .gd { color: #005b82 } /* Generic.Deleted */
-html[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */
-html[data-theme="light"] .highlight .gh { color: #005b82 } /* Generic.Heading */
-html[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */
-html[data-theme="light"] .highlight .gu { color: #005b82 } /* Generic.Subheading */
-html[data-theme="light"] .highlight .kc { color: #6730c5 } /* Keyword.Constant */
-html[data-theme="light"] .highlight .kd { color: #6730c5 } /* Keyword.Declaration */
-html[data-theme="light"] .highlight .kn { color: #6730c5 } /* Keyword.Namespace */
-html[data-theme="light"] .highlight .kp { color: #6730c5 } /* Keyword.Pseudo */
-html[data-theme="light"] .highlight .kr { color: #6730c5 } /* Keyword.Reserved */
-html[data-theme="light"] .highlight .kt { color: #7f4707 } /* Keyword.Type */
-html[data-theme="light"] .highlight .ld { color: #7f4707 } /* Literal.Date */
-html[data-theme="light"] .highlight .m { color: #7f4707 } /* Literal.Number */
-html[data-theme="light"] .highlight .s { color: #00622f } /* Literal.String */
-html[data-theme="light"] .highlight .na { color: #912583 } /* Name.Attribute */
-html[data-theme="light"] .highlight .nb { color: #7f4707 } /* Name.Builtin */
-html[data-theme="light"] .highlight .nc { color: #005b82 } /* Name.Class */
-html[data-theme="light"] .highlight .no { color: #005b82 } /* Name.Constant */
-html[data-theme="light"] .highlight .nd { color: #7f4707 } /* Name.Decorator */
-html[data-theme="light"] .highlight .ni { color: #00622f } /* Name.Entity */
-html[data-theme="light"] .highlight .ne { color: #6730c5 } /* Name.Exception */
-html[data-theme="light"] .highlight .nf { color: #005b82 } /* Name.Function */
-html[data-theme="light"] .highlight .nl { color: #7f4707 } /* Name.Label */
-html[data-theme="light"] .highlight .nn { color: #080808 } /* Name.Namespace */
-html[data-theme="light"] .highlight .nx { color: #080808 } /* Name.Other */
-html[data-theme="light"] .highlight .py { color: #005b82 } /* Name.Property */
-html[data-theme="light"] .highlight .nt { color: #005b82 } /* Name.Tag */
-html[data-theme="light"] .highlight .nv { color: #a12236 } /* Name.Variable */
-html[data-theme="light"] .highlight .ow { color: #6730c5 } /* Operator.Word */
-html[data-theme="light"] .highlight .pm { color: #080808 } /* Punctuation.Marker */
-html[data-theme="light"] .highlight .w { color: #080808 } /* Text.Whitespace */
-html[data-theme="light"] .highlight .mb { color: #7f4707 } /* Literal.Number.Bin */
-html[data-theme="light"] .highlight .mf { color: #7f4707 } /* Literal.Number.Float */
-html[data-theme="light"] .highlight .mh { color: #7f4707 } /* Literal.Number.Hex */
-html[data-theme="light"] .highlight .mi { color: #7f4707 } /* Literal.Number.Integer */
-html[data-theme="light"] .highlight .mo { color: #7f4707 } /* Literal.Number.Oct */
-html[data-theme="light"] .highlight .sa { color: #00622f } /* Literal.String.Affix */
-html[data-theme="light"] .highlight .sb { color: #00622f } /* Literal.String.Backtick */
-html[data-theme="light"] .highlight .sc { color: #00622f } /* Literal.String.Char */
-html[data-theme="light"] .highlight .dl { color: #00622f } /* Literal.String.Delimiter */
-html[data-theme="light"] .highlight .sd { color: #00622f } /* Literal.String.Doc */
-html[data-theme="light"] .highlight .s2 { color: #00622f } /* Literal.String.Double */
-html[data-theme="light"] .highlight .se { color: #00622f } /* Literal.String.Escape */
-html[data-theme="light"] .highlight .sh { color: #00622f } /* Literal.String.Heredoc */
-html[data-theme="light"] .highlight .si { color: #00622f } /* Literal.String.Interpol */
-html[data-theme="light"] .highlight .sx { color: #00622f } /* Literal.String.Other */
-html[data-theme="light"] .highlight .sr { color: #a12236 } /* Literal.String.Regex */
-html[data-theme="light"] .highlight .s1 { color: #00622f } /* Literal.String.Single */
-html[data-theme="light"] .highlight .ss { color: #005b82 } /* Literal.String.Symbol */
-html[data-theme="light"] .highlight .bp { color: #7f4707 } /* Name.Builtin.Pseudo */
-html[data-theme="light"] .highlight .fm { color: #005b82 } /* Name.Function.Magic */
-html[data-theme="light"] .highlight .vc { color: #a12236 } /* Name.Variable.Class */
-html[data-theme="light"] .highlight .vg { color: #a12236 } /* Name.Variable.Global */
-html[data-theme="light"] .highlight .vi { color: #a12236 } /* Name.Variable.Instance */
-html[data-theme="light"] .highlight .vm { color: #7f4707 } /* Name.Variable.Magic */
-html[data-theme="light"] .highlight .il { color: #7f4707 } /* Literal.Number.Integer.Long */
-html[data-theme="dark"] .highlight pre { line-height: 125%; }
-html[data-theme="dark"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-html[data-theme="dark"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-html[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-html[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-html[data-theme="dark"] .highlight .hll { background-color: #ffd9002e }
-html[data-theme="dark"] .highlight { background: #2b2b2b; color: #f8f8f2 }
-html[data-theme="dark"] .highlight .c { color: #ffd900 } /* Comment */
-html[data-theme="dark"] .highlight .err { color: #ffa07a } /* Error */
-html[data-theme="dark"] .highlight .k { color: #dcc6e0 } /* Keyword */
-html[data-theme="dark"] .highlight .l { color: #ffd900 } /* Literal */
-html[data-theme="dark"] .highlight .n { color: #f8f8f2 } /* Name */
-html[data-theme="dark"] .highlight .o { color: #abe338 } /* Operator */
-html[data-theme="dark"] .highlight .p { color: #f8f8f2 } /* Punctuation */
-html[data-theme="dark"] .highlight .ch { color: #ffd900 } /* Comment.Hashbang */
-html[data-theme="dark"] .highlight .cm { color: #ffd900 } /* Comment.Multiline */
-html[data-theme="dark"] .highlight .cp { color: #ffd900 } /* Comment.Preproc */
-html[data-theme="dark"] .highlight .cpf { color: #ffd900 } /* Comment.PreprocFile */
-html[data-theme="dark"] .highlight .c1 { color: #ffd900 } /* Comment.Single */
-html[data-theme="dark"] .highlight .cs { color: #ffd900 } /* Comment.Special */
-html[data-theme="dark"] .highlight .gd { color: #00e0e0 } /* Generic.Deleted */
-html[data-theme="dark"] .highlight .ge { font-style: italic } /* Generic.Emph */
-html[data-theme="dark"] .highlight .gh { color: #00e0e0 } /* Generic.Heading */
-html[data-theme="dark"] .highlight .gs { font-weight: bold } /* Generic.Strong */
-html[data-theme="dark"] .highlight .gu { color: #00e0e0 } /* Generic.Subheading */
-html[data-theme="dark"] .highlight .kc { color: #dcc6e0 } /* Keyword.Constant */
-html[data-theme="dark"] .highlight .kd { color: #dcc6e0 } /* Keyword.Declaration */
-html[data-theme="dark"] .highlight .kn { color: #dcc6e0 } /* Keyword.Namespace */
-html[data-theme="dark"] .highlight .kp { color: #dcc6e0 } /* Keyword.Pseudo */
-html[data-theme="dark"] .highlight .kr { color: #dcc6e0 } /* Keyword.Reserved */
-html[data-theme="dark"] .highlight .kt { color: #ffd900 } /* Keyword.Type */
-html[data-theme="dark"] .highlight .ld { color: #ffd900 } /* Literal.Date */
-html[data-theme="dark"] .highlight .m { color: #ffd900 } /* Literal.Number */
-html[data-theme="dark"] .highlight .s { color: #abe338 } /* Literal.String */
-html[data-theme="dark"] .highlight .na { color: #ffd900 } /* Name.Attribute */
-html[data-theme="dark"] .highlight .nb { color: #ffd900 } /* Name.Builtin */
-html[data-theme="dark"] .highlight .nc { color: #00e0e0 } /* Name.Class */
-html[data-theme="dark"] .highlight .no { color: #00e0e0 } /* Name.Constant */
-html[data-theme="dark"] .highlight .nd { color: #ffd900 } /* Name.Decorator */
-html[data-theme="dark"] .highlight .ni { color: #abe338 } /* Name.Entity */
-html[data-theme="dark"] .highlight .ne { color: #dcc6e0 } /* Name.Exception */
-html[data-theme="dark"] .highlight .nf { color: #00e0e0 } /* Name.Function */
-html[data-theme="dark"] .highlight .nl { color: #ffd900 } /* Name.Label */
-html[data-theme="dark"] .highlight .nn { color: #f8f8f2 } /* Name.Namespace */
-html[data-theme="dark"] .highlight .nx { color: #f8f8f2 } /* Name.Other */
-html[data-theme="dark"] .highlight .py { color: #00e0e0 } /* Name.Property */
-html[data-theme="dark"] .highlight .nt { color: #00e0e0 } /* Name.Tag */
-html[data-theme="dark"] .highlight .nv { color: #ffa07a } /* Name.Variable */
-html[data-theme="dark"] .highlight .ow { color: #dcc6e0 } /* Operator.Word */
-html[data-theme="dark"] .highlight .pm { color: #f8f8f2 } /* Punctuation.Marker */
-html[data-theme="dark"] .highlight .w { color: #f8f8f2 } /* Text.Whitespace */
-html[data-theme="dark"] .highlight .mb { color: #ffd900 } /* Literal.Number.Bin */
-html[data-theme="dark"] .highlight .mf { color: #ffd900 } /* Literal.Number.Float */
-html[data-theme="dark"] .highlight .mh { color: #ffd900 } /* Literal.Number.Hex */
-html[data-theme="dark"] .highlight .mi { color: #ffd900 } /* Literal.Number.Integer */
-html[data-theme="dark"] .highlight .mo { color: #ffd900 } /* Literal.Number.Oct */
-html[data-theme="dark"] .highlight .sa { color: #abe338 } /* Literal.String.Affix */
-html[data-theme="dark"] .highlight .sb { color: #abe338 } /* Literal.String.Backtick */
-html[data-theme="dark"] .highlight .sc { color: #abe338 } /* Literal.String.Char */
-html[data-theme="dark"] .highlight .dl { color: #abe338 } /* Literal.String.Delimiter */
-html[data-theme="dark"] .highlight .sd { color: #abe338 } /* Literal.String.Doc */
-html[data-theme="dark"] .highlight .s2 { color: #abe338 } /* Literal.String.Double */
-html[data-theme="dark"] .highlight .se { color: #abe338 } /* Literal.String.Escape */
-html[data-theme="dark"] .highlight .sh { color: #abe338 } /* Literal.String.Heredoc */
-html[data-theme="dark"] .highlight .si { color: #abe338 } /* Literal.String.Interpol */
-html[data-theme="dark"] .highlight .sx { color: #abe338 } /* Literal.String.Other */
-html[data-theme="dark"] .highlight .sr { color: #ffa07a } /* Literal.String.Regex */
-html[data-theme="dark"] .highlight .s1 { color: #abe338 } /* Literal.String.Single */
-html[data-theme="dark"] .highlight .ss { color: #00e0e0 } /* Literal.String.Symbol */
-html[data-theme="dark"] .highlight .bp { color: #ffd900 } /* Name.Builtin.Pseudo */
-html[data-theme="dark"] .highlight .fm { color: #00e0e0 } /* Name.Function.Magic */
-html[data-theme="dark"] .highlight .vc { color: #ffa07a } /* Name.Variable.Class */
-html[data-theme="dark"] .highlight .vg { color: #ffa07a } /* Name.Variable.Global */
-html[data-theme="dark"] .highlight .vi { color: #ffa07a } /* Name.Variable.Instance */
-html[data-theme="dark"] .highlight .vm { color: #ffd900 } /* Name.Variable.Magic */
-html[data-theme="dark"] .highlight .il { color: #ffd900 } /* Literal.Number.Integer.Long */
\ No newline at end of file
+html[data-theme="light"] .highlight pre { line-height: 125%; }
+html[data-theme="light"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
+html[data-theme="light"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
+html[data-theme="light"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
+html[data-theme="light"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
+html[data-theme="light"] .highlight .hll { background-color: #fae4c2 }
+html[data-theme="light"] .highlight { background: #fefefe; color: #080808 }
+html[data-theme="light"] .highlight .c { color: #515151 } /* Comment */
+html[data-theme="light"] .highlight .err { color: #A12236 } /* Error */
+html[data-theme="light"] .highlight .k { color: #6730C5 } /* Keyword */
+html[data-theme="light"] .highlight .l { color: #7F4707 } /* Literal */
+html[data-theme="light"] .highlight .n { color: #080808 } /* Name */
+html[data-theme="light"] .highlight .o { color: #00622F } /* Operator */
+html[data-theme="light"] .highlight .p { color: #080808 } /* Punctuation */
+html[data-theme="light"] .highlight .ch { color: #515151 } /* Comment.Hashbang */
+html[data-theme="light"] .highlight .cm { color: #515151 } /* Comment.Multiline */
+html[data-theme="light"] .highlight .cp { color: #515151 } /* Comment.Preproc */
+html[data-theme="light"] .highlight .cpf { color: #515151 } /* Comment.PreprocFile */
+html[data-theme="light"] .highlight .c1 { color: #515151 } /* Comment.Single */
+html[data-theme="light"] .highlight .cs { color: #515151 } /* Comment.Special */
+html[data-theme="light"] .highlight .gd { color: #005B82 } /* Generic.Deleted */
+html[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */
+html[data-theme="light"] .highlight .gh { color: #005B82 } /* Generic.Heading */
+html[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */
+html[data-theme="light"] .highlight .gu { color: #005B82 } /* Generic.Subheading */
+html[data-theme="light"] .highlight .kc { color: #6730C5 } /* Keyword.Constant */
+html[data-theme="light"] .highlight .kd { color: #6730C5 } /* Keyword.Declaration */
+html[data-theme="light"] .highlight .kn { color: #6730C5 } /* Keyword.Namespace */
+html[data-theme="light"] .highlight .kp { color: #6730C5 } /* Keyword.Pseudo */
+html[data-theme="light"] .highlight .kr { color: #6730C5 } /* Keyword.Reserved */
+html[data-theme="light"] .highlight .kt { color: #7F4707 } /* Keyword.Type */
+html[data-theme="light"] .highlight .ld { color: #7F4707 } /* Literal.Date */
+html[data-theme="light"] .highlight .m { color: #7F4707 } /* Literal.Number */
+html[data-theme="light"] .highlight .s { color: #00622F } /* Literal.String */
+html[data-theme="light"] .highlight .na { color: #912583 } /* Name.Attribute */
+html[data-theme="light"] .highlight .nb { color: #7F4707 } /* Name.Builtin */
+html[data-theme="light"] .highlight .nc { color: #005B82 } /* Name.Class */
+html[data-theme="light"] .highlight .no { color: #005B82 } /* Name.Constant */
+html[data-theme="light"] .highlight .nd { color: #7F4707 } /* Name.Decorator */
+html[data-theme="light"] .highlight .ni { color: #00622F } /* Name.Entity */
+html[data-theme="light"] .highlight .ne { color: #6730C5 } /* Name.Exception */
+html[data-theme="light"] .highlight .nf { color: #005B82 } /* Name.Function */
+html[data-theme="light"] .highlight .nl { color: #7F4707 } /* Name.Label */
+html[data-theme="light"] .highlight .nn { color: #080808 } /* Name.Namespace */
+html[data-theme="light"] .highlight .nx { color: #080808 } /* Name.Other */
+html[data-theme="light"] .highlight .py { color: #005B82 } /* Name.Property */
+html[data-theme="light"] .highlight .nt { color: #005B82 } /* Name.Tag */
+html[data-theme="light"] .highlight .nv { color: #A12236 } /* Name.Variable */
+html[data-theme="light"] .highlight .ow { color: #6730C5 } /* Operator.Word */
+html[data-theme="light"] .highlight .pm { color: #080808 } /* Punctuation.Marker */
+html[data-theme="light"] .highlight .w { color: #080808 } /* Text.Whitespace */
+html[data-theme="light"] .highlight .mb { color: #7F4707 } /* Literal.Number.Bin */
+html[data-theme="light"] .highlight .mf { color: #7F4707 } /* Literal.Number.Float */
+html[data-theme="light"] .highlight .mh { color: #7F4707 } /* Literal.Number.Hex */
+html[data-theme="light"] .highlight .mi { color: #7F4707 } /* Literal.Number.Integer */
+html[data-theme="light"] .highlight .mo { color: #7F4707 } /* Literal.Number.Oct */
+html[data-theme="light"] .highlight .sa { color: #00622F } /* Literal.String.Affix */
+html[data-theme="light"] .highlight .sb { color: #00622F } /* Literal.String.Backtick */
+html[data-theme="light"] .highlight .sc { color: #00622F } /* Literal.String.Char */
+html[data-theme="light"] .highlight .dl { color: #00622F } /* Literal.String.Delimiter */
+html[data-theme="light"] .highlight .sd { color: #00622F } /* Literal.String.Doc */
+html[data-theme="light"] .highlight .s2 { color: #00622F } /* Literal.String.Double */
+html[data-theme="light"] .highlight .se { color: #00622F } /* Literal.String.Escape */
+html[data-theme="light"] .highlight .sh { color: #00622F } /* Literal.String.Heredoc */
+html[data-theme="light"] .highlight .si { color: #00622F } /* Literal.String.Interpol */
+html[data-theme="light"] .highlight .sx { color: #00622F } /* Literal.String.Other */
+html[data-theme="light"] .highlight .sr { color: #A12236 } /* Literal.String.Regex */
+html[data-theme="light"] .highlight .s1 { color: #00622F } /* Literal.String.Single */
+html[data-theme="light"] .highlight .ss { color: #005B82 } /* Literal.String.Symbol */
+html[data-theme="light"] .highlight .bp { color: #7F4707 } /* Name.Builtin.Pseudo */
+html[data-theme="light"] .highlight .fm { color: #005B82 } /* Name.Function.Magic */
+html[data-theme="light"] .highlight .vc { color: #A12236 } /* Name.Variable.Class */
+html[data-theme="light"] .highlight .vg { color: #A12236 } /* Name.Variable.Global */
+html[data-theme="light"] .highlight .vi { color: #A12236 } /* Name.Variable.Instance */
+html[data-theme="light"] .highlight .vm { color: #7F4707 } /* Name.Variable.Magic */
+html[data-theme="light"] .highlight .il { color: #7F4707 } /* Literal.Number.Integer.Long */
+html[data-theme="dark"] .highlight pre { line-height: 125%; }
+html[data-theme="dark"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
+html[data-theme="dark"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
+html[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
+html[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
+html[data-theme="dark"] .highlight .hll { background-color: #ffd9002e }
+html[data-theme="dark"] .highlight { background: #2b2b2b; color: #F8F8F2 }
+html[data-theme="dark"] .highlight .c { color: #FFD900 } /* Comment */
+html[data-theme="dark"] .highlight .err { color: #FFA07A } /* Error */
+html[data-theme="dark"] .highlight .k { color: #DCC6E0 } /* Keyword */
+html[data-theme="dark"] .highlight .l { color: #FFD900 } /* Literal */
+html[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */
+html[data-theme="dark"] .highlight .o { color: #ABE338 } /* Operator */
+html[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */
+html[data-theme="dark"] .highlight .ch { color: #FFD900 } /* Comment.Hashbang */
+html[data-theme="dark"] .highlight .cm { color: #FFD900 } /* Comment.Multiline */
+html[data-theme="dark"] .highlight .cp { color: #FFD900 } /* Comment.Preproc */
+html[data-theme="dark"] .highlight .cpf { color: #FFD900 } /* Comment.PreprocFile */
+html[data-theme="dark"] .highlight .c1 { color: #FFD900 } /* Comment.Single */
+html[data-theme="dark"] .highlight .cs { color: #FFD900 } /* Comment.Special */
+html[data-theme="dark"] .highlight .gd { color: #00E0E0 } /* Generic.Deleted */
+html[data-theme="dark"] .highlight .ge { font-style: italic } /* Generic.Emph */
+html[data-theme="dark"] .highlight .gh { color: #00E0E0 } /* Generic.Heading */
+html[data-theme="dark"] .highlight .gs { font-weight: bold } /* Generic.Strong */
+html[data-theme="dark"] .highlight .gu { color: #00E0E0 } /* Generic.Subheading */
+html[data-theme="dark"] .highlight .kc { color: #DCC6E0 } /* Keyword.Constant */
+html[data-theme="dark"] .highlight .kd { color: #DCC6E0 } /* Keyword.Declaration */
+html[data-theme="dark"] .highlight .kn { color: #DCC6E0 } /* Keyword.Namespace */
+html[data-theme="dark"] .highlight .kp { color: #DCC6E0 } /* Keyword.Pseudo */
+html[data-theme="dark"] .highlight .kr { color: #DCC6E0 } /* Keyword.Reserved */
+html[data-theme="dark"] .highlight .kt { color: #FFD900 } /* Keyword.Type */
+html[data-theme="dark"] .highlight .ld { color: #FFD900 } /* Literal.Date */
+html[data-theme="dark"] .highlight .m { color: #FFD900 } /* Literal.Number */
+html[data-theme="dark"] .highlight .s { color: #ABE338 } /* Literal.String */
+html[data-theme="dark"] .highlight .na { color: #FFD900 } /* Name.Attribute */
+html[data-theme="dark"] .highlight .nb { color: #FFD900 } /* Name.Builtin */
+html[data-theme="dark"] .highlight .nc { color: #00E0E0 } /* Name.Class */
+html[data-theme="dark"] .highlight .no { color: #00E0E0 } /* Name.Constant */
+html[data-theme="dark"] .highlight .nd { color: #FFD900 } /* Name.Decorator */
+html[data-theme="dark"] .highlight .ni { color: #ABE338 } /* Name.Entity */
+html[data-theme="dark"] .highlight .ne { color: #DCC6E0 } /* Name.Exception */
+html[data-theme="dark"] .highlight .nf { color: #00E0E0 } /* Name.Function */
+html[data-theme="dark"] .highlight .nl { color: #FFD900 } /* Name.Label */
+html[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */
+html[data-theme="dark"] .highlight .nx { color: #F8F8F2 } /* Name.Other */
+html[data-theme="dark"] .highlight .py { color: #00E0E0 } /* Name.Property */
+html[data-theme="dark"] .highlight .nt { color: #00E0E0 } /* Name.Tag */
+html[data-theme="dark"] .highlight .nv { color: #FFA07A } /* Name.Variable */
+html[data-theme="dark"] .highlight .ow { color: #DCC6E0 } /* Operator.Word */
+html[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */
+html[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */
+html[data-theme="dark"] .highlight .mb { color: #FFD900 } /* Literal.Number.Bin */
+html[data-theme="dark"] .highlight .mf { color: #FFD900 } /* Literal.Number.Float */
+html[data-theme="dark"] .highlight .mh { color: #FFD900 } /* Literal.Number.Hex */
+html[data-theme="dark"] .highlight .mi { color: #FFD900 } /* Literal.Number.Integer */
+html[data-theme="dark"] .highlight .mo { color: #FFD900 } /* Literal.Number.Oct */
+html[data-theme="dark"] .highlight .sa { color: #ABE338 } /* Literal.String.Affix */
+html[data-theme="dark"] .highlight .sb { color: #ABE338 } /* Literal.String.Backtick */
+html[data-theme="dark"] .highlight .sc { color: #ABE338 } /* Literal.String.Char */
+html[data-theme="dark"] .highlight .dl { color: #ABE338 } /* Literal.String.Delimiter */
+html[data-theme="dark"] .highlight .sd { color: #ABE338 } /* Literal.String.Doc */
+html[data-theme="dark"] .highlight .s2 { color: #ABE338 } /* Literal.String.Double */
+html[data-theme="dark"] .highlight .se { color: #ABE338 } /* Literal.String.Escape */
+html[data-theme="dark"] .highlight .sh { color: #ABE338 } /* Literal.String.Heredoc */
+html[data-theme="dark"] .highlight .si { color: #ABE338 } /* Literal.String.Interpol */
+html[data-theme="dark"] .highlight .sx { color: #ABE338 } /* Literal.String.Other */
+html[data-theme="dark"] .highlight .sr { color: #FFA07A } /* Literal.String.Regex */
+html[data-theme="dark"] .highlight .s1 { color: #ABE338 } /* Literal.String.Single */
+html[data-theme="dark"] .highlight .ss { color: #00E0E0 } /* Literal.String.Symbol */
+html[data-theme="dark"] .highlight .bp { color: #FFD900 } /* Name.Builtin.Pseudo */
+html[data-theme="dark"] .highlight .fm { color: #00E0E0 } /* Name.Function.Magic */
+html[data-theme="dark"] .highlight .vc { color: #FFA07A } /* Name.Variable.Class */
+html[data-theme="dark"] .highlight .vg { color: #FFA07A } /* Name.Variable.Global */
+html[data-theme="dark"] .highlight .vi { color: #FFA07A } /* Name.Variable.Instance */
+html[data-theme="dark"] .highlight .vm { color: #FFD900 } /* Name.Variable.Magic */
+html[data-theme="dark"] .highlight .il { color: #FFD900 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/_static/sbt-webpack-macros.html b/_static/sbt-webpack-macros.html
old mode 100644
new mode 100755
diff --git a/_static/scripts/bootstrap.js b/_static/scripts/bootstrap.js
old mode 100644
new mode 100755
diff --git a/_static/scripts/bootstrap.js.LICENSE.txt b/_static/scripts/bootstrap.js.LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/scripts/bootstrap.js.map b/_static/scripts/bootstrap.js.map
old mode 100644
new mode 100755
diff --git a/_static/scripts/pydata-sphinx-theme.js b/_static/scripts/pydata-sphinx-theme.js
old mode 100644
new mode 100755
diff --git a/_static/scripts/pydata-sphinx-theme.js.map b/_static/scripts/pydata-sphinx-theme.js.map
old mode 100644
new mode 100755
diff --git a/_static/scripts/sphinx-book-theme.js b/_static/scripts/sphinx-book-theme.js
old mode 100644
new mode 100755
diff --git a/_static/scripts/sphinx-book-theme.js.map b/_static/scripts/sphinx-book-theme.js.map
old mode 100644
new mode 100755
diff --git a/_static/searchtools.js b/_static/searchtools.js
old mode 100644
new mode 100755
diff --git a/_static/sphinx-book-theme.40e2e510f6b7d1648584402491bb10fe.css b/_static/sphinx-book-theme.40e2e510f6b7d1648584402491bb10fe.css
old mode 100644
new mode 100755
diff --git a/_static/sphinx-book-theme.css b/_static/sphinx-book-theme.css
old mode 100644
new mode 100755
diff --git a/_static/sphinx-book-theme.d31b09fe5c1d09cb49b26a786de4a05d.js b/_static/sphinx-book-theme.d31b09fe5c1d09cb49b26a786de4a05d.js
old mode 100644
new mode 100755
diff --git a/_static/sphinx-book-theme.d59cb220de22ca1c485ebbdc042f0030.js b/_static/sphinx-book-theme.d59cb220de22ca1c485ebbdc042f0030.js
old mode 100644
new mode 100755
diff --git a/_static/sphinx-book-theme.e2363ea40746bee74734a24ffefccd78.css b/_static/sphinx-book-theme.e2363ea40746bee74734a24ffefccd78.css
old mode 100644
new mode 100755
diff --git a/_static/sphinx-design.min.css b/_static/sphinx-design.min.css
old mode 100644
new mode 100755
diff --git a/_static/sphinx-thebe.css b/_static/sphinx-thebe.css
old mode 100644
new mode 100755
diff --git a/_static/sphinx-thebe.js b/_static/sphinx-thebe.js
old mode 100644
new mode 100755
diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js
old mode 100644
new mode 100755
diff --git a/_static/styles/bootstrap.css b/_static/styles/bootstrap.css
old mode 100644
new mode 100755
diff --git a/_static/styles/bootstrap.css.map b/_static/styles/bootstrap.css.map
old mode 100644
new mode 100755
diff --git a/_static/styles/pydata-sphinx-theme.css b/_static/styles/pydata-sphinx-theme.css
old mode 100644
new mode 100755
diff --git a/_static/styles/pydata-sphinx-theme.css.map b/_static/styles/pydata-sphinx-theme.css.map
old mode 100644
new mode 100755
diff --git a/_static/styles/sphinx-book-theme.css b/_static/styles/sphinx-book-theme.css
old mode 100644
new mode 100755
diff --git a/_static/styles/sphinx-book-theme.css.map b/_static/styles/sphinx-book-theme.css.map
old mode 100644
new mode 100755
diff --git a/_static/styles/theme.css b/_static/styles/theme.css
old mode 100644
new mode 100755
diff --git a/_static/togglebutton.css b/_static/togglebutton.css
old mode 100644
new mode 100755
diff --git a/_static/togglebutton.js b/_static/togglebutton.js
old mode 100644
new mode 100755
diff --git a/_static/underscore-1.13.1.js b/_static/underscore-1.13.1.js
old mode 100644
new mode 100755
diff --git a/_static/underscore-1.3.1.js b/_static/underscore-1.3.1.js
old mode 100644
new mode 100755
diff --git a/_static/underscore.js b/_static/underscore.js
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/LICENSE.txt b/_static/vendor/fontawesome/5.13.0/LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/css/all.min.css b/_static/vendor/fontawesome/5.13.0/css/all.min.css
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/LICENSE.txt b/_static/vendor/fontawesome/6.1.2/LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/css/all.min.css b/_static/vendor/fontawesome/6.1.2/css/all.min.css
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/js/all.min.js b/_static/vendor/fontawesome/6.1.2/js/all.min.js
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/js/all.min.js.LICENSE.txt b/_static/vendor/fontawesome/6.1.2/js/all.min.js.LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.ttf b/_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2 b/_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.ttf b/_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2 b/_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.ttf b/_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2 b/_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-v4compatibility.ttf b/_static/vendor/fontawesome/6.1.2/webfonts/fa-v4compatibility.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.1.2/webfonts/fa-v4compatibility.woff2 b/_static/vendor/fontawesome/6.1.2/webfonts/fa-v4compatibility.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/LICENSE.txt b/_static/vendor/fontawesome/6.5.1/LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/css/all.min.css b/_static/vendor/fontawesome/6.5.1/css/all.min.css
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/js/all.min.js b/_static/vendor/fontawesome/6.5.1/js/all.min.js
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/js/all.min.js.LICENSE.txt b/_static/vendor/fontawesome/6.5.1/js/all.min.js.LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.ttf b/_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2 b/_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.ttf b/_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2 b/_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.ttf b/_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2 b/_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-v4compatibility.ttf b/_static/vendor/fontawesome/6.5.1/webfonts/fa-v4compatibility.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.1/webfonts/fa-v4compatibility.woff2 b/_static/vendor/fontawesome/6.5.1/webfonts/fa-v4compatibility.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/LICENSE.txt b/_static/vendor/fontawesome/6.5.2/LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/css/all.min.css b/_static/vendor/fontawesome/6.5.2/css/all.min.css
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/js/all.min.js b/_static/vendor/fontawesome/6.5.2/js/all.min.js
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/js/all.min.js.LICENSE.txt b/_static/vendor/fontawesome/6.5.2/js/all.min.js.LICENSE.txt
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.ttf b/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2 b/_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.ttf b/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2 b/_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.ttf b/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2 b/_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.ttf b/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.ttf
old mode 100644
new mode 100755
diff --git a/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.woff2 b/_static/vendor/fontawesome/6.5.2/webfonts/fa-v4compatibility.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/LICENSE.md b/_static/vendor/lato_latin-ext/1.44.1/LICENSE.md
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100-italic.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100-italic.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-100.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300-italic.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300-italic.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-300.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400-italic.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400-italic.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700-italic.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700-italic.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-700.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900-italic.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900-italic.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900.woff b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900.woff2 b/_static/vendor/lato_latin-ext/1.44.1/files/lato-latin-ext-900.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/lato_latin-ext/1.44.1/index.css b/_static/vendor/lato_latin-ext/1.44.1/index.css
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/LICENSE.md b/_static/vendor/open-sans_all/1.44.1/LICENSE.md
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400-italic.woff b/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400-italic.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400-italic.woff2 b/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400-italic.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400.woff b/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400.woff
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400.woff2 b/_static/vendor/open-sans_all/1.44.1/files/open-sans-all-400.woff2
old mode 100644
new mode 100755
diff --git a/_static/vendor/open-sans_all/1.44.1/index.css b/_static/vendor/open-sans_all/1.44.1/index.css
old mode 100644
new mode 100755
diff --git a/_static/webpack-macros.html b/_static/webpack-macros.html
old mode 100644
new mode 100755
diff --git a/docs/experiments/js_exp.html b/docs/experiments/js_exp.html
old mode 100644
new mode 100755
index fdcb3357..69a573f8
--- a/docs/experiments/js_exp.html
+++ b/docs/experiments/js_exp.html
@@ -1,874 +1,894 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Synthetic Data with Stable Diffusion for Foliar Disease Classification &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/experiments/js_exp';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Training DreamBooth on Naver Webtoon Face Dataset" href="swjo_exp.html" />
-    <link rel="prev" title="Dream Booth 3D" href="../review/DreamBooth3D.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="../review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/experiments/js_exp.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/experiments/js_exp.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Synthetic Data with Stable Diffusion for Foliar Disease Classification</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">1. 개요</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#baseline">2. Baseline 구축</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion-fine-tuning">3. Stable diffusion fine tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">4. 성능 비교</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">5. Discussion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">6. Appendix</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Synthetic Data with Stable Diffusion for Foliar Disease Classification</p></li>
-<li><p><strong>Author:</strong> Jisu Kim</p></li>
-<li><p><strong>Last updated on Jul. 05, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="synthetic-data-with-stable-diffusion-for-foliar-disease-classification">
-<h1>Synthetic Data with Stable Diffusion for Foliar Disease Classification<a class="headerlink" href="#synthetic-data-with-stable-diffusion-for-foliar-disease-classification" title="Permalink to this heading">#</a></h1>
-<section id="id1">
-<h2>1. 개요<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>사과 나무의 잎에 생기는 질병을 이미지로 판별하는 Kaggle competition (<a class="reference external" href="https://www.kaggle.com/competitions/plant-pathology-2020-fgvc7">링크</a>)에서 아이디어를 얻어서 진행한 프로젝트입니다.</p></li>
-<li><p>해당 competition은 사과나무 잎에 걸린 질병에 따라 잎 이미지를 4개의 class로 분류하는 task입니다.</p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/4classes.png"><img alt="4classes" class="bg-primary mb-1" src="../../_images/4classes.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 730 </span><span class="caption-text">4 classes of leaves</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>competition을 설명한 article (<a class="reference external" href="https://bsapubs.onlinelibrary.wiley.com/doi/10.1002/aps3.11390">링크</a>)에서 전체적인 accuracy는 97%이지만 multiple diseases class의 경우 accuracy가 51%에 불과했다고 언급합니다.</p></li>
-<li><p>multiple diseases class의 이미지 개수가 다른 class에 비해 적은 점에 주목했고, stable diffusion을 사용하여 해당 클래스의 데이터 개수를 늘려서 classifier 학습에 사용하면 더 좋은 성능의 classifier를 얻을 수 있을 것으로 기대했습니다.</p></li>
-</ul>
-</section>
-<section id="baseline">
-<h2>2. Baseline 구축<a class="headerlink" href="#baseline" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>문제 상황을 재현하기 위해 기존 데이터로 image classifier를 학습하여 baseline으로 잡았습니다.</p></li>
-<li><p>모델은 pretrained된 ResNet18에 linear layer를 붙여서 사용했습니다.</p></li>
-<li><p>전체 accuracy는 97.7%, class별 accuracy는 healthy: 99.6%, multiple diseases: 73.6%, rust: 99.2%, scab: 98.1%</p></li>
-<li><p>multiple diseases class는 이미지 개수 91개로 다른 클래스들에 비해서 개수가 적습니다.</p></li>
-<li><p>class별 data imbalance가 성능을 낮추는 원인일 것이라 가정하고 stable diffusion으로 multiple diseases class의 data를 추가로 생성해보기로 했습니다.</p></li>
-<li><p>multiple diseases class 예시</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_ex.png"><img alt="multiple_ex" class="bg-primary mb-1" src="../../_images/multiple_ex.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 731 </span><span class="caption-text">Examples of the multiple diseases class</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="stable-diffusion-fine-tuning">
-<h2>3. Stable diffusion fine tuning<a class="headerlink" href="#stable-diffusion-fine-tuning" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>pretrained stable diffusion의 경우 multiple diseases class에 대한 정보가 없어서 이미지를 생성할 경우 아래와 같이 관련 없는 이미지가 생성됩니다.</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_sd.png"><img alt="multiple_sd" class="bg-primary mb-1" src="../../_images/multiple_sd.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 732 </span><span class="caption-text">prompt: “a photo of leaves with multiple diseases”</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>따라서 stable diffusion model (<a class="reference external" href="https://huggingface.co/runwayml/stable-diffusion-v1-5">링크</a>)에 해당 class에 대한 정보를 넣어주기 위해 dreambooth (<a class="reference external" href="https://arxiv.org/abs/2208.12242">링크</a>)를 사용하여 stable diffusion을 fine tuning했습니다.</p></li>
-<li><p>training에 사용한 prompt는 “a photo of a &lt;diseaes-leaf&gt; leaf”이며, 생성한 이미지의 예시는 아래와 같습니다.</p></li>
-<li><p>생성 이미지 예시</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_db.png"><img alt="multiple_db" class="bg-primary mb-1" src="../../_images/multiple_db.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 733 </span><span class="caption-text">prompt: “a photo of a &lt;diseaes-leaf&gt; leaf”</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>prompt engineering을 수행하던 중 의도하지않은 결과를 발견했습니다.</p></li>
-<li><p>아래는 이에 대한 예시로 fine tuning 전의 stable diffusion model의 결과와 비교입니다.</p></li>
-<li><p>상황1 (prompt: “a photo of a leaf”)</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_sd.png"><img alt="leaf_sd" class="bg-primary mb-1" src="../../_images/leaf_sd.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 734 </span><span class="caption-text">fine tuning 전</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_db.png"><img alt="leaf_db" class="bg-primary mb-1" src="../../_images/leaf_db.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 735 </span><span class="caption-text">fine tuning 후</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>상황1을 보면 multiple diseases class 정보를 담은 unique identifier &lt;diseaes-leaf&gt;가 없음에도 multiple diseases의 정보를 담은 잎들만 생성됩니다. 이는 같은 class (leaf)에 속하는 다른 이미지들을 생성해내지 못하고 있다는 것입니다. 이 현상을 language drift라고 하며, 모델이 multiple diseases class의 leaf가 아닌 일반적인 leaf class에 관한 정보를 잊어버렸기 때문입니다.</p></li>
-<li><p>상황2 (prompt: “a photo”)</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_sd.png"><img alt="photo_sd" class="bg-primary mb-1" src="../../_images/photo_sd.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 736 </span><span class="caption-text">fine tuning 전</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_db.png"><img alt="photo_db" class="bg-primary mb-1" src="../../_images/photo_db.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 737 </span><span class="caption-text">fine tuning 후</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>상황2를 보면 photo라는 prompt만 사용하였는데도 생성한 이미지들에 multiple diseases class의 특징들이 나타납니다.</p></li>
-<li><p>dreambooth에서는 language drift를 prior preservation loss를 사용해서 해결하였으므로 같은 방법을 사용했습니다. 상황2를 해결하기 위해 training prompt에서 “photo”를 제외하고 최대한 단순한 prompt “&lt;diseases-leaf&gt; leaf”를 사용하여 stable diffusion model을 다시 fine tuning했습니다.</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_pp.png"><img alt="multiple_pp" class="bg-primary mb-1" src="../../_images/multiple_pp.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 738 </span><span class="caption-text">multiple diseases class 이미지 생성 결과, prompt: “&lt;diseaes-leaf&gt; leaf”</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_pp.png"><img alt="leaf_pp" class="bg-primary mb-1" src="../../_images/leaf_pp.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 739 </span><span class="caption-text">leaf 생성 결과, prompt: “leaf”</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>재훈련 결과, fine tuning 이후에도 기존 stable diffusion model로 “leaf”를 생성하였을 때와 비슷한 이미지가 생성됩니다.</p></li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_pp.png"><img alt="photo_pp" class="bg-primary mb-1" src="../../_images/photo_pp.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 740 </span><span class="caption-text">photo 생성 결과, prompt: “photo”</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>“photo”의 경우에는 여전히 multiple diseases class의 영향을 받은 것 같은 이미지들이 생성됩니다. photo의 경우에는 여러 대상들과 사용되는 일반적인 특성을 가지고 있어서 그런 것이라는 생각이 들었고, 이를 체크해보기 위해 특정한 대상들과 photo와 비슷한 용도로 사용되는 다른 prompt들로 이미지들을 생성해보았습니다.</p></li>
-<li><p>특정한 대상 세가지로는 cat, sea, pirate을 사용했고, photo와 비슷하게 사용되는 텍스트 세가지는 illustration, animation, wallpaper를 사용했습니다. (이미지는 글 마지막 부분의 appendix에 있습니다.)</p></li>
-<li><p>이미지 생성 결과, 특정한 대상을 지칭하는 텍스트의 경우 대상의 특징이 잘 드러나는 이미지가 생성되었지만, 여러 대상과 함께 쓰이는 텍스트의 경우 잎사귀의 특징을 가지는 이미지들이 일부 생성되었습니다.</p></li>
-</ul>
-</section>
-<section id="id2">
-<h2>4. 성능 비교<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>fine tuning한 stable diffusion model로 multiple diseases class의 이미지를 400장 생성하여 classifier를 다시 훈련했습니다.</p></li>
-</ul>
-<p>baseline</p>
-<ul class="simple">
-<li><p>전체 accuracy는 97.7%, class별 accuracy는 healthy: 99.6%, multiple diseases: 73.6%, rust: 99.2%, scab: 98.1%</p></li>
-</ul>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/result_base.png"><img alt="result_base" class="bg-primary mb-1" src="../../_images/result_base.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 741 </span><span class="caption-text">result_base</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>생성한 이미지를 추가 데이터로 활용한 경우</p>
-<ul class="simple">
-<li><p>전체 accuracy는 97.9%, class별 accuracy는 healthy: 98.1%, multiple diseases: 84.6%, rust: 98.2%, scab: 99.3%</p></li>
-</ul>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/result_new.png"><img alt="result_new" class="bg-primary mb-1" src="../../_images/result_new.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 742 </span><span class="caption-text">result_new</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>kaggle에서 제공하는 test set에 적용했을 때는 baseline이 94.6%, stable diffusion으로 생성한 이미지들을 사용한 경우가 93.7%여서 baseline보다 좋은 성능을 얻지는 못 했습니다.</p></li>
-</ul>
-</section>
-<section id="discussion">
-<h2>5. Discussion<a class="headerlink" href="#discussion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>stable diffusion 훈련 중간중간에 일정 step마다 이미지를 생성하게해서 훈련에 대한 모니터링이 있으면 좋겠다는 생각을 했습니다.</p></li>
-<li><p>stable diffusion 훈련시 hyperparameter tuning을 좀 더 철저하게 해야겠다는 생각을 했습니다.</p></li>
-<li><p>stable diffusion으로 생성한 이미지가 실제로 multiple diseases class 조건을 만족하는지 검수할 방안이 필요합니다.</p></li>
-<li><p>multiple diseases 내에서도 카테고리를 나눌 수 있다면 나눠서 각각에 대한 stable diffusion model을 fine tuning할 수도 있을 것입니다.</p></li>
-<li><p>다른 diffusion model fine tuning 방법을 활용해볼 수도 있을 것입니다.</p></li>
-<li><p>submission score에서 baseline을 이기지 못 했지만 text-to-image model을 이용한 synthetic data의 가능성을 볼 수 있었다고 생각합니다.</p></li>
-</ul>
-</section>
-<section id="appendix">
-<h2>6. Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>앞에서 언급한 prompt에 대한 이미지 생성 예시입니다. 일부 이미지는 NSFW로 판단되어 검은색으로 나왔습니다.</p></li>
-</ul>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cat.png"><img alt="cat" class="bg-primary mb-1" src="../../_images/cat.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 743 </span><span class="caption-text">cat 생성 결과, prompt: “cat”</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/sea.png"><img alt="sea" class="bg-primary mb-1" src="../../_images/sea.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 744 </span><span class="caption-text">sea 생성 결과, prompt: “sea”</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/pirate.png"><img alt="pirate" class="bg-primary mb-1" src="../../_images/pirate.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 745 </span><span class="caption-text">pirate 생성 결과, prompt: “pirate”</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/illustration.png"><img alt="illustration" class="bg-primary mb-1" src="../../_images/illustration.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 746 </span><span class="caption-text">illustration 생성 결과, prompt: “illustration”</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/animation.png"><img alt="animation" class="bg-primary mb-1" src="../../_images/animation.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 747 </span><span class="caption-text">animation 생성 결과, prompt: “animation”</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id21">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/wallpaper.png"><img alt="wallpaper" class="bg-primary mb-1" src="../../_images/wallpaper.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 748 </span><span class="caption-text">wallpaper 생성 결과, prompt: “wallpaper”</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/experiments"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="../review/DreamBooth3D.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Dream Booth 3D</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="swjo_exp.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Training DreamBooth on Naver Webtoon Face Dataset</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">1. 개요</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#baseline">2. Baseline 구축</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion-fine-tuning">3. Stable diffusion fine tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">4. 성능 비교</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">5. Discussion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">6. Appendix</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Synthetic Data with Stable Diffusion for Foliar Disease Classification &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/experiments/js_exp';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Training DreamBooth on Naver Webtoon Face Dataset" href="swjo_exp.html" />
+    <link rel="prev" title="Coin3D" href="../review/Coin3D.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="../review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/experiments/js_exp.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/experiments/js_exp.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Synthetic Data with Stable Diffusion for Foliar Disease Classification</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">1. 개요</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#baseline">2. Baseline 구축</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion-fine-tuning">3. Stable diffusion fine tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">4. 성능 비교</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">5. Discussion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">6. Appendix</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Synthetic Data with Stable Diffusion for Foliar Disease Classification</p></li>
+<li><p><strong>Author:</strong> Jisu Kim</p></li>
+<li><p><strong>Last updated on Jul. 05, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="synthetic-data-with-stable-diffusion-for-foliar-disease-classification">
+<h1>Synthetic Data with Stable Diffusion for Foliar Disease Classification<a class="headerlink" href="#synthetic-data-with-stable-diffusion-for-foliar-disease-classification" title="Permalink to this heading">#</a></h1>
+<section id="id1">
+<h2>1. 개요<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>사과 나무의 잎에 생기는 질병을 이미지로 판별하는 Kaggle competition (<a class="reference external" href="https://www.kaggle.com/competitions/plant-pathology-2020-fgvc7">링크</a>)에서 아이디어를 얻어서 진행한 프로젝트입니다.</p></li>
+<li><p>해당 competition은 사과나무 잎에 걸린 질병에 따라 잎 이미지를 4개의 class로 분류하는 task입니다.</p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/4classes.png"><img alt="4classes" class="bg-primary mb-1" src="../../_images/4classes.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 820 </span><span class="caption-text">4 classes of leaves</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>competition을 설명한 article (<a class="reference external" href="https://bsapubs.onlinelibrary.wiley.com/doi/10.1002/aps3.11390">링크</a>)에서 전체적인 accuracy는 97%이지만 multiple diseases class의 경우 accuracy가 51%에 불과했다고 언급합니다.</p></li>
+<li><p>multiple diseases class의 이미지 개수가 다른 class에 비해 적은 점에 주목했고, stable diffusion을 사용하여 해당 클래스의 데이터 개수를 늘려서 classifier 학습에 사용하면 더 좋은 성능의 classifier를 얻을 수 있을 것으로 기대했습니다.</p></li>
+</ul>
+</section>
+<section id="baseline">
+<h2>2. Baseline 구축<a class="headerlink" href="#baseline" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>문제 상황을 재현하기 위해 기존 데이터로 image classifier를 학습하여 baseline으로 잡았습니다.</p></li>
+<li><p>모델은 pretrained된 ResNet18에 linear layer를 붙여서 사용했습니다.</p></li>
+<li><p>전체 accuracy는 97.7%, class별 accuracy는 healthy: 99.6%, multiple diseases: 73.6%, rust: 99.2%, scab: 98.1%</p></li>
+<li><p>multiple diseases class는 이미지 개수 91개로 다른 클래스들에 비해서 개수가 적습니다.</p></li>
+<li><p>class별 data imbalance가 성능을 낮추는 원인일 것이라 가정하고 stable diffusion으로 multiple diseases class의 data를 추가로 생성해보기로 했습니다.</p></li>
+<li><p>multiple diseases class 예시</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_ex.png"><img alt="multiple_ex" class="bg-primary mb-1" src="../../_images/multiple_ex.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 821 </span><span class="caption-text">Examples of the multiple diseases class</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="stable-diffusion-fine-tuning">
+<h2>3. Stable diffusion fine tuning<a class="headerlink" href="#stable-diffusion-fine-tuning" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>pretrained stable diffusion의 경우 multiple diseases class에 대한 정보가 없어서 이미지를 생성할 경우 아래와 같이 관련 없는 이미지가 생성됩니다.</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_sd.png"><img alt="multiple_sd" class="bg-primary mb-1" src="../../_images/multiple_sd.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 822 </span><span class="caption-text">prompt: “a photo of leaves with multiple diseases”</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>따라서 stable diffusion model (<a class="reference external" href="https://huggingface.co/runwayml/stable-diffusion-v1-5">링크</a>)에 해당 class에 대한 정보를 넣어주기 위해 dreambooth (<a class="reference external" href="https://arxiv.org/abs/2208.12242">링크</a>)를 사용하여 stable diffusion을 fine tuning했습니다.</p></li>
+<li><p>training에 사용한 prompt는 “a photo of a &lt;diseaes-leaf&gt; leaf”이며, 생성한 이미지의 예시는 아래와 같습니다.</p></li>
+<li><p>생성 이미지 예시</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_db.png"><img alt="multiple_db" class="bg-primary mb-1" src="../../_images/multiple_db.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 823 </span><span class="caption-text">prompt: “a photo of a &lt;diseaes-leaf&gt; leaf”</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>prompt engineering을 수행하던 중 의도하지 않은 결과를 발견했습니다.</p></li>
+<li><p>아래는 이에 대한 예시로 fine tuning 전의 stable diffusion model의 결과와 비교입니다.</p></li>
+<li><p>상황1 (prompt: “a photo of a leaf”)</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_sd.png"><img alt="leaf_sd" class="bg-primary mb-1" src="../../_images/leaf_sd.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 824 </span><span class="caption-text">fine tuning 전</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_db.png"><img alt="leaf_db" class="bg-primary mb-1" src="../../_images/leaf_db.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 825 </span><span class="caption-text">fine tuning 후</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>상황1을 보면 multiple diseases class 정보를 담은 unique identifier &lt;diseaes-leaf&gt;가 없음에도 multiple diseases의 정보를 담은 잎들만 생성됩니다. 이는 같은 class (leaf)에 속하는 다른 이미지들을 생성해내지 못하고 있다는 것입니다. 이 현상을 language drift라고 하며, 모델이 multiple diseases class의 leaf가 아닌 일반적인 leaf class에 관한 정보를 잊어버렸기 때문입니다.</p></li>
+<li><p>상황2 (prompt: “a photo”)</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_sd.png"><img alt="photo_sd" class="bg-primary mb-1" src="../../_images/photo_sd.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 826 </span><span class="caption-text">fine tuning 전</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_db.png"><img alt="photo_db" class="bg-primary mb-1" src="../../_images/photo_db.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 827 </span><span class="caption-text">fine tuning 후</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>상황2를 보면 photo라는 prompt만 사용하였는데도 생성한 이미지들에 multiple diseases class의 특징들이 나타납니다.</p></li>
+<li><p>dreambooth에서는 language drift를 prior preservation loss를 사용해서 해결하였으므로 같은 방법을 사용했습니다. 상황2를 해결하기 위해 training prompt에서 “photo”를 제외하고 최대한 단순한 prompt “&lt;diseases-leaf&gt; leaf”를 사용하여 stable diffusion model을 다시 fine tuning했습니다.</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multiple_pp.png"><img alt="multiple_pp" class="bg-primary mb-1" src="../../_images/multiple_pp.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 828 </span><span class="caption-text">multiple diseases class 이미지 생성 결과, prompt: “&lt;diseaes-leaf&gt; leaf”</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/leaf_pp.png"><img alt="leaf_pp" class="bg-primary mb-1" src="../../_images/leaf_pp.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 829 </span><span class="caption-text">leaf 생성 결과, prompt: “leaf”</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>재훈련 결과, fine tuning 이후에도 기존 stable diffusion model로 “leaf”를 생성하였을 때와 비슷한 이미지가 생성됩니다.</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/photo_pp.png"><img alt="photo_pp" class="bg-primary mb-1" src="../../_images/photo_pp.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 830 </span><span class="caption-text">photo 생성 결과, prompt: “photo”</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>“photo”의 경우에는 여전히 multiple diseases class의 영향을 받은 것 같은 이미지들이 생성됩니다. photo의 경우에는 여러 대상들과 사용되는 일반적인 특성을 가지고 있어서 그런 것이라는 생각이 들었고, 이를 체크해보기 위해 특정한 대상들과 photo와 비슷한 용도로 사용되는 다른 prompt들로 이미지들을 생성해 보았습니다.</p></li>
+<li><p>특정한 대상 세가지로는 cat, sea, pirate을 사용했고, photo와 비슷하게 사용되는 텍스트 세가지는 illustration, animation, wallpaper를 사용했습니다. (이미지는 글 마지막 부분의 appendix에 있습니다.)</p></li>
+<li><p>이미지 생성 결과, 특정한 대상을 지칭하는 텍스트의 경우 대상의 특징이 잘 드러나는 이미지가 생성되었지만, 여러 대상과 함께 쓰이는 텍스트의 경우 잎사귀의 특징을 가지는 이미지들이 일부 생성되었습니다.</p></li>
+</ul>
+</section>
+<section id="id2">
+<h2>4. 성능 비교<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>fine tuning한 stable diffusion model로 multiple diseases class의 이미지를 400장 생성하여 classifier를 다시 훈련했습니다.</p></li>
+</ul>
+<p>baseline</p>
+<ul class="simple">
+<li><p>전체 accuracy는 97.7%, class별 accuracy는 healthy: 99.6%, multiple diseases: 73.6%, rust: 99.2%, scab: 98.1%</p></li>
+</ul>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/result_base.png"><img alt="result_base" class="bg-primary mb-1" src="../../_images/result_base.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 831 </span><span class="caption-text">result_base</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>생성한 이미지를 추가 데이터로 활용한 경우</p>
+<ul class="simple">
+<li><p>전체 accuracy는 97.9%, class별 accuracy는 healthy: 98.1%, multiple diseases: 84.6%, rust: 98.2%, scab: 99.3%</p></li>
+</ul>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/result_new.png"><img alt="result_new" class="bg-primary mb-1" src="../../_images/result_new.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 832 </span><span class="caption-text">result_new</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>kaggle에서 제공하는 test set에 적용했을 때는 baseline이 94.6%, stable diffusion으로 생성한 이미지들을 사용한 경우가 93.7%여서 baseline보다 좋은 성능을 얻지는 못 했습니다.</p></li>
+</ul>
+</section>
+<section id="discussion">
+<h2>5. Discussion<a class="headerlink" href="#discussion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>stable diffusion 훈련 중간중간에 일정 step마다 이미지를 생성하게 해서 훈련에 대한 모니터링이 있으면 좋겠다는 생각을 했습니다.</p></li>
+<li><p>stable diffusion 훈련시 hyperparameter tuning을 좀 더 철저하게 해야겠다는 생각을 했습니다.</p></li>
+<li><p>stable diffusion으로 생성한 이미지가 실제로 multiple diseases class 조건을 만족하는지 검수할 방안이 필요합니다.</p></li>
+<li><p>multiple diseases 내에서도 카테고리를 나눌 수 있다면 나눠서 각각에 대한 stable diffusion model을 fine tuning할 수도 있을 것입니다.</p></li>
+<li><p>다른 diffusion model fine tuning 방법을 활용해볼 수도 있을 것입니다.</p></li>
+<li><p>submission score에서 baseline을 이기지 못 했지만 text-to-image model을 이용한 synthetic data의 가능성을 볼 수 있었다고 생각합니다.</p></li>
+</ul>
+</section>
+<section id="appendix">
+<h2>6. Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>앞에서 언급한 prompt에 대한 이미지 생성 예시입니다. 일부 이미지는 NSFW로 판단되어 검은색으로 나왔습니다.</p></li>
+</ul>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cat.png"><img alt="cat" class="bg-primary mb-1" src="../../_images/cat.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 833 </span><span class="caption-text">cat 생성 결과, prompt: “cat”</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/sea.png"><img alt="sea" class="bg-primary mb-1" src="../../_images/sea.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 834 </span><span class="caption-text">sea 생성 결과, prompt: “sea”</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/pirate.png"><img alt="pirate" class="bg-primary mb-1" src="../../_images/pirate.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 835 </span><span class="caption-text">pirate 생성 결과, prompt: “pirate”</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/illustration.png"><img alt="illustration" class="bg-primary mb-1" src="../../_images/illustration.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 836 </span><span class="caption-text">illustration 생성 결과, prompt: “illustration”</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/animation.png"><img alt="animation" class="bg-primary mb-1" src="../../_images/animation.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 837 </span><span class="caption-text">animation 생성 결과, prompt: “animation”</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id21">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/wallpaper.png"><img alt="wallpaper" class="bg-primary mb-1" src="../../_images/wallpaper.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 838 </span><span class="caption-text">wallpaper 생성 결과, prompt: “wallpaper”</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/experiments"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="../review/Coin3D.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Coin3D</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="swjo_exp.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Training DreamBooth on Naver Webtoon Face Dataset</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">1. 개요</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#baseline">2. Baseline 구축</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion-fine-tuning">3. Stable diffusion fine tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">4. 성능 비교</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">5. Discussion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">6. Appendix</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/experiments/swjo_exp.html b/docs/experiments/swjo_exp.html
old mode 100644
new mode 100755
index c0c3d16c..99fb3875
--- a/docs/experiments/swjo_exp.html
+++ b/docs/experiments/swjo_exp.html
@@ -1,952 +1,972 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Training DreamBooth on Naver Webtoon Face Dataset &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/experiments/swjo_exp';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="prev" title="Synthetic Data with Stable Diffusion for Foliar Disease Classification" href="js_exp.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="../review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../review/DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/experiments/swjo_exp.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/experiments/swjo_exp.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Training DreamBooth on Naver Webtoon Face Dataset</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prior-preservation-loss">Prior Preservation Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#negative-prompt">Negative Prompt</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instance-prompt-guidance-scale">Instance Prompt / Guidance Scale</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Training DreamBooth on Naver Webtoon Face Dataset</p></li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Jul. 09, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="training-dreambooth-on-naver-webtoon-face-dataset">
-<h1>Training DreamBooth on Naver Webtoon Face Dataset<a class="headerlink" href="#training-dreambooth-on-naver-webtoon-face-dataset" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>이번 포스팅에서는 DreamBooth 를 직접 학습해보고 실험한 결과들을 공유하려고 합니다.</p>
-<p>우선적으로 학습데이터는 <a class="github reference external" href="https://github.com/bryandlee/naver-webtoon-data">bryandlee/naver-webtoon-data</a> 에 공개된 YOLOv5 모델 및 Waifu2x 후처리 기법을 활용하여 프리드로우에 등장하는 인물 사진들을 수집했습니다. 논문에서는 3-5 장으로 fine-tuning 이 가능하다고 제시되어있지만, 인물 사진 같은 경우 더 많은 데이터로 학습하면 성능이 더 좋아져서 15-20 장의 이미지로 학습하였습니다. 학습한 이미지들 예시입니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_01.png"><img alt="swjo_exp_01" class="bg-primary mb-1" src="../../_images/swjo_exp_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 749 </span><span class="caption-text">Training Data</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DreamBooth 를 실험하면서 대표적으로 instance prompt, guidance scale, negative prompt, 그리고 마지막으로 prior preservation loss 를 반영하는 정도를 조절하는 prior_loss_weight 를 바꿔가면서 학습해보았습니다. 사전학습된 text-to-image 모델로 처음에는 <em>hakurei/waifu-diffusion</em> 모델을 시도해봤지만 결과가 만족스럽지 못해 <em>runwayml/stable-diffusion-v1-5</em> 모델로 fine-tuning 작업을 진행했습니다.</p>
-</section>
-<section id="ablation-studies">
-<h2>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
-<section id="prior-preservation-loss">
-<h3>Prior Preservation Loss<a class="headerlink" href="#prior-preservation-loss" title="Permalink to this heading">#</a></h3>
-<p>Prior Preservation Loss 를 제외한 동일한 configuration 으로 모델 학습한 결과입니다.</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># with prior-preservation loss
-MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
-instance_prompt = &quot;A photo of sks girl&quot;
-class_prompt = &quot;A photo of a girl&quot;
-
-python3 train_dreambooth.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
-  --output_dir=$OUTPUT_DIR \
-  --revision=&quot;fp16&quot; \
-  --with_prior_preservation --prior_loss_weight=1.0 \
-  --seed=1337 \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --train_text_encoder \
-  --mixed_precision=&quot;fp16&quot; \
-  --use_8bit_adam \
-  --gradient_accumulation_steps=1 --gradient_checkpointing \
-  --learning_rate=1e-6 \
-  --lr_scheduler=&quot;constant&quot; \
-  --lr_warmup_steps=0 \
-  --num_class_images=200 \
-  --sample_batch_size=4 \
-  --max_train_steps=800 \
-  --save_interval=100 \
-  --save_sample_prompt=&quot;A photo of sks girl&quot; \
-  --concepts_list=&quot;concepts_list.json&quot;
-</pre></div>
-</div>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># w/o prior-preservation loss
-MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
-instance_prompt = &quot;A photo of sks girl&quot;
-class_prompt = &quot;A photo of a girl&quot;
-
-python3 train_dreambooth.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
-  --output_dir=$OUTPUT_DIR \
-  --revision=&quot;fp16&quot; \
-  --with_prior_preservation --prior_loss_weight=0.0 \
-  --seed=1337 \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --train_text_encoder \
-  --mixed_precision=&quot;fp16&quot; \
-  --use_8bit_adam \
-  --gradient_accumulation_steps=1 --gradient_checkpointing \
-  --learning_rate=1e-6 \
-  --lr_scheduler=&quot;constant&quot; \
-  --lr_warmup_steps=0 \
-  --num_class_images=200 \
-  --sample_batch_size=4 \
-  --max_train_steps=800 \
-  --save_interval=100 \
-  --save_sample_prompt=&quot;A photo of sks girl&quot; \
-  --concepts_list=&quot;concepts_list.json&quot;
-</pre></div>
-</div>
-<p>아래 그림처럼 동일한 inference prompt 를 입력했을 때, prior preservation loss 를 제외함으로써 input images 에 더 가까운 웹툰 사진들을 생성할 수 있었습니다. 또한, 핑크색 머리를 한 이민지 캐릭터를 어느 정도 잘 생성하는 부분도 확인할 수 있습니다.</p>
-<ul class="simple">
-<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (with prior-preservation loss)</strong></p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_02.png"><img alt="swjo_exp_02" class="bg-primary mb-1" src="../../_images/swjo_exp_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 750 </span><span class="caption-text">With Prior Preservation Loss</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (w/o prior-preservation loss)</strong></p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_03.png"><img alt="swjo_exp_03" class="bg-primary mb-1" src="../../_images/swjo_exp_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 751 </span><span class="caption-text">Without Prior Preservation Loss</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="negative-prompt">
-<h3>Negative Prompt<a class="headerlink" href="#negative-prompt" title="Permalink to this heading">#</a></h3>
-<p>Negative Prompt 에 대한 Ablation Study 도 진행했습니다. 캐릭터의 부자연스러운 부분이나 저해상도 이미지들을 생성하는 경우들이 종종 발생했는데, negative prompt 를 통해 더 좋은 퀄리티의 웹툰 캐릭터를 생성할 수 있었습니다.</p>
-<ul class="simple">
-<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (w/o negative prompt)</strong></p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_03.png"><img alt="swjo_exp_03" class="bg-primary mb-1" src="../../_images/swjo_exp_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 752 </span><span class="caption-text">Without Negative Prompt</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair”</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_04.png"><img alt="swjo_exp_04" class="bg-primary mb-1" src="../../_images/swjo_exp_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 753 </span><span class="caption-text">With Negative Prompt</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="instance-prompt-guidance-scale">
-<h3>Instance Prompt / Guidance Scale<a class="headerlink" href="#instance-prompt-guidance-scale" title="Permalink to this heading">#</a></h3>
-<p>DreamBooth 논문에서 제시한 instance prompt 외에 “A photo of a girl in the style of <em>sks</em>” 라는 prompt 로 학습을 시도해보기도 했습니다. <em>sks</em> 라는 unique identifier 에 특정 여자 캐릭터에 대한 정보뿐만 아니라 프리드로우 그림체 자체를 담아내기 위한 목적이었습니다.</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># different instance prompt with prior-preservation loss
-MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
-instance_prompt = &quot;A photo of a girl in the style of sks&quot;
-class_prompt = &quot;A photo of a girl&quot;
-
-python3 train_dreambooth.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
-  --output_dir=$OUTPUT_DIR \
-  --revision=&quot;fp16&quot; \
-  --with_prior_preservation --prior_loss_weight=1.0 \
-  --seed=1337 \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --train_text_encoder \
-  --mixed_precision=&quot;fp16&quot; \
-  --use_8bit_adam \
-  --gradient_accumulation_steps=1 --gradient_checkpointing \
-  --learning_rate=1e-6 \
-  --lr_scheduler=&quot;constant&quot; \
-  --lr_warmup_steps=0 \
-  --num_class_images=200 \
-  --sample_batch_size=4 \
-  --max_train_steps=800 \
-  --save_interval=100 \
-  --save_sample_prompt=&quot;A photo of sks girl&quot; \
-  --concepts_list=&quot;concepts_list.json&quot;
-</pre></div>
-</div>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># different instance prompt w/o prior-preservation loss
-MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
-instance_prompt = &quot;A photo of a girl in the style of sks&quot;
-class_prompt = &quot;A photo of a girl&quot;
-
-python3 train_dreambooth.py \
-  --pretrained_model_name_or_path=$MODEL_NAME \
-  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
-  --output_dir=$OUTPUT_DIR \
-  --revision=&quot;fp16&quot; \
-  --with_prior_preservation --prior_loss_weight=0.0 \
-  --seed=1337 \
-  --resolution=512 \
-  --train_batch_size=1 \
-  --train_text_encoder \
-  --mixed_precision=&quot;fp16&quot; \
-  --use_8bit_adam \
-  --gradient_accumulation_steps=1 --gradient_checkpointing \
-  --learning_rate=1e-6 \
-  --lr_scheduler=&quot;constant&quot; \
-  --lr_warmup_steps=0 \
-  --num_class_images=200 \
-  --sample_batch_size=4 \
-  --max_train_steps=800 \
-  --save_interval=100 \
-  --save_sample_prompt=&quot;A photo of sks girl&quot; \
-  --concepts_list=&quot;concepts_list.json&quot;
-</pre></div>
-</div>
-<p>Inference 시, 프리드로우의 그림체가 반영된 남자가 생성되도록 prompt 를 “A photo of a boy in the style of <em>sks</em>” 로 입력했을때의 결과입니다. DreamBooth 혹은 사전학습된 text-to-image 모델을 프리드로우 작가님의 웹툰 장면들로 전체적으로 학습하게 된다면 더 다양한 inference 결과들을 볼 수 있을 것 같습니다.</p>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 24 / with prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_05.png"><img alt="swjo_exp_05" class="bg-primary mb-1" src="../../_images/swjo_exp_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 754 </span><span class="caption-text">Instance Prompt</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Inference step 을 늘려가면서 추론된 인물 이미지의 퀄리티가 상승하는 부분도 확인할 수 있었습니다. 또한, guidance scale 에 대한 실험도 진행했는데 guidance scale 이 작을수록 prompt 와 무관한 random 한 이미지들을 생성하게 됩니다. 최종적으로 num_inference_steps 와 guidance scale 의 값은 각각 100 과 7.5 로 설정하였습니다.</p>
-<ul class="simple">
-<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps=100 / with prior-preservation loss)</strong></p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_06.png"><img alt="swjo_exp_06" class="bg-primary mb-1" src="../../_images/swjo_exp_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 755 </span><span class="caption-text">Increasing Number of Inference Steps</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_07.png"><img alt="swjo_exp_07" class="bg-primary mb-1" src="../../_images/swjo_exp_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 756 </span><span class="caption-text">Increasing Number of Inference Steps / Negative Prompt</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-<p><strong>+ guidance_scale = 4</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_08.png"><img alt="swjo_exp_08" class="bg-primary mb-1" src="../../_images/swjo_exp_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 757 </span><span class="caption-text">Guidance Scale</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>동일한 inference prompt 로 prior-preservation loss 를 제외해본 결과, 생성된 남자의 머리카락이 더 길어지고 더 여성스러운 생김새를 가지는 놀라운 사실도 발견했습니다.</p>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_09.png"><img alt="swjo_exp_09" class="bg-primary mb-1" src="../../_images/swjo_exp_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 758 </span><span class="caption-text">Without Prior Preservation Loss</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="appendix">
-<h2>Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
-<p>그 외 다양한 inference prompt 에 따른 재미있는 실험결과들을 공유합니다. 아직 손의 모양을 text-to-image 모델이 생성하지 못하는 부분도 재차 확인할 수 있었습니다.</p>
-<ul>
-<li><p><strong>Inference Prompt: “A photo of a boy climbing up the mountain in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_10.png"><img alt="swjo_exp_10" class="bg-primary mb-1" src="../../_images/swjo_exp_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 759 </span><span class="caption-text">Appendix 1</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>Inference Prompt: “A painting of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_11.png"><img alt="swjo_exp_11" class="bg-primary mb-1" src="../../_images/swjo_exp_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 760 </span><span class="caption-text">Appendix 2</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>Inference Prompt: “A hand drawing of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
-<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
-</li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_12.png"><img alt="swjo_exp_12" class="bg-primary mb-1" src="../../_images/swjo_exp_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 761 </span><span class="caption-text">Appendix 3</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>마지막으로 하단의 좌측과 우측 사진은 각각 “A photo of <em>sks</em> girl” 그리고 “A photo of a girl in the style of <em>sks</em>” 이라는 prompt 로 DreamBooth 모델을 각각 학습한 후, 나비를 생성하라는 동일한 prompt 로 추론해본 결과입니다. <em>sks</em> 가 수식하는 명사가 girl 이 아닌 style 이도록 prompt 를 수정함으로써, butterfly 사진을 생성할때 조금이나마 더 프리드로우 웹툰의 그림체를 반영할 수 있었던 부분도 확인할 수 있었습니다.</p>
-<ul class="simple">
-<li><p><strong>Inference Prompt: “A photo of a butterfly in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p></li>
-</ul>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_13.png"><img alt="swjo_exp_13" class="bg-primary mb-1" src="../../_images/swjo_exp_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 762 </span><span class="caption-text">Appendix 4</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/experiments"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="js_exp.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Synthetic Data with Stable Diffusion for Foliar Disease Classification</p>
-      </div>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prior-preservation-loss">Prior Preservation Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#negative-prompt">Negative Prompt</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instance-prompt-guidance-scale">Instance Prompt / Guidance Scale</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Training DreamBooth on Naver Webtoon Face Dataset &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/experiments/swjo_exp';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="prev" title="Synthetic Data with Stable Diffusion for Foliar Disease Classification" href="js_exp.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="../review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../review/ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="../review/Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/experiments/swjo_exp.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/experiments/swjo_exp.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Training DreamBooth on Naver Webtoon Face Dataset</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prior-preservation-loss">Prior Preservation Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#negative-prompt">Negative Prompt</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instance-prompt-guidance-scale">Instance Prompt / Guidance Scale</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Training DreamBooth on Naver Webtoon Face Dataset</p></li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Jul. 09, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="training-dreambooth-on-naver-webtoon-face-dataset">
+<h1>Training DreamBooth on Naver Webtoon Face Dataset<a class="headerlink" href="#training-dreambooth-on-naver-webtoon-face-dataset" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>이번 포스팅에서는 DreamBooth 를 직접 학습해보고 실험한 결과들을 공유하려고 합니다.</p>
+<p>우선적으로 학습데이터는 <a class="github reference external" href="https://github.com/bryandlee/naver-webtoon-data">bryandlee/naver-webtoon-data</a> 에 공개된 YOLOv5 모델 및 Waifu2x 후처리 기법을 활용하여 프리드로우에 등장하는 인물 사진들을 수집했습니다. 논문에서는 3-5 장으로 fine-tuning 이 가능하다고 제시되어있지만, 인물 사진 같은 경우 더 많은 데이터로 학습하면 성능이 더 좋아져서 15-20 장의 이미지로 학습하였습니다. 학습한 이미지들 예시입니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_01.png"><img alt="swjo_exp_01" class="bg-primary mb-1" src="../../_images/swjo_exp_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 839 </span><span class="caption-text">Training Data</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DreamBooth 를 실험하면서 대표적으로 instance prompt, guidance scale, negative prompt, 그리고 마지막으로 prior preservation loss 를 반영하는 정도를 조절하는 prior_loss_weight 를 바꿔가면서 학습해보았습니다. 사전학습된 text-to-image 모델로 처음에는 <em>hakurei/waifu-diffusion</em> 모델을 시도해봤지만 결과가 만족스럽지 못해 <em>runwayml/stable-diffusion-v1-5</em> 모델로 fine-tuning 작업을 진행했습니다.</p>
+</section>
+<section id="ablation-studies">
+<h2>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
+<section id="prior-preservation-loss">
+<h3>Prior Preservation Loss<a class="headerlink" href="#prior-preservation-loss" title="Permalink to this heading">#</a></h3>
+<p>Prior Preservation Loss 를 제외한 동일한 configuration 으로 모델 학습한 결과입니다.</p>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># with prior-preservation loss
+MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
+instance_prompt = &quot;A photo of sks girl&quot;
+class_prompt = &quot;A photo of a girl&quot;
+
+python3 train_dreambooth.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
+  --output_dir=$OUTPUT_DIR \
+  --revision=&quot;fp16&quot; \
+  --with_prior_preservation --prior_loss_weight=1.0 \
+  --seed=1337 \
+  --resolution=512 \
+  --train_batch_size=1 \
+  --train_text_encoder \
+  --mixed_precision=&quot;fp16&quot; \
+  --use_8bit_adam \
+  --gradient_accumulation_steps=1 --gradient_checkpointing \
+  --learning_rate=1e-6 \
+  --lr_scheduler=&quot;constant&quot; \
+  --lr_warmup_steps=0 \
+  --num_class_images=200 \
+  --sample_batch_size=4 \
+  --max_train_steps=800 \
+  --save_interval=100 \
+  --save_sample_prompt=&quot;A photo of sks girl&quot; \
+  --concepts_list=&quot;concepts_list.json&quot;
+</pre></div>
+</div>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># w/o prior-preservation loss
+MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
+instance_prompt = &quot;A photo of sks girl&quot;
+class_prompt = &quot;A photo of a girl&quot;
+
+python3 train_dreambooth.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
+  --output_dir=$OUTPUT_DIR \
+  --revision=&quot;fp16&quot; \
+  --with_prior_preservation --prior_loss_weight=0.0 \
+  --seed=1337 \
+  --resolution=512 \
+  --train_batch_size=1 \
+  --train_text_encoder \
+  --mixed_precision=&quot;fp16&quot; \
+  --use_8bit_adam \
+  --gradient_accumulation_steps=1 --gradient_checkpointing \
+  --learning_rate=1e-6 \
+  --lr_scheduler=&quot;constant&quot; \
+  --lr_warmup_steps=0 \
+  --num_class_images=200 \
+  --sample_batch_size=4 \
+  --max_train_steps=800 \
+  --save_interval=100 \
+  --save_sample_prompt=&quot;A photo of sks girl&quot; \
+  --concepts_list=&quot;concepts_list.json&quot;
+</pre></div>
+</div>
+<p>아래 그림처럼 동일한 inference prompt 를 입력했을 때, prior preservation loss 를 제외함으로써 input images 에 더 가까운 웹툰 사진들을 생성할 수 있었습니다. 또한, 핑크색 머리를 한 이민지 캐릭터를 어느 정도 잘 생성하는 부분도 확인할 수 있습니다.</p>
+<ul class="simple">
+<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (with prior-preservation loss)</strong></p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_02.png"><img alt="swjo_exp_02" class="bg-primary mb-1" src="../../_images/swjo_exp_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 840 </span><span class="caption-text">With Prior Preservation Loss</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (w/o prior-preservation loss)</strong></p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_03.png"><img alt="swjo_exp_03" class="bg-primary mb-1" src="../../_images/swjo_exp_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 841 </span><span class="caption-text">Without Prior Preservation Loss</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="negative-prompt">
+<h3>Negative Prompt<a class="headerlink" href="#negative-prompt" title="Permalink to this heading">#</a></h3>
+<p>Negative Prompt 에 대한 Ablation Study 도 진행했습니다. 캐릭터의 부자연스러운 부분이나 저해상도 이미지들을 생성하는 경우들이 종종 발생했는데, negative prompt 를 통해 더 좋은 퀄리티의 웹툰 캐릭터를 생성할 수 있었습니다.</p>
+<ul class="simple">
+<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair” (w/o negative prompt)</strong></p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_03.png"><img alt="swjo_exp_03" class="bg-primary mb-1" src="../../_images/swjo_exp_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 842 </span><span class="caption-text">Without Negative Prompt</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of <em>sks</em> girl with pink hair”</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_04.png"><img alt="swjo_exp_04" class="bg-primary mb-1" src="../../_images/swjo_exp_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 843 </span><span class="caption-text">With Negative Prompt</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="instance-prompt-guidance-scale">
+<h3>Instance Prompt / Guidance Scale<a class="headerlink" href="#instance-prompt-guidance-scale" title="Permalink to this heading">#</a></h3>
+<p>DreamBooth 논문에서 제시한 instance prompt 외에 “A photo of a girl in the style of <em>sks</em>” 라는 prompt 로 학습을 시도해보기도 했습니다. <em>sks</em> 라는 unique identifier 에 특정 여자 캐릭터에 대한 정보뿐만 아니라 프리드로우 그림체 자체를 담아내기 위한 목적이었습니다.</p>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># different instance prompt with prior-preservation loss
+MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
+instance_prompt = &quot;A photo of a girl in the style of sks&quot;
+class_prompt = &quot;A photo of a girl&quot;
+
+python3 train_dreambooth.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
+  --output_dir=$OUTPUT_DIR \
+  --revision=&quot;fp16&quot; \
+  --with_prior_preservation --prior_loss_weight=1.0 \
+  --seed=1337 \
+  --resolution=512 \
+  --train_batch_size=1 \
+  --train_text_encoder \
+  --mixed_precision=&quot;fp16&quot; \
+  --use_8bit_adam \
+  --gradient_accumulation_steps=1 --gradient_checkpointing \
+  --learning_rate=1e-6 \
+  --lr_scheduler=&quot;constant&quot; \
+  --lr_warmup_steps=0 \
+  --num_class_images=200 \
+  --sample_batch_size=4 \
+  --max_train_steps=800 \
+  --save_interval=100 \
+  --save_sample_prompt=&quot;A photo of sks girl&quot; \
+  --concepts_list=&quot;concepts_list.json&quot;
+</pre></div>
+</div>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># different instance prompt w/o prior-preservation loss
+MODEL_NAME = &quot;runwayml/stable-diffusion-v1-5&quot;
+instance_prompt = &quot;A photo of a girl in the style of sks&quot;
+class_prompt = &quot;A photo of a girl&quot;
+
+python3 train_dreambooth.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --pretrained_vae_name_or_path=&quot;stabilityai/sd-vae-ft-mse&quot; \
+  --output_dir=$OUTPUT_DIR \
+  --revision=&quot;fp16&quot; \
+  --with_prior_preservation --prior_loss_weight=0.0 \
+  --seed=1337 \
+  --resolution=512 \
+  --train_batch_size=1 \
+  --train_text_encoder \
+  --mixed_precision=&quot;fp16&quot; \
+  --use_8bit_adam \
+  --gradient_accumulation_steps=1 --gradient_checkpointing \
+  --learning_rate=1e-6 \
+  --lr_scheduler=&quot;constant&quot; \
+  --lr_warmup_steps=0 \
+  --num_class_images=200 \
+  --sample_batch_size=4 \
+  --max_train_steps=800 \
+  --save_interval=100 \
+  --save_sample_prompt=&quot;A photo of sks girl&quot; \
+  --concepts_list=&quot;concepts_list.json&quot;
+</pre></div>
+</div>
+<p>Inference 시, 프리드로우의 그림체가 반영된 남자가 생성되도록 prompt 를 “A photo of a boy in the style of <em>sks</em>” 로 입력했을때의 결과입니다. DreamBooth 혹은 사전학습된 text-to-image 모델을 프리드로우 작가님의 웹툰 장면들로 전체적으로 학습하게 된다면 더 다양한 inference 결과들을 볼 수 있을 것 같습니다.</p>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 24 / with prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_05.png"><img alt="swjo_exp_05" class="bg-primary mb-1" src="../../_images/swjo_exp_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 844 </span><span class="caption-text">Instance Prompt</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Inference step 을 늘려가면서 추론된 인물 이미지의 퀄리티가 상승하는 부분도 확인할 수 있었습니다. 또한, guidance scale 에 대한 실험도 진행했는데 guidance scale 이 작을수록 prompt 와 무관한 random 한 이미지들을 생성하게 됩니다. 최종적으로 num_inference_steps 와 guidance scale 의 값은 각각 100 과 7.5 로 설정하였습니다.</p>
+<ul class="simple">
+<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps=100 / with prior-preservation loss)</strong></p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_06.png"><img alt="swjo_exp_06" class="bg-primary mb-1" src="../../_images/swjo_exp_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 845 </span><span class="caption-text">Increasing Number of Inference Steps</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_07.png"><img alt="swjo_exp_07" class="bg-primary mb-1" src="../../_images/swjo_exp_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 846 </span><span class="caption-text">Increasing Number of Inference Steps / Negative Prompt</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+<p><strong>+ guidance_scale = 4</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_08.png"><img alt="swjo_exp_08" class="bg-primary mb-1" src="../../_images/swjo_exp_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 847 </span><span class="caption-text">Guidance Scale</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>동일한 inference prompt 로 prior-preservation loss 를 제외해본 결과, 생성된 남자의 머리카락이 더 길어지고 더 여성스러운 생김새를 가지는 놀라운 사실도 발견했습니다.</p>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_09.png"><img alt="swjo_exp_09" class="bg-primary mb-1" src="../../_images/swjo_exp_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 848 </span><span class="caption-text">Without Prior Preservation Loss</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="appendix">
+<h2>Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
+<p>그 외 다양한 inference prompt 에 따른 재미있는 실험결과들을 공유합니다. 아직 손의 모양을 text-to-image 모델이 생성하지 못하는 부분도 재차 확인할 수 있었습니다.</p>
+<ul>
+<li><p><strong>Inference Prompt: “A photo of a boy climbing up the mountain in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_10.png"><img alt="swjo_exp_10" class="bg-primary mb-1" src="../../_images/swjo_exp_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 849 </span><span class="caption-text">Appendix 1</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>Inference Prompt: “A painting of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_11.png"><img alt="swjo_exp_11" class="bg-primary mb-1" src="../../_images/swjo_exp_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 850 </span><span class="caption-text">Appendix 2</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>Inference Prompt: “A hand drawing of a boy in the style of <em>sks</em>” (num_inference_steps = 100 / w/o prior-preservation loss)</strong></p>
+<p><strong>+</strong> <strong>Negative Prompt: “ugly, disfigured, deformed, low resolution”</strong></p>
+</li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_12.png"><img alt="swjo_exp_12" class="bg-primary mb-1" src="../../_images/swjo_exp_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 851 </span><span class="caption-text">Appendix 3</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>마지막으로 하단의 좌측과 우측 사진은 각각 “A photo of <em>sks</em> girl” 그리고 “A photo of a girl in the style of <em>sks</em>” 이라는 prompt 로 DreamBooth 모델을 각각 학습한 후, 나비를 생성하라는 동일한 prompt 로 추론해본 결과입니다. <em>sks</em> 가 수식하는 명사가 girl 이 아닌 style 이도록 prompt 를 수정함으로써, butterfly 사진을 생성할때 조금이나마 더 프리드로우 웹툰의 그림체를 반영할 수 있었던 부분도 확인할 수 있었습니다.</p>
+<ul class="simple">
+<li><p><strong>Inference Prompt: “A photo of a butterfly in the style of <em>sks</em>” (num_inference_steps = 100 / with prior-preservation loss)</strong></p></li>
+</ul>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/swjo_exp_13.png"><img alt="swjo_exp_13" class="bg-primary mb-1" src="../../_images/swjo_exp_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 852 </span><span class="caption-text">Appendix 4</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/experiments"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="js_exp.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Synthetic Data with Stable Diffusion for Foliar Disease Classification</p>
+      </div>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prior-preservation-loss">Prior Preservation Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#negative-prompt">Negative Prompt</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instance-prompt-guidance-scale">Instance Prompt / Guidance Scale</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/markdown-example.html b/docs/markdown-example.html
old mode 100644
new mode 100755
diff --git a/docs/notebook-example.html b/docs/notebook-example.html
old mode 100644
new mode 100755
diff --git a/docs/review/3DGS.html b/docs/review/3DGS.html
index 5b4df2f0..099b4126 100755
--- a/docs/review/3DGS.html
+++ b/docs/review/3DGS.html
@@ -1,861 +1,881 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>3D Gaussian Splatting for Real-Time Radiance Field Rendering &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/3DGS';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)" href="Point_E.html" />
-    <link rel="prev" title="NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis" href="NeRF.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/3DGS.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/3DGS.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>3D Gaussian Splatting for Real-Time Radiance Field Rendering</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#differentiable-3d-gaussian-splatting">Differentiable 3D Gaussian Splatting</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization-with-adaptive-density-control-of-3d-gaussians">Optimization with Adaptive Density Control of 3D Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization">Optimization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-control-of-gaussians">Adaptive Control of Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fast-diffenrentiable-rasterization-for-gaussians">Fast Differentiable Rasterization for Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-and-evaluation">Results and Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> 3D Gaussian Splatting for Real-Time Radiance Field Rendering</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2308.04079">https://arxiv.org/abs/2308.04079</a></p></li>
-<li><p>Project: <a class="reference external" href="https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/">https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Hyunsoo Kim</p></li>
-<li><p><strong>Last updated on June. 12, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="d-gaussian-splatting-for-real-time-radiance-field-rendering">
-<h1>3D Gaussian Splatting for Real-Time Radiance Field Rendering<a class="headerlink" href="#d-gaussian-splatting-for-real-time-radiance-field-rendering" title="Permalink to this heading">#</a></h1>
-<p>NeRF 기반 방식들은 높은 품질의 새로운 장면 합성이 가능하지만, 시간이 많이 걸려 실시간 렌더링에는 한계가 있다. 다른 방식들은 시간은 빠르지만 품질이 떨어진다. 이를 해결하기 위해, 논문은 3D Gaussian Splatting을 제안한다. 이 방법은 Mip-NeRF360과 유사한 성능을 제공하면서도 InstantNGP만큼 빠르게 학습할 수 있다.</p>
-<p>3D Gaussian Splatting은 다음 세 가지로 구성된다:</p>
-<ul class="simple">
-<li><p>Structure-from-Motion(SfM)에서 얻은 희소 점 구름을 초기값으로 3D 가우시안을 도입</p></li>
-<li><p>3D 위치, 불투명도, 이방성 공분산, 구형 고조파(SH) 계수의 최적화</p></li>
-<li><p>타일 기반 래스터화에서 영감을 받은 빠른 GPU 정렬 알고리즘을 통한 실시간 렌더링</p></li>
-</ul>
-<p>이 방법은 이전의 implicit radiance field 접근법들과 동등하거나 더 나은 품질을 제공하며, 이전의 가장 빠른 방식과 유사한 학습 속도와 품질을 보여주어 고수준의 새로운 뷰 합성에 대해 처음으로 실시간 렌더링을 제공한다.</p>
-<section id="overview">
-<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image0.png"><img alt="image0" class="bg-primary mb-1" src="../../_images/image0.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 657 </span><span class="caption-text">Main process of 3D Gaussian Splatting</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image1.png"><img alt="image1" class="bg-primary mb-1" src="../../_images/image1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 658 </span><span class="caption-text">Pseudo Algorithm of 3D Gaussian Splatting</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="differentiable-3d-gaussian-splatting">
-<h2>Differentiable 3D Gaussian Splatting<a class="headerlink" href="#differentiable-3d-gaussian-splatting" title="Permalink to this heading">#</a></h2>
-<p>이 논문은 normal(표면 법선)이 없는 Structure-from-Motion(SfM) 포인트들의 sparse한 셋을 initial point로 하여, 고품질의 novel view synthesis를 가능하게 하는 scene representation을 최적화하는 것을 목표로 한다.</p>
-<p>빠른 렌더링을 위해 unstructured하고 explicit한 primitive를 필요로 하며, 이를 위해 미분 가능하고 2D splats로 쉽게 project되는 3D Gaussian을 선택했다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image2.png"><img alt="image2" class="bg-primary mb-1" src="../../_images/image2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 659 </span><span class="caption-text">3D Gaussian Splatting Equation 4</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>3D Gaussian은 포인트(mean) μ를 중심으로 하고, 3D 공분산 행렬 Σ로 정의한다. 렌더링을 위해 3D Gaussian을 2D로 project해야 하며, 이는 viewing transformation W에 따라 카메라 좌표계에서의 공분산 행렬 Σ’로 나타낼 수 있다. 최적화를 위해, Σ는 positive semi-definite 행렬이어야 하며, 이 때문에 최적화가 어렵다고 한다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image3.png"><img alt="image3" class="bg-primary mb-1" src="../../_images/image3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 660 </span><span class="caption-text">3D Gaussian Splatting Equation 5</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>따라서 논문에서는 더 직관적이고 최적화에 적합한 representation을 선택한다. 3D Gaussian의 공분산 행렬 Σ는 타원체의 구성을 설명하는 것과 유사하며, 이를 위해 scaling matrix S와 rotation matrix R을 사용한다.</p>
-<p>scaling은 3D vector s로, rotation은 quaternion q로 표현하며, 이들은 각각의 행렬로 변환될 수 있다. 학습 동안 Auto grad(자동 미분)의 오버헤드를 피하기 위해 모든 파라미터에 대한 gradient를 명시적으로 유도한다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image4.png"><img alt="image4" class="bg-primary mb-1" src="../../_images/image4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 661 </span><span class="caption-text">3D Gaussian Splatting Equation 6</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>최적화에 적합한 anisotropic covariance representation은 장면의 다양한 geometry에 적응하도록 3D Gaussian을 최적화한다.</p>
-</section>
-<section id="optimization-with-adaptive-density-control-of-3d-gaussians">
-<h2>Optimization with Adaptive Density Control of 3D Gaussians<a class="headerlink" href="#optimization-with-adaptive-density-control-of-3d-gaussians" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><strong>Optimization</strong></p></li>
-<li><p><strong>Adaptive Control of Gaussians</strong></p></li>
-</ul>
-<p>이 논문의 핵심 접근법은 free-view synthesis를 위해 장면을 정확하게 표현하는 3D Gaussian의 밀집된 세트를 만드는 최적화 단계다. 여기에는 position 𝑝, 투명도 𝛼, 공분산 Σ뿐만 아니라, scene의 view-dependent appearance를 정확하게 표현하기 위한, 각 Gaussian의 색상 c를 나타내는 SH coefficients까지 포함된다.</p>
-</section>
-<section id="optimization">
-<h2>Optimization<a class="headerlink" href="#optimization" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>3D를 2D로 project할 때 발생할 수 있는 모호함을 피하기 위해, optimization 과정에서 geometry가 더 생성되거나, 삭제되거나 혹은 이동할 수 있어야 함</p>
-<ul>
-<li><p>공분산 파라미터의 퀄리티는 큰 homogeneous area들을 적은 수의 큰 anisotropic Gaussian들로 캡처될 수 있기 때문에 representation의 compactness에 중요</p></li>
-</ul>
-</li>
-<li><p>논문은 SGD를 사용하고 일부 연산은 CUDA 커널을 사용합니다. 특히 빠른 rasterization은 최적화의 효율성에 중요합니다.</p>
-<ul>
-<li><p>이는 최적화의 주요 computation bottleneck이기 때문</p></li>
-</ul>
-</li>
-<li><p>투명도 𝛼에 대해서는 sigmoid function을, 공분산의 scale에 대해서는 exponential activation 함수를 사용</p>
-<ul>
-<li><p>initial 공분산 행렬은 가장 가까운 세 점까지의 거리의 평균을 축으로 하는 isotropic Gaussian으로 추정</p></li>
-<li><p>position에 대해서만 exponential decay 스케줄링을 사용</p></li>
-</ul>
-</li>
-</ul>
-<p>Loss function은 D-SSIM과 L1 loss를 사용하며, D-SSIM loss는 이미지의 왜곡(distortion)을 잘 반영하고 미분 가능하여 evaluation metric뿐만 아니라 loss로도 사용 가능.</p>
-<p>참고로 SSIM은 이미지의 밝기, 대조, 구조를 고려하여 두 이미지 간의 유사성을 측정하는 메트릭이다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image6.png"><img alt="image6" class="bg-primary mb-1" src="../../_images/image6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 662 </span><span class="caption-text">D-SSIM equation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image7.png"><img alt="image7" class="bg-primary mb-1" src="../../_images/image7.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 663 </span><span class="caption-text">SSIM equation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="adaptive-control-of-gaussians">
-<h2>Adaptive Control of Gaussians<a class="headerlink" href="#adaptive-control-of-gaussians" title="Permalink to this heading">#</a></h2>
-<p>또한 Structure-from-Motion(SfM)으로 얻은 초기 희소 점들을 시작으로, scene을 더 잘 표현하기 위해 unit volume  내 Gaussian들의 수와 밀도를 점진적으로 최적화하는 방식을 제안한다.</p>
-<ul class="simple">
-<li><p>매 100번의 반복(iter)마다 Gaussian을 추가하고, 투명도 𝛼가 일정 값보다 작은 Gaussian을 제거</p></li>
-</ul>
-<p>Adaptive Control of Gaussians는 빈 공간을 채우고, missing geometric feature가 있는 region과 하나의 Gaussian이 너무 넓은 영역을 덮고 있는 region에 집중합니다.</p>
-<p>이러한 지역은 모두 큰 view-space positional gradient를 가지며, 최적화를 통해 Gaussian을 이동시킵니다.</p>
-<ul class="simple">
-<li><p>Under-reconstruction: 동일한 크기의 Gaussian을 복제하고 위치 기울기 방향으로 이동.</p></li>
-<li><p>Over-reconstruction: Gaussian을 두 개로 나누고, 위치는 기존 Gaussian의 PDF를 샘플링해서 초기화.</p></li>
-</ul>
-<p>최적화 과정에서 입력 카메라에 가까운 Gaussian density의 부적절한 증가를 방지하기 위해, 3000번의 반복마다 투명도 α를 0에 가깝게 설정한다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image8.png"><img alt="image8" class="bg-primary mb-1" src="../../_images/image8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 664 </span><span class="caption-text">Adaptive Control of Gaussians</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="fast-diffenrentiable-rasterization-for-gaussians">
-<h2>Fast Differentiable Rasterization for Gaussians<a class="headerlink" href="#fast-diffenrentiable-rasterization-for-gaussians" title="Permalink to this heading">#</a></h2>
-<p>본 논문의 목표는 빠른 전체 렌더링과 빠른 sorting을 통해 근사적인 α-blending을 가능하게 하고, 이전 연구에 존재하던 'gradient를 받을 수 있는 splat 수에 대한 강한 제약'을 피하는 것이다.</p>
-<p>이러한 목표를 달성하기 위해 본 논문은 tile-based rasterization for Gaussian splats를 제안한다.</p>
-<p>tile-based rasterization은 아래와 같은 흐름으로 진행된다.</p>
-<ul class="simple">
-<li><p>Screen을 16*16으로 나누고 view frustum과 각 타일에 대해 3D Gaussian들을 cull함</p>
-<ul>
-<li><p>view frustum외에 있는 것들을 제거하는 절차</p></li>
-<li><p>16*16으로 나눈 이유는 각 타일마다 다른 GPU thread에서 계산해서 빠르게 계산하기 위함</p></li>
-</ul>
-</li>
-<li><p>guard band를 이용해 극단적인 위치(너무 가깝거나 먼)에 있는 gaussian들 또한 제거</p>
-<ul>
-<li><p>이들의 projected 2D covariance를 계산하는 것은 불안정적</p></li>
-</ul>
-</li>
-<li><p>그런 뒤, gaussian들 마다 속한 tile과 depth에 따라 key를 부여</p></li>
-<li><p>이를 GPU Radix sort를 이용해 정렬</p></li>
-<li><p>각 타일마다 front-to-back으로 color와 α값을 accumulate해서 픽셀 값을 구함</p></li>
-</ul>
-</section>
-<section id="results-and-evaluation">
-<h2>Results and Evaluation<a class="headerlink" href="#results-and-evaluation" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Results and Evaluation
-데이터셋에 따라 결과는 다르지만 SOTA이상의 퀄리티를 내면서 좋은 Training time과 FPS를 보인다.</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image12.png"><img alt="image12" class="bg-primary mb-1" src="../../_images/image12.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 665 </span><span class="caption-text">3D Gaussian Splatting experiments table</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image13.png"><img alt="image13" class="bg-primary mb-1" src="../../_images/image13.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 666 </span><span class="caption-text">3D Gaussian Splatting experiments result</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Ablations</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image19.png"><img alt="image19" class="bg-primary mb-1" src="../../_images/image19.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 667 </span><span class="caption-text">3D Gaussian Splatting ablation test image</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image18.png"><img alt="image18" class="bg-primary mb-1" src="../../_images/image18.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 668 </span><span class="caption-text">3D Gaussian Splatting result image</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="limitations">
-<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>이전의 방식들과 유사하게 잘 관측되지 않은 장면은 artifact들이 존재</p></li>
-<li><p>이전의 방식들과 유사하게 늘어지고 얼룩진 artifact를 생성할 수 있음</p></li>
-<li><p>최적화에서 거대한 Gaussian이 만들어지면 popping artifacts 가끔 발생</p></li>
-<li><p>최적화에서 regularization을 적용하지 않음</p></li>
-<li><p>NeRF-based 기법들보다 memory consumption이 상당히 높음</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="NeRF.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Point_E.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#differentiable-3d-gaussian-splatting">Differentiable 3D Gaussian Splatting</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization-with-adaptive-density-control-of-3d-gaussians">Optimization with Adaptive Density Control of 3D Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization">Optimization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-control-of-gaussians">Adaptive Control of Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fast-diffenrentiable-rasterization-for-gaussians">Fast Diffenrentiable Rasterization for Gaussians</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-and-evaluation">Results and Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>3D Gaussian Splatting for Real-Time Radiance Field Rendering &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/3DGS';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)" href="Point_E.html" />
+    <link rel="prev" title="NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis" href="NeRF.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/3DGS.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/3DGS.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>3D Gaussian Splatting for Real-Time Radiance Field Rendering</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#differentiable-3d-gaussian-splatting">Differentiable 3D Gaussian Splatting</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization-with-adaptive-density-control-of-3d-gaussians">Optimization with Adaptive Density Control of 3D Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization">Optimization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-control-of-gaussians">Adaptive Control of Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fast-diffenrentiable-rasterization-for-gaussians">Fast Diffenrentiable Rasterization for Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-and-evaluation">Results and Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> 3D Gaussian Splatting for Real-Time Radiance Field Rendering</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2308.04079">https://arxiv.org/abs/2308.04079</a></p></li>
+<li><p>Project: <a class="reference external" href="https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/">https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Hyunsoo Kim</p></li>
+<li><p><strong>Last updated on June 12, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="d-gaussian-splatting-for-real-time-radiance-field-rendering">
+<h1>3D Gaussian Splatting for Real-Time Radiance Field Rendering<a class="headerlink" href="#d-gaussian-splatting-for-real-time-radiance-field-rendering" title="Permalink to this heading">#</a></h1>
+<p>NeRF 기반 방식들은 높은 품질의 새로운 장면 합성이 가능하지만, 시간이 많이 걸려 실시간 렌더링에는 한계가 있다. 다른 방식들은 속도는 빠르지만 품질이 떨어진다. 이를 해결하기 위해, 논문은 3D Gaussian Splatting을 제안한다. 이 방법은 Mip-NeRF360과 유사한 성능을 제공하면서도 InstantNGP만큼 빠르게 학습할 수 있다.</p>
+<p>3D Gaussian Splatting은 다음 세 가지로 구성된다:</p>
+<ul class="simple">
+<li><p>Structure-from-Motion(SfM)에서 얻은 희소 점 구름을 초기값으로 3D 가우시안을 도입</p></li>
+<li><p>3D 위치, 불투명도, 이방성 공분산, 구형 고조파(SH) 계수의 최적화</p></li>
+<li><p>타일 기반 래스터화에서 영감을 받은 빠른 GPU 정렬 알고리즘을 통한 실시간 렌더링</p></li>
+</ul>
+<p>이 방법은 이전의 implicit radiance field 접근법들과 동등하거나 더 나은 품질을 제공하며, 이전의 가장 빠른 방식과 유사한 학습 속도와 품질을 보여주어 고수준의 새로운 뷰 합성에 대해 처음으로 실시간 렌더링을 제공한다.</p>
+<section id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image0.png"><img alt="image0" class="bg-primary mb-1" src="../../_images/image0.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 669 </span><span class="caption-text">Main process of 3D Gaussian Splatting</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image1.png"><img alt="image1" class="bg-primary mb-1" src="../../_images/image1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 670 </span><span class="caption-text">Pseudo Algorithm of 3D Gaussian Splatting</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="differentiable-3d-gaussian-splatting">
+<h2>Differentiable 3D Gaussian Splatting<a class="headerlink" href="#differentiable-3d-gaussian-splatting" title="Permalink to this heading">#</a></h2>
+<p>이 논문은 normal(표면 법선)이 없는 Structure-from-Motion(SfM) 포인트들의 sparse한 셋을 initial point로 하여, 고품질의 novel view synthesis를 가능하게 하는 scene representation을 최적화하는 것을 목표로 한다.</p>
+<p>빠른 렌더링을 위해 unstructured하고 explicit한 primitive를 필요로 하며, 이를 위해 미분 가능하고 2D splats로 쉽게 project되는 3D Gaussian을 선택했다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image2.png"><img alt="image2" class="bg-primary mb-1" src="../../_images/image2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 671 </span><span class="caption-text">3D Gaussian Splatting Equation 4</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>3D Gaussian은 포인트(mean) μ를 중심으로 하고, 3D 공분산 행렬 Σ로 정의한다. 렌더링을 위해 3D Gaussian을 2D로 project해야 하며, 이는 viewing transformation W에 따라 카메라 좌표계에서의 공분산 행렬 Σ’로 나타낼 수 있다. 최적화를 위해, Σ는 positive semi-definite 행렬이어야 하며, 이 때문에 최적화가 어렵다고 한다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image3.png"><img alt="image3" class="bg-primary mb-1" src="../../_images/image3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 672 </span><span class="caption-text">3D Gaussian Splatting Equation 5</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>따라서 논문에서는 더 직관적이고 최적화에 적합한 representation을 선택한다. 3D Gaussian의 공분산 행렬 Σ는 타원체의 구성을 설명하는 것과 유사하며, 이를 위해 scaling matrix S와 rotation matrix R을 사용한다.</p>
+<p>scaling은 3D vector s로, rotation은 quaternion q로 표현하며, 이들은 각각의 행렬로 변환될 수 있다. 학습 동안 Auto grad(자동 미분)의 오버헤드를 피하기 위해 모든 파라미터에 대한 gradient를 명시적으로 유도한다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image4.png"><img alt="image4" class="bg-primary mb-1" src="../../_images/image4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 673 </span><span class="caption-text">3D Gaussian Splatting Equation 6</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>최적화에 적합한 anisotropic covariance representation은 장면의 다양한 geometry에 적응하도록 3D Gaussian을 최적화한다.</p>
+</section>
+<section id="optimization-with-adaptive-density-control-of-3d-gaussians">
+<h2>Optimization with Adaptive Density Control of 3D Gaussians<a class="headerlink" href="#optimization-with-adaptive-density-control-of-3d-gaussians" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>Optimization</strong></p></li>
+<li><p><strong>Adaptive Control of Gaussians</strong></p></li>
+</ul>
+<p>이 논문의 핵심 접근법은 free-view synthesis를 위해 장면을 정확하게 표현하는 3D Gaussian의 밀집된 세트를 만드는 최적화 단계다. 여기에는 position 𝑝, 불투명도 𝛼, 공분산 Σ뿐만 아니라, scene의 view-dependent appearance를 정확하게 포착하기 위해 각 Gaussian의 색상 c를 표현하는 SH coefficients까지 포함된다.</p>
+</section>
+<section id="optimization">
+<h2>Optimization<a class="headerlink" href="#optimization" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>3D를 2D로 project할 때 발생할 수 있는 모호함을 피하기 위해, optimization 과정에서 geometry가 더 생성되거나, 삭제되거나 혹은 이동할 수 있어야 함</p>
+<ul>
+<li><p>공분산 파라미터의 퀄리티는 큰 homogeneous area들을 적은 수의 큰 anisotropic Gaussian들로 캡처될 수 있기 때문에 representation의 compactness에 중요</p></li>
+</ul>
+</li>
+<li><p>논문은 SGD를 사용하고 일부 연산은 CUDA 커널을 사용합니다. 특히 빠른 rasterization은 최적화의 효율성에 중요합니다.</p>
+<ul>
+<li><p>이는 최적화의 주요 computation bottleneck이기 때문</p></li>
+</ul>
+</li>
+<li><p>불투명도 𝛼에 대해서는 sigmoid function을, 공분산의 scale에 대해서는 exponential activation 함수를 사용</p>
+<ul>
+<li><p>initial 공분산 행렬은 가장 가까운 세 점까지의 거리의 평균을 축으로 하는 isotropic Gaussian으로 추정</p></li>
+<li><p>position에 대해서만 exponential decay 스케줄링을 사용</p></li>
+</ul>
+</li>
+</ul>
+<p>Loss function은 D-SSIM과 L1 loss를 사용하며, D-SSIM loss는 이미지의 왜곡(distortion)을 잘 반영하고 미분 가능하여 evaluation metric뿐만 아니라 loss로도 사용 가능.</p>
+<p>참고로 SSIM은 이미지의 밝기, 대조, 구조를 고려하여 두 이미지 간의 유사성을 측정하는 메트릭이다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image6.png"><img alt="image6" class="bg-primary mb-1" src="../../_images/image6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 674 </span><span class="caption-text">D-SSIM equation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image7.png"><img alt="image7" class="bg-primary mb-1" src="../../_images/image7.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 675 </span><span class="caption-text">SSIM equation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="adaptive-control-of-gaussians">
+<h2>Adaptive Control of Gaussians<a class="headerlink" href="#adaptive-control-of-gaussians" title="Permalink to this heading">#</a></h2>
+<p>또한 Structure-from-Motion(SfM)으로 얻은 초기 희소 점들을 시작으로, scene을 더 잘 표현하기 위해 unit volume  내 Gaussian들의 수와 밀도를 점진적으로 최적화하는 방식을 제안한다.</p>
+<ul class="simple">
+<li><p>매 100번의 반복(iter)마다 Gaussian을 추가하고, 불투명도 𝛼가 일정 값보다 작은 Gaussian을 제거</p></li>
+</ul>
+<p>Adaptive Control of Gaussians는 빈 공간을 채우고, missing geometric feature이 있는 region과 하나의 Gaussian이 너무 넓은 region을 커버하는 region에 집중합니다.</p>
+<p>이러한 지역은 모두 큰 view-space positional gradient를 가지며, 최적화를 통해 Gaussian을 이동시킵니다.</p>
+<ul class="simple">
+<li><p>Under-reconstruction: 동일한 크기의 Gaussian을 복제하고 위치 기울기 방향으로 이동.</p></li>
+<li><p>Over-reconstruction: Gaussian을 두 개로 나누고, 위치는 기존 Gaussian의 PDF를 샘플링해서 초기화.</p></li>
+</ul>
+<p>최적화 과정에서 입력 카메라에 가까운 Gaussian density의 부적절한 증가를 방지하기 위해, 3000번의 반복마다 불투명도 α를 0에 가깝게 설정한다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image8.png"><img alt="image8" class="bg-primary mb-1" src="../../_images/image8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 676 </span><span class="caption-text">Adaptive Control of Gaussians</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="fast-diffenrentiable-rasterization-for-gaussians">
+<h2>Fast Differentiable Rasterization for Gaussians<a class="headerlink" href="#fast-diffenrentiable-rasterization-for-gaussians" title="Permalink to this heading">#</a></h2>
+<p>본 논문의 목표는 대략적인 α-blending을 가능하게 하고 이전 연구에 존재하던 gradient를 얻을 수 있는 splat들의 수에 대한 강한 제약을 피하는 빠른 전체적인 렌더링과 빠른 sorting을 가지는 것이다.</p>
+<p>이러한 목표를 달성하기 위해 본 논문은 tile-based rasterization for Gaussian splats를 제안한다.</p>
+<p>tile-based rasterization은 아래와 같은 흐름으로 진행된다.</p>
+<ul class="simple">
+<li><p>Screen을 16*16으로 나누고 view frustum과 각 타일에 대해 3D Gaussian들을 cull함</p>
+<ul>
+<li><p>view frustum외에 있는 것들을 제거하는 절차</p></li>
+<li><p>16*16으로 나눈 이유는 각 타일마다 다른 GPU thread에서 계산해서 빠르게 계산하기 위함</p></li>
+</ul>
+</li>
+<li><p>guard band를 이용해 극단적인 위치(너무 가깝거나 먼)에 있는 gaussian들 또한 제거</p>
+<ul>
+<li><p>이들의 projected 2D covariance를 계산하는 것은 불안정적</p></li>
+</ul>
+</li>
+<li><p>그런 뒤, gaussian들 마다 속한 tile과 depth에 따라 key를 부여</p></li>
+<li><p>이를 GPU Radix sort를 이용해 정렬</p></li>
+<li><p>각 타일마다 front-to-back으로 color와 α값을 accumulate해서 픽셀 값을 구함</p></li>
+</ul>
+</section>
+<section id="results-and-evaluation">
+<h2>Results and Evaluation<a class="headerlink" href="#results-and-evaluation" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Results and Evaluation
+데이터셋에 따라 결과는 다르지만 SOTA이상의 퀄리티를 내면서 좋은 Training time과 FPS를 보인다.</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image12.png"><img alt="image12" class="bg-primary mb-1" src="../../_images/image12.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 677 </span><span class="caption-text">3D Gaussian Splatting experiments table</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image13.png"><img alt="image13" class="bg-primary mb-1" src="../../_images/image13.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 678 </span><span class="caption-text">3D Gaussian Splatting experiments result</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Ablations</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image19.png"><img alt="image19" class="bg-primary mb-1" src="../../_images/image19.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 679 </span><span class="caption-text">3D Gaussian Splatting ablation test image</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image18.png"><img alt="image18" class="bg-primary mb-1" src="../../_images/image18.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 680 </span><span class="caption-text">3D Gaussian Splatting result image</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="limitations">
+<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>이전의 방식들과 유사하게 잘 관측되지 않은 장면은 artifact들이 존재</p></li>
+<li><p>이전의 방식들과 유사하게 늘어지고 얼룩진 artifact를 생성할 수 있음</p></li>
+<li><p>최적화에서 거대한 Gaussian이 만들어지면 popping artifacts 가끔 발생</p></li>
+<li><p>최적화에서 regularization을 적용하지 않음</p></li>
+<li><p>NeRF-based 기법들보다 memory consumption이 상당히 높음</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="NeRF.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Point_E.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#differentiable-3d-gaussian-splatting">Differentiable 3D Gaussian Splatting</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization-with-adaptive-density-control-of-3d-gaussians">Optimization with Adaptive Density Control of 3D Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimization">Optimization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-control-of-gaussians">Adaptive Control of Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fast-diffenrentiable-rasterization-for-gaussians">Fast Differentiable Rasterization for Gaussians</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-and-evaluation">Results and Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html b/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html
old mode 100644
new mode 100755
index 861fcbea..b4a7ccc3
--- a/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html
+++ b/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html
@@ -1,992 +1,1012 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>A Study on the Evaluation of Generative Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/A_Study_on_the_Evaluation_of_Generative_Models';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="CycleGAN" href="cycleGAN.html" />
-    <link rel="prev" title="DDIM" href="DDIM.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/A_Study_on_the_Evaluation_of_Generative_Models.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>A Study on the Evaluation of Generative Models</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">학습 자료</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. BackGround</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kl-divergence-kullback-leibler-divergence">2.1. KL-Divergence(Kullback-Leibler divergence)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inception-score-is">2.2. Inception Score(IS)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-frechet-inception-distance">2.3. FiD(Fréchet Inception Distance)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kernel-inception-distance">2.4. Kernel Inception Distance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-is">2.5. FID∞ &amp; IS∞</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clean-fid">2.5. Clean FiD</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#synthetic-dataset-as-a-benchmark">3. Synthetic dataset as a benchmark</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-between-evaluation-metrics">4. Comparison between evaluation metrics</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#is-inception-all-we-need">5. Is Inception all we need?</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <section class="tex2jax_ignore mathjax_ignore" id="a-study-on-the-evaluation-of-generative-models">
-<h1>A Study on the Evaluation of Generative Models<a class="headerlink" href="#a-study-on-the-evaluation-of-generative-models" title="Permalink to this heading">#</a></h1>
-<section id="id1">
-<h2>학습 자료<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>A Study on the Evaluation of Generative Models</p>
-<p><a class="reference external" href="https://arxiv.org/pdf/2206.10935.pdf">https://arxiv.org/pdf/2206.10935.pdf</a></p>
-</section>
-<hr class="docutils" />
-<section id="abstract">
-<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GAN, Diffusion등 생성 모델의 놀라운 발전이 이어지고있다.</p></li>
-<li><p>다만 이러한 생성모델을 평가하는 척도(metric)의 선정은 아직 어려운 문제로 남아있다.</p></li>
-<li><p>그나마 Inception Score(IS)나, FID Score를 통해 모델을 평가하고있지만 이 metric들도 완전하지 않음</p></li>
-<li><p>이 논문을 통해</p>
-<ul>
-<li><p>생성 평가의 지표에 대해 한번더 고찰하고</p></li>
-<li><p>현존하는 Metric에 대한 방향을 제시</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>최근 GAN, Diffusion 등 Implicit generative model들이 뛰어난 성능을 보여줌</p></li>
-<li><p>하지만 다른 task(classification, segmentation 등)와는 다르게 생성 모델의 metric을 정하는 것은 challenging (classification ; P&amp;R, F1 score / segmentation ; IoU(Intersection over Union))</p></li>
-<li><p>그나마 이미지의 feature map이나 classifier score를 사용하는 FiD, Inception score가 잘 쓰이는 추세</p></li>
-<li><p>위 metric의 단점</p>
-<ol class="arabic simple">
-<li><p>real 이미지 분포의 space에서 해당 수치가 정말 유의미한 연관이 있는지 증명되지 않음</p></li>
-<li><p>pretrained model의 거대한 train set이 specific 이미지의 feature에 얼마나 좋은 성능을 미치는지 알수 없음(inception net ; imagenet / ddpm ; face)</p></li>
-</ol>
-</li>
-<li><p>Human study의 직관적인 방식도 있지만 time과 cost를 매우 필요로한다는 점과 model의 Diversity는 측정하기 어렵다는 단점</p>
-<ul>
-<li><p>e.g ) 하나의 좋은 이미지만 생성해도 좋은 score를 받을 수 있음</p></li>
-</ul>
-</li>
-<li><p>이 논문에서는</p>
-<ol class="arabic simple">
-<li><p>Image-GPT 모델을 통해 high quality의 new synthetic dataset을 생성</p></li>
-<li><p>여러 모델을 위의 데이터로 학습하고 FiD, IS등 다양한 metric을 측정</p></li>
-<li><p>이를 실제 KL Divergence, Reverse KL Divergence 값과 비교해서 metric의 유효성을 검증</p></li>
-<li><p>FID, IS등 다양한 metric의 base model로 쓰이는 Inception-V3과 CLIP 의 비교를 통해 Inception-V3 모델의 적합성을 검증</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="background">
-<h2>2. BackGround<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<section id="kl-divergence-kullback-leibler-divergence">
-<h3>2.1. KL-Divergence(Kullback-Leibler divergence)<a class="headerlink" href="#kl-divergence-kullback-leibler-divergence" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>두 확률분포의 유사도를 측정하는 지표</p></li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-KL(P || Q) = \sum_{x} P(x) \log\left(\frac{P(x)}{Q(x)}\right)
-\]</div>
-<ul class="simple">
-<li><p>특징</p>
-<ul>
-<li><p>lower is better</p></li>
-<li><p>KL ≥ 0, (KL(p, q) = 0, if p ==q)</p></li>
-<li><p>KL(p, q) ≠ KL(q, p)  // not symmetric</p></li>
-<li><p>Reverse Kullback-Leibler Divergence(RKL) = KL(q, p)</p></li>
-<li><p>대부분 P가 True distribution, Q가 estimated distribution</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="inception-score-is">
-<h3>2.2. Inception Score(IS)<a class="headerlink" href="#inception-score-is" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>생성된 이미지의 Fidelity와 Diversity를 측정</p>
-<ul>
-<li><p>fidelity : 특정 Label의 이미지를 얼마나 잘 예측하는지</p></li>
-<li><p>diversity : 다양한 class의 이미지들을 얼마나 고르게 생성해내는지</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img0.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_01" class="bg-primary mb-1" src="../../_images/img0.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 36 </span><span class="caption-text">Image 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-\text{IS}(G) = \exp\left(\mathbb{E}_x \left[D_{\text{KL}}(P(y|x) \, || \, P(y))\right]\right)
-\]</div>
-<ul class="simple">
-<li><p>특징</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(P(y|x)\)</span> ; 모델의 Fidelity, <span class="math notranslate nohighlight">\(P(y)\)</span>; 모델의 Diversity</p></li>
-<li><p>higher is better</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="fid-frechet-inception-distance">
-<h3>2.3. FiD(Fréchet Inception Distance)<a class="headerlink" href="#fid-frechet-inception-distance" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>real 이미지와 generated 이미지의 Feature vector를 추출 후 평균과 공분산을 통해 계산(Frechet distance)하는 평가지표</p></li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-FID = \lVert \mu_x - \mu_g \rVert^2 + \text{Tr}(\Sigma_x + \Sigma_g - 2(\Sigma_x\Sigma_g)^{1/2})
-\]</div>
-<ul class="simple">
-<li><p>특징</p>
-<ul>
-<li><p>Inception-V3의 마지막 pooling layer의 feature map을 사용</p></li>
-<li><p>Lower is better</p></li>
-<li><p><span class="math notranslate nohighlight">\(\mu_x - \mu_g\)</span>; 이미지의 Quality를 측정</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{Tr}(\Sigma_x + \Sigma_g - 2(\Sigma_x\Sigma_g)^{1/2})\)</span>; 모델의 Diversity를 측정</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="kernel-inception-distance">
-<h3>2.4. Kernel Inception Distance<a class="headerlink" href="#kernel-inception-distance" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>FiD에서 Frechet distance를 사용하는 대신 kernel trick을 사용해 확률 분포의 유사도를 계산</p></li>
-<li><p>특징</p>
-<ul>
-<li><p>적은 데이터셋의 평가에 효과적임</p></li>
-<li><p>FiD metric보다 속도가 오래걸림 (FiD : O(n), KiD : O(n^2))</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="fid-is">
-<h3>2.5. FID∞ &amp; IS∞<a class="headerlink" href="#fid-is" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p><a class="reference external" href="https://arxiv.org/pdf/1911.07023.pdf">해당 논문</a>에서 FiD와 IS metric에 bias가 있음을 증명하고 dataset의 sampling 기법을 변경(gaussian random sampling → sobol sequence sampling)하여 unbiased 한 metric을 제안</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img1.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_02" class="bg-primary mb-1" src="../../_images/img1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 37 </span><span class="caption-text">Image 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="clean-fid">
-<h3>2.5. Clean FiD<a class="headerlink" href="#clean-fid" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Inception-v3에 이미지를 통과시키기 위해 image resize 과정이 포함되는데, 이는 score 값에 영향을 줄 수 있어 best performance의 metric을 측정하기 위한 all-in-one process를 제안</p></li>
-</ul>
-</section>
-</section>
-<section id="synthetic-dataset-as-a-benchmark">
-<h2>3. Synthetic dataset as a benchmark<a class="headerlink" href="#synthetic-dataset-as-a-benchmark" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img2.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_03" class="bg-primary mb-1" src="../../_images/img2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 38 </span><span class="caption-text">Image 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>imagenet의 데이터를 ImageGPT를 통해 재생성(a.k.a. NotImageNet)</p>
-<ul>
-<li><p>imageGPT</p>
-<ul>
-<li><p>vision 분야에 transformer(in gpt-2)를 사용 + labeling dataset이 필요없는 자기지도 학습 방식</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img3.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_04" class="bg-primary mb-1" src="../../_images/img3.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 39 </span><span class="caption-text">Image 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>imagenet challenge에서도 상당한 score를 보임</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>이를 생성모델에 통과한 <span class="math notranslate nohighlight">\(P_{2}(\hat{x})\)</span>과 <span class="math notranslate nohighlight">\(P_{1}(\hat{x})\)</span> 두 분포를 비교</p></li>
-<li><p>한계</p>
-<ul class="simple">
-<li><p>explicit model에만 적용 가능하고 implicit model에는 적용할 수 없음</p></li>
-<li><p>explicit model : 생성되는 데이터의 분포를 명시적으로 모델링하여 학습하고 주로 Gaussian Noise로부터 이미지를 생성 (VAE …)</p></li>
-<li><p>implicit model : 데이터의 생성 과정에 대해 학습하고 주로 주어진 데이터 분포로부터 샘플링하여 학습 (GAN …)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="comparison-between-evaluation-metrics">
-<h2>4. Comparison between evaluation metrics<a class="headerlink" href="#comparison-between-evaluation-metrics" title="Permalink to this heading">#</a></h2>
-<p><strong>4.1. Volatility</strong></p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img4.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_05" class="bg-primary mb-1" src="../../_images/img4.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 40 </span><span class="caption-text">Image 5</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>KL, RKL은 적은 양의 Epoch(15-20) 후에 바로 수렴하는 반면 FID와 IS는 큰 변동성을 보임</p></li>
-<li><p>모델의 Capacity가 증가할수록 KL과 RKL의 수치가 개선되는 것을 확인</p></li>
-<li><p>FID나 IS가 KL, RKL의 그래프와 매우 다른 형태를 띠는 것을 확인(특히 IS)</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img5.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_06" class="bg-primary mb-1" src="../../_images/img5.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 41 </span><span class="caption-text">Image 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>FID나 (negative)IS가 KL과는 높은 correlation을 보이지만 RKL과는 높지 않은 correlation을 보인다.</p></li>
-<li><p>모델의 Capacity에 따라 KL, RKL의 수치 변화는 크지 않은 데 반해 FID나 IS는 굉장히 큰 수치의 변화를 보여준다.</p></li>
-</ul>
-<p><strong>4.2. Ranking Correlation</strong></p>
-<ul>
-<li><p>여러 모델에 대해 metric 별로 순위를 매겨 순위의 유사도를 비교</p></li>
-<li><p>Kendall’s τ</p>
-<ul class="simple">
-<li><p>ranking이 매겨진 수열 사이의 유사도를 측정</p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">stats</span>
-<span class="o">&gt;&gt;&gt;</span> <span class="n">h</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
-<span class="o">&gt;&gt;&gt;</span> <span class="n">w</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
-<span class="o">&gt;&gt;&gt;</span> <span class="n">z</span> <span class="o">=</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
-<span class="o">&gt;&gt;&gt;</span> <span class="n">stats</span><span class="o">.</span><span class="n">kendalltau</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">w</span><span class="p">)</span>
-<span class="n">SignificanceResult</span><span class="p">(</span><span class="n">statistic</span><span class="o">=</span><span class="mf">0.9999999999999999</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">=</span><span class="mf">0.016666666666666666</span><span class="p">)</span>
-<span class="o">&gt;&gt;&gt;</span> <span class="n">stats</span><span class="o">.</span><span class="n">kendalltau</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">z</span><span class="p">)</span>
-<span class="n">SignificanceResult</span><span class="p">(</span><span class="n">statistic</span><span class="o">=</span><span class="mf">0.19999999999999998</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">=</span><span class="mf">0.8166666666666667</span><span class="p">)</span>
-</pre></div>
-</div>
-</li>
-<li><p>Result</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img6.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_07" class="bg-primary mb-1" src="../../_images/img6.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 42 </span><span class="caption-text">Image 7</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>KL - RKL의 유사도는 매우 높음(0.889)</p></li>
-<li><p>KL과의 유사도를 비교해보면 FID infinity &gt; FID &gt; IS</p></li>
-<li><p>CleanFID-KID(0.96)을 제외한 나머지 metric간 유사도는 굉장히 낮음</p></li>
-<li><p>Inception network 기반의 metric 중에서는 FID infinity이 가장 높고, IS와 IS infinity score가 가장 낮음</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="is-inception-all-we-need">
-<h2>5. Is Inception all we need?<a class="headerlink" href="#is-inception-all-we-need" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>FID, Inception Score 등 대부분의 metric이 이미지의 feature 혹은 score 측정을 위해 inception-v3를 사용하는데 과연 적절한가?</p></li>
-<li><p>가정</p>
-<ul class="simple">
-<li><p>FID, FID infinity는 feature space가 gaussian distribution을 따른다는 가정하에 측정되는 score</p></li>
-</ul>
-</li>
-<li><p>실험</p>
-<ol class="arabic simple">
-<li><p>따라서 생성 모델을 통해 10K의 이미지를 생성하고</p></li>
-<li><p>원본의 20K의 이미지를 sampling</p></li>
-<li><p>각각의 이미지를 Inception network와 CLIP network를 통해 feature vector를 추출</p></li>
-<li><p>Gaussian model에 feature vector를 fitting</p></li>
-<li><p>이때 gaussian model을 기반으로 각 샘플의 확률값을 계산한다.</p></li>
-</ol>
-</li>
-<li><p>결과</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img7.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_08" class="bg-primary mb-1" src="../../_images/img7.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 43 </span><span class="caption-text">Image 8</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>확률 값이 낮은 tail 부분의 feature vector의 원본 이미지들은 퀄리티가 낮아야 함</p></li>
-<li><p>실제로 tail 부분의 확률을 갖는 이미지들을 확인해보면 CLIP을 보면 확실히 퀄리티가 떨어지는 반면 Inception의 이미지들은 좋은 퀄리티를 보이고 있음 → Gaussian 분포의 가정에 위배</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>5.2 Normality test for latent representation</strong></p>
-<ul>
-<li><p>위의 feature vector들을 1 Dimension에 투영시켜 normal distribution을 따르는 지 확인한다.</p></li>
-<li><p>실험</p>
-<ol class="arabic simple">
-<li><p>Inception, CLIP을 통해 feature vector를 추출한다.</p></li>
-<li><p>linear transformation 연산을 통해 각각 1-D로 투영시킨다.</p></li>
-<li><p>각각의 p-value를 구한다.</p>
-<ol class="arabic simple">
-<li><p>p-value : 어떠한 사건이 우연히 일어날 확률</p></li>
-<li><p>if p-value &lt; 0.05 ; 우연히 발생할 확률이 거의 없다. 인과관계가 있다.</p></li>
-<li><p>if p-value &gt; 0.05 ; 우연히 발생할 확률이 크다. 인과관계가 없다.</p></li>
-<li><p>gaussian normal distribution은 random을 기반으로하기때문에 인과관계가 작아야한다. 즉, p-value가 커야한다.</p></li>
-</ol>
-</li>
-</ol>
-</li>
-<li><p>결과</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img8.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_09" class="bg-primary mb-1" src="../../_images/img8.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 44 </span><span class="caption-text">Image 9</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>모든 test dataset에 대해 CLIP의 p-value값은 0.05를 넘어 random성을 유지하지만, Inception은 0.05보다 낮은 값을 보여 random성을 유지하지 못한다.</p></li>
-<li><p>따라서, Inception net을 통한 metric 측정보다 CLIP을 통한 metric 측정을 제안한다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DDIM.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DDIM</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="cycleGAN.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">CycleGAN</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">학습 자료</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. BackGround</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kl-divergence-kullback-leibler-divergence">2.1. KL-Divergence(Kullback-Leibler divergence)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inception-score-is">2.2. Inception Score(IS)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-frechet-inception-distance">2.3. FiD(Fréchet Inception Distance)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kernel-inception-distance">2.4. Kernel Inception Distance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-is">2.5. FID∞ &amp; IS∞</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clean-fid">2.5. Clean FiD</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#synthetic-dataset-as-a-benchmark">3. Synthetic dataset as a benchmark</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-between-evaluation-metrics">4. Comparison between evaluation metrics</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#is-inception-all-we-need">5. Is Inception all we need?</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>A Study on the Evaluation of Generative Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/A_Study_on_the_Evaluation_of_Generative_Models';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="CycleGAN" href="cycleGAN.html" />
+    <link rel="prev" title="DDIM" href="DDIM.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/A_Study_on_the_Evaluation_of_Generative_Models.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/A_Study_on_the_Evaluation_of_Generative_Models.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>A Study on the Evaluation of Generative Models</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">학습 자료</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. BackGround</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kl-divergence-kullback-leibler-divergence">2.1. KL-Divergence(Kullback-Leibler divergence)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inception-score-is">2.2. Inception Score(IS)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-frechet-inception-distance">2.3. FiD(Fréchet Inception Distance)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kernel-inception-distance">2.4. Kernel Inception Distance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-is">2.5. FID∞ &amp; IS∞</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clean-fid">2.6. Clean FiD</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#synthetic-dataset-as-a-benchmark">3. Synthetic dataset as a benchmark</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-between-evaluation-metrics">4. Comparison between evaluation metrics</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#is-inception-all-we-need">5. Is Inception all we need?</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <section class="tex2jax_ignore mathjax_ignore" id="a-study-on-the-evaluation-of-generative-models">
+<h1>A Study on the Evaluation of Generative Models<a class="headerlink" href="#a-study-on-the-evaluation-of-generative-models" title="Permalink to this heading">#</a></h1>
+<section id="id1">
+<h2>학습 자료<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>A Study on the Evaluation of Generative Models</p>
+<p><a class="reference external" href="https://arxiv.org/pdf/2206.10935.pdf">https://arxiv.org/pdf/2206.10935.pdf</a></p>
+</section>
+<hr class="docutils" />
+<section id="abstract">
+<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GAN, Diffusion등 생성 모델의 놀라운 발전이 이어지고있다.</p></li>
+<li><p>다만 이러한 생성모델을 평가하는 척도(metric)의 선정은 아직 어려운 문제로 남아있다.</p></li>
+<li><p>그나마 Inception Score(IS)나, FID Score를 통해 모델을 평가하고있지만 이 metric들도 완전하지 않음</p></li>
+<li><p>이 논문을 통해</p>
+<ul>
+<li><p>생성 평가의 지표에 대해 한번더 고찰하고</p></li>
+<li><p>현존하는 Metric에 대한 방향을 제시</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>최근 GAN, Diffusion 등 Implicit generative model들이 뛰어난 성능을 보여줌</p></li>
+<li><p>하지만 다른 task(classification, segmentation 등)와는 다르게 생성 모델의 metric을 정하는 것은 challenging (classification ; P&amp;R, F1 score / segmentation ; IOU(Intersection Over Union))</p></li>
+<li><p>그나마 이미지의 feature map이나 classifier score를 사용하는 FiD, Inception score가 잘 쓰이는 추세</p></li>
+<li><p>위 metric의 단점</p>
+<ol class="arabic simple">
+<li><p>real 이미지 분포의 space에서 해당 수치가 정말 유의미한 연관이 있는지 증명되지 않음</p></li>
+<li><p>pretrained model의 거대한 train set이 specific 이미지의 feature에 얼마나 좋은 성능을 미치는지 알수 없음(inception net ; imagenet / ddpm ; face)</p></li>
+</ol>
+</li>
+<li><p>Human study의 직관적인 방식도 있지만 time과 cost를 매우 필요로한다는 점과 model의 Diversity는 측정하기 어렵다는 단점</p>
+<ul>
+<li><p>e.g ) 하나의 좋은 이미지만 생성해도 좋은 score를 받을 수 있음</p></li>
+</ul>
+</li>
+<li><p>이 논문에서는</p>
+<ol class="arabic simple">
+<li><p>Image-GPT 모델을 통해 high quality의 new synthetic dataset을 생성</p></li>
+<li><p>여러 모델을 위의 데이터로 학습하고 FiD, IS등 다양한 metric을 측정</p></li>
+<li><p>이를 실제 KL Divergence, Reverse KL Divergence 값과 비교해서 metric의 유효성을 검증</p></li>
+<li><p>FID, IS등 다양한 metric의 base model로 쓰이는 Inception-V3과 CLIP 의 비교를 통해 Inception-V3 모델의 적합성을 검증</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="background">
+<h2>2. BackGround<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<section id="kl-divergence-kullback-leibler-divergence">
+<h3>2.1. KL-Divergence(Kullback-Leibler divergence)<a class="headerlink" href="#kl-divergence-kullback-leibler-divergence" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>두 확률분포의 유사도를 측정하는 지표</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+KL(P || Q) = \sum_{x} P(x) \log\left(\frac{P(x)}{Q(x)}\right)
+\]</div>
+<ul class="simple">
+<li><p>특징</p>
+<ul>
+<li><p>lower is better</p></li>
+<li><p>KL ≥ 0, (KL(p, q) = 0, if p ==q)</p></li>
+<li><p>KL(p, q) ≠ KL(q, p)  // not symmetric</p></li>
+<li><p>Reverse Kullback-Leibler Divergence(RKL) = KL(q, p)</p></li>
+<li><p>대부분 P가 True distribution, Q가 estimated distribution</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="inception-score-is">
+<h3>2.2. Inception Score(IS)<a class="headerlink" href="#inception-score-is" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>생성된 이미지의 Fidelity와 Diversity를 측정</p>
+<ul>
+<li><p>fidelity : 특정 Label의 이미지를 얼마나 잘 예측하는지</p></li>
+<li><p>diversity : 다양한 class의 이미지들을 얼마나 고르게 생성해내는지</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img0.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_01" class="bg-primary mb-1" src="../../_images/img0.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 36 </span><span class="caption-text">Image 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\text{IS}(G) = \exp\left(\mathbb{E}_x \left[D_{\text{KL}}(P(y|x) \, || \, P(y))\right]\right)
+\]</div>
+<ul class="simple">
+<li><p>특징</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(P(y|x)\)</span> ; 모델의 Fidelity, <span class="math notranslate nohighlight">\(P(y)\)</span>; 모델의 Diversity</p></li>
+<li><p>higher is better</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="fid-frechet-inception-distance">
+<h3>2.3. FiD(Fréchet Inception Distance)<a class="headerlink" href="#fid-frechet-inception-distance" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>real 이미지와 generated 이미지의 Feature vector를 추출 후 평균과 공분산을 통해 계산(Frechet distance)하는 평가지표</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+FID = \lVert \mu_x - \mu_g \rVert^2 + \text{Tr}(\Sigma_x + \Sigma_g - 2(\Sigma_x\Sigma_g)^{1/2})
+\]</div>
+<ul class="simple">
+<li><p>특징</p>
+<ul>
+<li><p>Inception-V3의 마지막 pooling layer의 feature map을 사용</p></li>
+<li><p>Lower is better</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mu_x - \mu_g\)</span>; 이미지의 Quality를 측정</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{Tr}(\Sigma_x + \Sigma_g - 2(\Sigma_x\Sigma_g)^{1/2})\)</span>; 모델의 Diversity를 측정</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="kernel-inception-distance">
+<h3>2.4. Kernel Inception Distance<a class="headerlink" href="#kernel-inception-distance" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>FiD에서 Frechet distance를 사용하는 대신 kernel trick을 사용해 확률 분포의 유사도를 계산</p></li>
+<li><p>특징</p>
+<ul>
+<li><p>적은 데이터셋의 평가에 효과적임</p></li>
+<li><p>FiD metric보다 계산 시간이 오래 걸림 (FiD : O(n), KiD : O(n^2))</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="fid-is">
+<h3>2.5. FID∞ &amp; IS∞<a class="headerlink" href="#fid-is" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p><a class="reference external" href="https://arxiv.org/pdf/1911.07023.pdf">해당 논문</a>에서 FiD와 IS metric에 bias가 있음을 증명하고 dataset의 sampling 기법을 변경(gaussian random sampling → sobol sequence sampling)하여 unbiased 한 metric을 제안</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img1.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_02" class="bg-primary mb-1" src="../../_images/img1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 37 </span><span class="caption-text">Image 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="clean-fid">
+<h3>2.6. Clean FiD<a class="headerlink" href="#clean-fid" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Inception-v3에 이미지를 통과시키기 위해 image resize 과정이 포함되는데 이는 score값에 영향을 줄 수 있어 best performance의 metric을 측정하기 위한 all in one process를 제안</p></li>
+</ul>
+</section>
+</section>
+<section id="synthetic-dataset-as-a-benchmark">
+<h2>3. Synthetic dataset as a benchmark<a class="headerlink" href="#synthetic-dataset-as-a-benchmark" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img2.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_03" class="bg-primary mb-1" src="../../_images/img2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 38 </span><span class="caption-text">Image 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>imagenet의 데이터를 ImageGPT를 통해 재생성(a.k.a. NotImageNet)</p>
+<ul>
+<li><p>imageGPT</p>
+<ul>
+<li><p>vision 분야에 transformer(in gpt-2)를 사용 + labeling dataset이 필요없는 자기지도 학습 방식</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img3.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_04" class="bg-primary mb-1" src="../../_images/img3.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 39 </span><span class="caption-text">Image 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>imagenet challenge에서도 상당한 score를 보임</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>이를 생성모델에 통과한 <span class="math notranslate nohighlight">\(P_{2}(\hat{x})\)</span>과 <span class="math notranslate nohighlight">\(P_{1}(\hat{x})\)</span> 두 분포를 비교</p></li>
+<li><p>한계</p>
+<ul class="simple">
+<li><p>explicit model에만 적용 가능하고 implicit model에는 적용할 수 없음</p></li>
+<li><p>explicit model : 생성되는 데이터의 분포를 명시적으로 모델링하여 학습하고 주로 Gaussian Noise로부터 이미지를 생성 (VAE …)</p></li>
+<li><p>implicit model : 데이터의 생성 과정에 대해 학습하고 주로 주어진 데이터 분포로부터 샘플링하여 학습 (GAN …)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="comparison-between-evaluation-metrics">
+<h2>4. Comparison between evaluation metrics<a class="headerlink" href="#comparison-between-evaluation-metrics" title="Permalink to this heading">#</a></h2>
+<p><strong>4.1. Volatility</strong></p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img4.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_05" class="bg-primary mb-1" src="../../_images/img4.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 40 </span><span class="caption-text">Image 5</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>KL, RKL은 적은 양의 Epoch(15-20) 후에 바로 수렴하는 반면 FID와 IS는 큰 변동성을 보임</p></li>
+<li><p>모델의 Capacity가 증가할수록 KL과 RKL의 수치가 개선되는 것을 확인</p></li>
+<li><p>FID나 IS가 KL, RKL의 그래프와 매우 다른 형태를 띄는것을 확인(특히 IS)</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img5.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_06" class="bg-primary mb-1" src="../../_images/img5.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 41 </span><span class="caption-text">Image 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>FID나 (negative)IS가 KL과는 높은 correlation을 보이지만 RKL과는 높지 않은 correlation을 보인다.</p></li>
+<li><p>모델의 Capacity에 따라 KL, RKL의 수치 변화는 크지 않은 데 반해 FID나 IS는 굉장히 큰 수치의 변화를 보여준다.</p></li>
+</ul>
+<p><strong>4.2. Ranking Correlation</strong></p>
+<ul>
+<li><p>여러 모델에 대해 metric 별로 순위를 매겨 순위의 유사도를 비교</p></li>
+<li><p>Kendall’s τ</p>
+<ul class="simple">
+<li><p>ranking이 매겨진 수열 사이의 유사도를 측정</p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">scipy</span><span class="w"> </span><span class="kn">import</span> <span class="n">stats</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">h</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">w</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">z</span> <span class="o">=</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">stats</span><span class="o">.</span><span class="n">kendalltau</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">w</span><span class="p">)</span>
+<span class="n">SignificanceResult</span><span class="p">(</span><span class="n">statistic</span><span class="o">=</span><span class="mf">0.9999999999999999</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">=</span><span class="mf">0.016666666666666666</span><span class="p">)</span>
+<span class="o">&gt;&gt;&gt;</span> <span class="n">stats</span><span class="o">.</span><span class="n">kendalltau</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">z</span><span class="p">)</span>
+<span class="n">SignificanceResult</span><span class="p">(</span><span class="n">statistic</span><span class="o">=</span><span class="mf">0.19999999999999998</span><span class="p">,</span> <span class="n">pvalue</span><span class="o">=</span><span class="mf">0.8166666666666667</span><span class="p">)</span>
+</pre></div>
+</div>
+</li>
+<li><p>Result</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img6.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_07" class="bg-primary mb-1" src="../../_images/img6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 42 </span><span class="caption-text">Image 7</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>KL - RKL의 유사도는 매우 높음(0.889)</p></li>
+<li><p>KL과의 유사도를 비교해보면 FID infinity &gt; FID &gt; IS</p></li>
+<li><p>CleanFID-KID(0.96)을 제외한 나머지 metric간 유사도는 굉장히 낮음</p></li>
+<li><p>Inception network 기반의 metric 중에서는 FID infinity가 가장 높고, IS와 IS infinity score가 가장 낮음</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="is-inception-all-we-need">
+<h2>5. Is Inception all we need?<a class="headerlink" href="#is-inception-all-we-need" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>FID, Inception Score 등 대부분의 metric이 이미지의 feature 혹은 score 측정을 위해 inception-v3를 사용하는데 과연 적절한가?</p></li>
+<li><p>가정</p>
+<ul class="simple">
+<li><p>FID, FID infinity는 feature space가 gaussian distribution을 따른다는 가정하에 측정되는 score</p></li>
+</ul>
+</li>
+<li><p>실험</p>
+<ol class="arabic simple">
+<li><p>따라서 생성 모델을 통해 10K의 이미지를 생성하고</p></li>
+<li><p>원본의 20K의 이미지를 sampling</p></li>
+<li><p>각각의 이미지를 Inception network와 CLIP network를 통해 feature vector를 추출</p></li>
+<li><p>Gaussian model에 feature vector를 fitting</p></li>
+<li><p>이때 gaussian model을 기반으로 각 샘플의 확률값을 계산한다.</p></li>
+</ol>
+</li>
+<li><p>결과</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img7.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_08" class="bg-primary mb-1" src="../../_images/img7.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 43 </span><span class="caption-text">Image 8</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>확률 값이 낮은 tail 부분의 feature vector의 원본 이미지들은 퀄리티가 낮아야 함</p></li>
+<li><p>실제로 tail 부분의 확률을 갖는 이미지들을 확인해보면 CLIP의 이미지들은 확실히 퀄리티가 떨어지는 반면 Inception의 이미지들은 좋은 퀄리티를 보이고 있음 → Gaussian 분포의 가정에 위배</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>5.2 Normality test for latent representation</strong></p>
+<ul>
+<li><p>위의 feature vector들을 1 Dimension에 투영시켜 normal distribution을 따르는 지 확인한다.</p></li>
+<li><p>실험</p>
+<ol class="arabic simple">
+<li><p>Inception, CLIP을 통해 feature vector를 추출한다.</p></li>
+<li><p>linear transformation 연산을 통해 각각 1-D로 투영시킨다.</p></li>
+<li><p>각각의 p-value를 구한다.</p>
+<ol class="arabic simple">
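+<li><p>p-value : 귀무가설(여기서는 1-D로 투영된 feature가 normal distribution을 따른다는 가설)이 참일 때, 관측된 것만큼 극단적인 결과가 우연히 나올 확률</p></li>
+<li><p>if p-value &lt; 0.05 ; 우연히 발생할 확률이 거의 없다. 즉, 귀무가설을 기각하며 normal distribution을 따르지 않는다고 판단한다.</p></li>
+<li><p>if p-value &gt; 0.05 ; 우연히 발생할 확률이 크다. 즉, 귀무가설을 기각할 수 없으며 normal distribution을 따른다는 가정과 모순되지 않는다.</p></li>
+<li><p>따라서 feature가 gaussian normal distribution을 따른다면 p-value가 커야한다.</p></li>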
+</ol>
+</li>
+</ol>
+</li>
+<li><p>결과</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img8.png"><img alt="A_Study_on_the_Evaluation_of_Generative_Models_09" class="bg-primary mb-1" src="../../_images/img8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 44 </span><span class="caption-text">Image 9</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>모든 test dataset에 대해 CLIP의 p-value값은 0.05를 넘어 random성을 유지하지만, Inception은 0.05보다 낮은 값을 보여 random성을 유지하지 못한다.</p></li>
+<li><p>따라서, Inception net을 통한 metric 측정보다 CLIP을 통한 metric 측정을 제안한다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DDIM.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DDIM</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="cycleGAN.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">CycleGAN</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">학습 자료</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. BackGround</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kl-divergence-kullback-leibler-divergence">2.1. KL-Divergence(Kullback-Leibler divergence)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inception-score-is">2.2. Inception Score(IS)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-frechet-inception-distance">2.3. FiD(Fréchet Inception Distance)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#kernel-inception-distance">2.4. Kernel Inception Distance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fid-is">2.5. FID∞ &amp; IS∞</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clean-fid">2.5. Clean FiD</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#synthetic-dataset-as-a-benchmark">3. Synthetic dataset as a benchmark</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-between-evaluation-metrics">4. Comparison between evaluation metrics</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#is-inception-all-we-need">5. Is Inception all we need?</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/AnimateDiff.html b/docs/review/AnimateDiff.html
old mode 100644
new mode 100755
index fca3d2a6..0ef5ce0e
--- a/docs/review/AnimateDiff.html
+++ b/docs/review/AnimateDiff.html
@@ -1,1179 +1,1199 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>AnimateDiff &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/AnimateDiff';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Animate Anyone" href="Animate_Anyone.html" />
-    <link rel="prev" title="VideoLDM" href="VideoLDM.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/AnimateDiff.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/AnimateDiff.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>AnimateDiff</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">2.1 Text-to-image diffusion models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#personalizing-t2i-models">2.2 Personalizing T2I models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animating-personalized-t2is">2.3 Animating personalized T2Is</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary">3. Preliminary</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion">3.1 Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation-lora">3.2 Low-Rank Adaptation(LoRA)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4. AnimateDiff</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#alleviate-negative-effects-from-training-data-with-domain-adapter">4.1 Alleviate Negative Effects from Training Data with Domain Adapter</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#learn-motion-priors-with-motion-module">4.2 Learn Motion Priors with Motion Module</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapt-to-new-motion-patterns-with-motionlora">4.3 Adapt to New Motion Patterns with MotionLoRA</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animatediff-in-practice">4.4 AnimateDiff in Practice</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training">Training</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#inference">Inference</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.1 Qualitative Results</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">5.2 Quantitative Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.3 Ablation Study</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#domain-adapter">Domain Adapter</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-module-design">Motion module design</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#efficiency-of-motionlora">Efficiency of MotionLoRA</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-generation">5.4 Controllable Generation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">7. 실습</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> AnimateDiff: Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning (ICLR 2024)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2307.04725">https://arxiv.org/abs/2307.04725</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/guoyww/AnimateDiff?tab=readme-ov-file">guoyww/AnimateDiff</a></p></li>
-<li><p>Project Page : <a class="reference external" href="https://animatediff.github.io">https://animatediff.github.io</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
-<li><p><strong>Last updated on June. 11, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="animatediff">
-<h1>AnimateDiff<a class="headerlink" href="#animatediff" title="Permalink to this heading">#</a></h1>
-<blockquote>
-<div><p>📌 논문의 의의<br />
-In this paper, we present <strong>AnimateDiff</strong>, a practical framework for animating personalized T2I models without requiring model-specific tuning.<br />
-<strong>AnimateDiff = public personalized T2I models + domain adapter &amp; plug-and-play Motion Module + MotionLoRA</strong></p>
-</div></blockquote>
-<section id="abstract">
-<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<p>T2I diffusion model과 DreamBooth나 LoRA와 같은 개인화 기술이 발전함에 따라 사람들은 적절한 비용을 지불하여 고화질의 원하는 이미지를 얻을 수 있게 되었다. 하지만, 기존 고화질 이미지 생성 모델(personalized T2I)에 움직임을 추가하거나 애니메이션을 생성하도록 하는 것은 여전히 어렵다. <strong>본 논문에서는 추가적인 훈련(model-specific tuning)없이 기존 고화질 이미지 생성모델에 움직임을 추가하는 실용적인 프레임 워크를 제안한다.</strong> <strong>본 논문에서 제안하는 프레임 워크의 핵심은 plug-and-play motion module을 활용하는 것으로 이 motion module을 한번 학습하면, 어떤 이미지 생성 모델과도 융합할 수 있다.</strong> 본 논문에서 제안하는 학습 방법을 이용하면 motion module은 real-world 비디오로 부터 효과적으로 motion prior를 학습할 수 있다. 한번 학습된 motion module은 이미지 생성 모델에 덧붙여 애니메이션 생성 모델로 사용할 수 있다. 또한 AnimateDiff를 위한 간단한 파인튜닝 방식인 MotionLoRA를 제안한다. 이는 사전 학습된 motion module이 저비용으로 새로운 움직임 패턴을 학습할 수 있게 해준다. (ex. 촬영 기법) AnimateDiff와 MotionLoRA를 공개된 이미지 생성 모델에 부착하여 실험했으며 이를 통해 본 논문의 방식이 이미지 퀄리티와 다양한 움직임을 보전하면서도 자연스러운 애니메이션 클립을 생성할 수 있음을 보였다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/inference_pipeline.png"><img alt="inference_pipeline" class="bg-primary mb-1" src="../../_images/inference_pipeline.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 601 </span><span class="caption-text">inference pipeline</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Core Framework</strong></p>
-<ul>
-<li><p>public T2I models</p>
-<ul>
-<li><p>personalized T2Is from the same base T2I (SD1.5)</p>
-<ul>
-<li><p>can download finetuned T2I from <a class="reference external" href="https://civitai.com/">civitai</a> or hugging face</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>domain adapter</p>
-<ul>
-<li><p>LoRA기반 domain adapter를 base T2I 모델에 더해 video dataset을 학습할때 발생할수 있는 domain gap을 줄였다.</p></li>
-<li><p>여기서 말하는 domain gap이란 video의 각 프레임을 나누어 이미지로 봤을때 발생할 수 있는 motion blur, compression artifacts, watermarks등을 말한다.</p></li>
-</ul>
-</li>
-<li><p>training strategy of a plug-and-play motion module</p>
-<ul>
-<li><p>learns transferable motion priors from real-world videos through the proposed training strategy</p></li>
-<li><p>한번 학습하고 나면 다른 T2I모델과 결합해 animation generator로 사용할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>MotionLoRA</p>
-<ul>
-<li><p>adapt the pre-trained motion module to specific motion patterns</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>텍스트 프롬프트를 입력하여 이미지를 생성하는 디퓨전 모델(T2I diffusion models)의 발전으로 많은 예술가와 아마추어들이 시각 컨텐츠를 보다 쉽게 생성할 수 있게 되었다. 기존 T2I 모델의 생성능력(creativity)를 자극하기 위해 DreamBooth와 LoRA와 같은 가벼운 개인화 방식들이 제안되었다. 이러한 방식들은 작은 데이터셋과 적당한 하드웨어에서도 customized finetuning을 할 수 있게 해준다. 그로인해 사용자들이 적은 비용으로도 base T2I model을 새로운 domain에 적용하거나 시각적 퀄리티를 높일 수 있게 되었다. 그 결과 AI 아티스트와 아마추어 커뮤니티에서 상당량의 personalized models을 Civitai나 Hugging Face와 같은 플랫폼에 게시했다. 이러한 모델들이 상당히 좋은 수준의 이미지를 생성할 수 있지만, 정적인 이미지만 생성할 수 있다는 한계가 있다. 반면, 애니메이션을 생성하는 기술이 영화나 카툰과 같은 실산업에서 더 요구된다. 본 연구에서는 고화질 T2I 모델을 파인튜닝 없이 곧바로 애니메이션 생성 모델로 변환하는 것을 목표로 한다. 파인 튜닝을 위한 데이터 수집과 컴퓨팅 자원의 필요는 아마추어 사용자에게 걸림돌이 된다.</p>
-<p><strong>본 논문에서는 AnimateDiff를 제안하는데 이는 personalized T2I model의 능력을 보전하면서 애니메이션을 생성하는 문제를 해결할 수 있는 효과적인 파이프라인이다.</strong> AnimateDiff의 핵심은 비디오 데이터셋(WebVid-10M)으로부터 타당한 motion 정보를 plug-and-play motion module이 학습하는 것이다. motion module의 학습은 세가지 단계로 구성된다.</p>
-<ol class="arabic">
-<li><p><strong>domain adapter 파인튜닝</strong></p>
-<p>visual distribution of the target video dataset(이미지 품질차이, 동영상 워터마크, 압축으로 인한 artifacts)에 대한 부분은 이 모듈이 학습함으로써 이후 motion관련 모듈들이 motion에만 집중할 수 있도록 한다.</p>
-</li>
-<li><p><strong>새로운 motion module</strong></p>
-<p>비디오를 입력받을수 있게 inflate시킨 base T2I 모델에 domain adapter를 더한 모델에 모션 모델링을 위한 모션 모듈을 추가한다. 이 모듈을 학습할때는 domain adapter와 base model을 freeze한다. 이렇게 하면 motion module이 움직임에 대한 부분을 전반적으로 학습하여 모듈별 학습이 가능해진다. (다른 그림체를 원할경우 base T2I+domain adapter를 바꾸면 됨)</p>
-</li>
-<li><p><strong>(optional) MotionLoRA 학습</strong></p>
-<p>MotionLoRA의 경우 특정 motion을 적은 수의 reference videos와 학습횟수로 학습하는 것을 목표로하는 모듈이다. 이름과 같이 Low-Rank Adaptation (LoRA) (Hu et al., 2021)를 이용하는데  새로운 motion pattern을 적은수(50개)의 reference video만으로 학습시킬수 있다. 또한 차지하는 메모리도 적어 추가학습이나 모델을 공유,배포하는데에도 유리하다.</p>
-</li>
-</ol>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_pipeline.png"><img alt="training_pipeline" class="bg-primary mb-1" src="../../_images/training_pipeline.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 602 </span><span class="caption-text">training pipeline</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<section id="text-to-image-diffusion-models">
-<h3>2.1 Text-to-image diffusion models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Diffusion models</p>
-<ul>
-<li><p>GLIDE (<a class="reference external" href="https://arxiv.org/abs/2112.10741">Nichol et al., 2021</a>) 는 text condition을 통해 이미지를 생성하는 방법을 소개하고, classifier guidance를 조절하여 더 나은 이미지 결과물을 얻는 방법을 설명했다.</p>
-<ul>
-<li><p><strong>G</strong>uided <strong>L</strong>anguage to <strong>I</strong>mage <strong>D</strong>iffusion for Generation and <strong>E</strong>diting</p></li>
-</ul>
-</li>
-<li><p>DALL-E2 (Ramesh et al., 2022)는 CLIP을 이용하여 text-image 일관성을 향상시켰다.</p></li>
-<li><p>Imagen (Saharia et al., 2022)은 LLM과 cascade 구조를 이용하여 photorealistic한 결과물을 얻고자 했다.</p></li>
-<li><p><strong>Stable Diffusion (Rombach et al., 2022)</strong>은 auto-encoder의 latent space에서 diffusion 과정을 수행함으로써 효율을 높였다. (3.1 참고)</p></li>
-<li><p>eDiff-I (<a class="reference external" href="https://www.google.com/search?client=safari&amp;rls=en&amp;q=e+diff+i&amp;ie=UTF-8&amp;oe=UTF-8">Balaji et al., 2022</a>)는 디퓨전 모델들을 앙상블 학습시켜 generation(denoising) 단계별로 적절한 디퓨전 모델로 denoise를 수행하고자 했다.</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="reference internal image-reference" href="../../_images/clip.png"><img alt="clip_pipeline" src="../../_images/clip.png" style="width: 200px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 603 </span><span class="caption-text">CLIP:Contrastive language-image pre-training</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="reference internal image-reference" href="../../_images/imagen.png"><img alt="imagen_pipeline" src="../../_images/imagen.png" style="width: 200px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 604 </span><span class="caption-text">Imagen</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="reference internal image-reference" href="../../_images/eDiff-I.png"><img alt="eDiff-I_pipeline" src="../../_images/eDiff-I.png" style="width: 200px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 605 </span><span class="caption-text">eDiff-I</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="personalizing-t2i-models">
-<h3>2.2 Personalizing T2I models<a class="headerlink" href="#personalizing-t2i-models" title="Permalink to this heading">#</a></h3>
-<p>사전학습된 T2I 모델을 활용하기 위해 효율적인 개인화(personalization)방법에 대한 연구가 뜨겁다. 여기서 개인화란 reference images를 통해 (새로운) concepts나 style을 사전학습된 모델에 추가하는 것을 말한다. (손을 잘그리는 모델, 눈을 잘그리는 모델, 특정 그림체를 학습한 모델 등) 이를 위한 가장 단순한 방법은 전체 모델을 fine-tuning 시키는 것이다. 이를 통해 전체적인 퀄리티가 높아질 수도 있지만, 기존 학습데이터를 잊는 catastrophic forgetting이 발생할 수도 있다는 문제가 있다. 이러한 문제는 특히 reference image 데이터가 적을때 발생한다.</p>
-<ul class="simple">
-<li><p>DreamBooth (<a class="reference external" href="https://dreambooth.github.io">Ruiz et al., 2023</a>) 은 매우 적은 이미지를 사용하면서도 preservation loss를 추가하여 전체 모델을 fine-tuning 시켰다.</p></li>
-<li><p>Textual Inversion (Gal et al., 2022) 은 새로운 concept 마다 token embedding을 최적화 하였다.</p></li>
-<li><p><strong>Low-Rank Adaptation (LoRA) (<a class="reference external" href="https://github.com/microsoft/LoRA">Hu et al., 2021</a>)</strong> 은 LoRA layer를 추가하여 이에 대해서만 fine-tuning을 수행했다. (Sec 3.2 참고)</p></li>
-<li><p>그외의 encoder-based approaches (<a class="reference external" href="https://tuning-encoder.github.io">Gal et al., 2023</a>; <a class="reference external" href="https://arxiv.org/abs/2304.02642">Jia et al., 2023</a>)</p></li>
-</ul>
-</section>
-<section id="animating-personalized-t2is">
-<h3>2.3 Animating personalized T2Is<a class="headerlink" href="#animating-personalized-t2is" title="Permalink to this heading">#</a></h3>
-<p>personalized T2I를 애니메이션화 시키는 연구가 많지는 않지만 아래의 연구들과 관련있다.</p>
-<ul>
-<li><p>Text2Cinemagraph <a class="reference external" href="https://text2cinemagraph.github.io/website/">(Mahapatra et al., 2023)</a> 는 flow prediction을 통해 cinemagraph를 생성하고자 했다.</p></li>
-<li><p>Align-Your-Latent <a class="reference external" href="https://www.notion.so/AnimateDiff-17444a06eda84e14abaca781b4f71344?pvs=21">(Blattmann et al., 2023)</a> 는 general video generator내의 frozen image layers가 personalizing이 가능함을 확인했다.</p></li>
-<li><p>video generation models  <strong>← 결과 비교시 사용</strong></p>
-<ul>
-<li><p><strong>Tune-a-Video</strong> <a class="reference external" href="https://github.com/showlab/Tune-A-Video">(Wu et al., 2023)</a> 는 단일 비디오로 적은 수의 파라미터만 파인튜닝하는 방식을 제안했다. (SD+temporal attn 구조를 가지고 있음)</p></li>
-<li><p><strong>Text2Video-Zero</strong> <a class="reference external" href="https://github.com/Picsart-AI-Research/Text2Video-Zero">(Khachatryan et al., 2023)</a> 는 사전학습한 T2I모델을 활용하여 추가적인 학습과정 없이 애니메이션화 할 수 있는 방법을 제안했다. 이는 사전에 정의된 affine matrix를 사용하여 latent warping을 하는 방식이다.</p></li>
-<li><p>pretrained T2I + temporal structures 관련 연구들이 많다.</p>
-<p>(Esser et al., 2023; Zhou et al., 2022a; Singer et al., 2022; Ho et al., 2022b,a; Ruan et al., 2023; Luo et al., 2023; Yin et al., 2023b,a; Wang et al., 2023b; Hong et al., 2022; Luo et al., 2023)</p>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="preliminary">
-<h2>3. Preliminary<a class="headerlink" href="#preliminary" title="Permalink to this heading">#</a></h2>
-<section id="stable-diffusion">
-<h3>3.1 Stable Diffusion<a class="headerlink" href="#stable-diffusion" title="Permalink to this heading">#</a></h3>
-<p>Stable Diffusion (<a class="reference external" href="https://github.com/CompVis/stable-diffusion">Rombach et al., 2022</a>), the base T2I model used in our work</p>
-<ul>
-<li><p>open-sourced, well-developed community, many high-quality personalized T2I models for eval</p></li>
-<li><p>사전 학습된 encoder(<span class="math notranslate nohighlight">\(\mathcal E\)</span>)와 decoder(<span class="math notranslate nohighlight">\(\mathcal D\)</span>)를 이용하여 latent space상에서 diffusion process를 수행</p></li>
-<li><p>인코딩된 이미지 <span class="math notranslate nohighlight">\(z_0=\mathcal E(x_0)\)</span> 의 경우 아래의 forward diffusion 과정을 통해 <span class="math notranslate nohighlight">\(z_t\)</span> 로 변환됨</p></li>
-<li><p>Forward diffusion for <span class="math notranslate nohighlight">\(t=1,2,…,T\)</span></p>
-<div class="math notranslate nohighlight">
-\[
-    z_t=\sqrt{\bar \alpha_t}z_0+\sqrt{1-\bar\alpha_t}\epsilon,\space \epsilon \sim \mathcal N(0,I)  \tag{1}
-    \]</div>
-<ul class="simple">
-<li><p>pre-defined <span class="math notranslate nohighlight">\(\barα_t\)</span> determines the noise strength at step <span class="math notranslate nohighlight">\(t\)</span></p></li>
-<li><p>The denoising network <span class="math notranslate nohighlight">\(ϵ_θ(·)\)</span> learns to reverse this process by predicting the added noise, encouraged by an MSE loss</p></li>
-</ul>
-</li>
-<li><p>MSE loss</p>
-<div class="math notranslate nohighlight">
-\[
-    \mathcal L=\Bbb E_{\mathcal E(x_0),y,\epsilon \sim \mathcal N(0,I),t}\big [\| \epsilon-\epsilon_\theta(z_t,t,\tau_\theta(y))\|_2^2\big] \tag{2}
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(y\)</span> is the text prompt corresponding to <span class="math notranslate nohighlight">\(x_0\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(τ_θ(·)\)</span> is a text encoder mapping the prompt to a vector sequence.</p></li>
-<li><p>In SD, <span class="math notranslate nohighlight">\(ϵ_θ(·)\)</span> is implemented as a UNet (down×4, middle, up×4 blocks; ResNet, spatial self-attn, cross-attn)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="low-rank-adaptation-lora">
-<h3>3.2 Low-Rank Adaptation(LoRA)<a class="headerlink" href="#low-rank-adaptation-lora" title="Permalink to this heading">#</a></h3>
-<p>Low-Rank Adaptation(LoRA) (<a class="reference external" href="https://arxiv.org/pdf/2106.09685.pdf">Hu et al., 2021</a>), which helps understand the domain adapter (Sec. 4.1) and MotionLoRA (Sec. 4.3) in AnimateDiff</p>
-<ul>
-<li><p>language model에서 처음 등장한 개념으로 거대 모델의 fine-tuning을 빠르게 수행하기 위해 제안된 개념이다.</p></li>
-<li><p>LoRA는 모델의 전체 파라미터를 fine-tuning하지 않고, rank-decomposition 행렬 쌍을 추가하여 새롭게 추가된 weight만 최적화 시키는 것이다.</p></li>
-<li><p>기존 파라미터는 고정함으로써 finetuning시 발생할 수 있는 catastrophic forgetting(<a class="reference external" href="https://www.notion.so/AnimateDiff-17444a06eda84e14abaca781b4f71344?pvs=21">Kirkpatrick et al., 2017</a>)을 예방할 수 있다.</p></li>
-<li><p>The new model weight with LoRA</p>
-<div class="math notranslate nohighlight">
-\[
-    \mathcal W'=\mathcal W+\vartriangle\mathcal W=\mathcal W+AB^T \tag{3}
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(A ∈ R ^{m×r}\)</span> , <span class="math notranslate nohighlight">\(B ∈ R ^{n×r}\)</span> are a pair of rank-decomposition matrices, <span class="math notranslate nohighlight">\(r\)</span> is a hyper-parameter, which is referred to as the rank of LoRA layers</p></li>
-<li><p>attention layer에서만 사용할수 있는 것은 아니지만 실제로는 주로 attention layer에서 사용된다. LoRA를 통해 fine-tuning시 cost와 storage를 절약할 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="id1">
-<h2>4. AnimateDiff<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<blockquote>
-<div><p>➕ <strong>Architecture Overall</strong><br />
-본 논문에서 제안하는 모델의 핵심은 비디오 데이터를 통해 transferable motion prior를 학습하는 것이다. 이렇게 학습한 motion module을 personalized T2I 모델에 곧바로 적용할 수 있다. <br />
-왼쪽 그림의 하늘색 모델이 <strong>motion module</strong>이고, 초록색 영역이 optional <strong>MotionLoRA</strong>이다. AnimateDiff를 T2I모델에 삽입하여 animation generator로 사용할 수 있다. <br />
-이를 위한 AnimateDiff에는 <strong>학습해야 하는 3개의 모듈</strong>이 있다.</p>
-<ul class="simple">
-<li><p><strong>domain adapter</strong>
-- base T2I pre-training data와 our video training data간의 간극을 줄여주기 위한 것으로 학습과정에만 사용된다.</p></li>
-<li><p><strong>motion module</strong> <br />
-- motion prior를 학습하기 위한 모듈</p></li>
-<li><p><strong>MotionLoRA</strong>(optional)<br />
-- pretrained motion module을 새로운 움직임 패턴(카메라 워크)으로 조정하기 위한것</p></li>
-</ul>
-</div></blockquote>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/inference_pipeline.png"><img alt="inference_pipeline" class="bg-primary mb-1" src="../../_images/inference_pipeline.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 606 </span><span class="caption-text">inference pipeline</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<blockquote>
-<div><p>➕ <strong>Training Steps</strong><br />
-본 논문에서 제안하는 각 모듈은 따로따로 학습시키며 각각을 학습시킬때 나머지 영역은 freeze 시킨다. 학습시 사용하는 objective function은 SD과 거의 같다.</p>
-<ul class="simple">
-<li><p>Training step 1. Domain Adapter</p></li>
-<li><p>Training step 2. Motion Module</p></li>
-<li><p>Training step 3. Optional motionLoRA</p></li>
-</ul>
-</div></blockquote>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_pipeline.png"><img alt="training_pipeline" class="bg-primary mb-1" src="../../_images/training_pipeline.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 607 </span><span class="caption-text">training pipeline</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="alleviate-negative-effects-from-training-data-with-domain-adapter">
-<h3>4.1 Alleviate Negative Effects from Training Data with Domain Adapter<a class="headerlink" href="#alleviate-negative-effects-from-training-data-with-domain-adapter" title="Permalink to this heading">#</a></h3>
-<p>비디오 데이터셋은 이미지 데이터셋에 비해 수집하기 어렵다. 동영상 데이터셋 WebVid (Bain et al., 2021)과 이미지 데이터셋 LAION-Aesthetic (Schuhmann et al., 2022)를 비교해보면, 품질차이도 큼을 알 수 있다.</p>
-<p>각 비디오 프레임을 개별 이미지로 다루게 되면 motion blur, compression artifacts, watermark등을 포함하고 있을 수도 있다. 따라서 T2I 모델을 훈련할 때 사용한 이미지 데이터셋에 비해 motion prior를 학습하기 위해 사용한 동영상 데이터 셋의 품질은 무시할 수 없을 만큼의 차이가 있다. 이 때문에 직접적으로 비디오 데이터셋을 이용하여 애니메이션 생성 모델을 학습할 경우, 생성한 애니메이션의 품질이 제한 될 수 있다.</p>
-<p>동영상 데이터의 낮은 품질로 인해 해당 특성을 motion module이 학습하는 것을 피하고 base T2I의 지식을 보전하기 위해, 네트워크를 분리하여 각 도메인(영상/이미지)의 정보에 맞게 피팅하는 방식(domain adapter)을 제안한다. inference 시에는 domain adapter를 제거하였으며 앞서 언급한 domain gap에 의한 부정적 영향을 제거하는데 효과적이라는 것을 보여준다. domain adapter layer는 LoRA를 활용했으며, self-, cross-attention layer들을 base T2I model에 Fig. 3과 같이 추가하였다. 아래 query projection을 예로 살펴보면,</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/lora.png"><img alt="lora" class="bg-primary mb-1" src="../../_images/lora.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 608 </span><span class="caption-text">LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<div class="math notranslate nohighlight">
-\[
-Q=\mathcal W^Qz+\text{AdapterLayer}(z)=\mathcal W^Qz+\alpha \cdot AB^Tz \tag{4}
-\]</div>
-<p><span class="math notranslate nohighlight">\(Q\)</span> 는 query, <span class="math notranslate nohighlight">\(z\)</span> 는 internal feature, <span class="math notranslate nohighlight">\(\alpha\)</span> 는 상수로 inference time에 domain adapter의 영향력을 조절한다. (기본값은 1 / domain adapter의 효과를 완전히 제거하고 싶다면 <span class="math notranslate nohighlight">\(\alpha\)</span>를 0으로) 나머지 모델의 파라미터는 freeze하고 domain adapter의 파라미터들만 비디오 데이터셋으로 부터 랜덤하게 샘플한 static frame들을 이용하여 최적화했다. 이때 objective function은 Eq. (2)를 사용했다. (아직까지는 이미지 생성 모델)</p>
-</section>
-<section id="learn-motion-priors-with-motion-module">
-<h3>4.2 Learn Motion Priors with Motion Module<a class="headerlink" href="#learn-motion-priors-with-motion-module" title="Permalink to this heading">#</a></h3>
-<p>motion dynamics를 사전학습된 T2I 모델과 공유하는 dimension상의 시간축으로 모델링 하기 위해 2가지 단계가 필요하다.</p>
-<ol class="arabic simple">
-<li><p>2d diffusion model을 3d 비디오 데이터에 맞게 확장시켜야 한다. (<strong>Network Inflation</strong>)</p></li>
-<li><p>시간축상으로 효율적인 정보의 흐름을 만들기 위해 sub-module이 필요하다. (<strong>Sub-module Design</strong>)</p></li>
-</ol>
-<p><strong>Network Inflation</strong></p>
-<p>사전학습된 T2I 모델의 이미지 레이어는 고품질의 그림 사전지식(content prior)을 포착할수 있다. 이 지식을 활용(유지)하기 위해서 동일 모델로 video를 다루고자 할 때는 기존 이미지 레이어는 독립적으로 내버려두고, network를 확장시키는 방향이 선호된다. 이를 위해 기존 연구 (Ho et al., 2022b; Wu et al., 2023; Blattmann et al., 2023)를 참고하여, 5d tensor <span class="math notranslate nohighlight">\(x\in \Bbb R^{b\times c \times f\times h\times w}\)</span> 를 입력으로 받도록 모델을 수정했다.  <span class="math notranslate nohighlight">\(b\)</span>는 batch, <span class="math notranslate nohighlight">\(f\)</span>는 frame을 뜻한다. 내부 feature map이 이미지 레이어를 지나갈때는 시간 축을 의미하는 <span class="math notranslate nohighlight">\(f\)</span>는 <span class="math notranslate nohighlight">\(b\)</span>축으로 reshaping을 통해 무시한다.</p>
-<p>(5d tensor → 4d tensor <span class="math notranslate nohighlight">\(x \in \Bbb R^{bf\times c \times h\times w}\)</span> → (기존 이미지 레이어) → 4d tensor → 5d tensor)</p>
-<p>이를 통해 각 프레임을 개별 이미지 처럼 독립적으로 처리할 수 있다. 반면에 새롭게 추가된 motion module은 공간축(<span class="math notranslate nohighlight">\(h,w\)</span>)을 reshaping하여 무시한다. (5d tensor → 3d tensor <span class="math notranslate nohighlight">\(x \in \Bbb R^{bhw\times c \times f}\)</span> → (motion module) → 3d tensor → 5d tensor)</p>
-<p><strong>Module Design</strong></p>
-<p>최근 비디오 생성 연구들은 temporal modeling의 다양한 방식을 탐구하고 있다. AnimateDiff에서는 Transformer 구조를 차용하여 시간축상에서 동작하도록 작은 수정을 거쳐 motion module을 design했다. (이하 temporal Transformer) 실험을 통해 해당 구조가 motion prior를 모델링하는데 적합하다는 것을 발견했다. Fig.3을 보면 temporal Transformer가 시간축에서 동작하는 여러 self-attn block으로 이루어진것을 볼수 있다. 또한 sinusoidal position encoding을 통해 애니메이션상의 각 프레임의 시간적 위치정보를 나타내고자 했다. 앞서 언급한 대로 motion module의 입력크기는 feature map을 reshaping하여 조절하였다. (<span class="math notranslate nohighlight">\(x \in \Bbb R^{bhw\times c \times f}\)</span>) feature map을 시간축으로 다시 펼치고자 할때는 다음과 같은 길이 <span class="math notranslate nohighlight">\(f\)</span>, 크기 <span class="math notranslate nohighlight">\(z_1, ...,z_f;z_i \in \Bbb R^{(b\times h\times w)\times c}\)</span>의 vector sequence로 다룰수 있다. 해당 크기의 벡터가 self-attn block을 통과하면 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-z_{\text{out}}=\text{Attention}(Q,K,V)=\text{Softmax}(QK^T/\sqrt{c})\cdot V \tag{5}
-\]</div>
-<p><span class="math notranslate nohighlight">\(Q=W^Qz, K=W^Kz, V=W^Vz\)</span> 이며, 각각 분리된 세 projection을 의미한다. attention mechanism을 통해 현 프레임의 생성에 다른 프레임으로 부터 추출된 정보를 반영하는 것이 가능하다. 결과적으로 각 프레임을 개별적으로 생성하는 것이 아닌, T2I 모델을 확장하여 motion module을 추가한 AnimateDiff가 시간에 따른 visual content의 변화를 잘 포착하기 위해 학습하여 motion dynamics를 이용해 animation clip을 제작하도록 한다. self-attn block 전에 sinusoidal position encoding을 더하는 것을 잊어서는 안된다. 그렇지 않으면 motion module이 frame의 순서를 알 수 없기 때문이다.</p>
-<p>추가적인 모듈을 넣음으로 인해 발생할수 있는 문제들을 피하기 위해 temporal Transformer의 레이어의 파라미터는 0으로 초기화 하였으며 residual connection을 추가하여 훈련 시작시에 motion module이 identity mapping으로 동작하도록 했다.</p>
-</section>
-<section id="adapt-to-new-motion-patterns-with-motionlora">
-<h3>4.3 Adapt to New Motion Patterns with MotionLoRA<a class="headerlink" href="#adapt-to-new-motion-patterns-with-motionlora" title="Permalink to this heading">#</a></h3>
-<p>전반적인 motion 지식을 motion module이 사전학습하더라도 새로운 동작 패턴에 대한 적용에 대한 문제는 발생한다.
-ex. zooming, panning, rolling.</p>
-<p>높은 사전학습을 위한 비용을 감당할 수 없어 motion module을 특정 액션에 맞춰 튜닝하고자 하는 사용자를 위해 적은 참고 비디오(reference video)나 적은 훈련 횟수로도 효율적으로 모델을 적용할 수 있도록 하는 것이 중요하다. 이를 위해 AnimateDiff에 MotionLoRA를 마지막으로 적용했다. Motion Module의 구조와 제한된 참고 비디오를 고려하여, self-attn layers에 LoRA layers를 inflated model에 추가하여 motion personalization을 위한 효율적인 파인튜닝 방법을 제안한다.</p>
-<p>몇 종의 촬영 방식으로 실험을 진행하였으며 rule-based augmentation을 통해 reference videos를 얻었다. 예를 들어 zooming 비디오를 얻기 위해 시간에 따라 비디오 프레임의 크롭(cropping) 영역을 점차 줄이거나(zoom-in) 늘려가며(zoom-out) augmentation을 진행했다. AnimateDiff의 MotionLoRA는 20~50개 정도의 적은 참고 비디오, 2000번의 훈련횟수로 파인튜닝했을때도 괜찮은 결과를 보였다. low-rank property로 인해 MotionLoRA 또한 composition capability를 가지고 있다. 학습된 MotionLoRA 모델 각각이 inference time상의 motion effect를 융합하기위해 협력(combine)할 수 있음을 말한다.</p>
-</section>
-<section id="animatediff-in-practice">
-<h3>4.4 AnimateDiff in Practice<a class="headerlink" href="#animatediff-in-practice" title="Permalink to this heading">#</a></h3>
-<section id="training">
-<h4>Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h4>
-<p>Fig. 3을 보면 AnimateDiff에는 학습 가능한 모듈이 3개 있다. 각 모듈의 objective는 약간씩 다르다. domain adapter는 SD의 MSE loss인 Eq. 2 objective function을 통해 학습한다. 애니메이션을 만드는 역할을 하는 motion module과 motion LoRA의 경우 video data에 대한 차원을 더 많이 수용하기 위해 약간 수정된 objective를 사용한다. video data batch ( <span class="math notranslate nohighlight">\(x_0^{1:f}\in \Bbb R^{b\times c \times f \times h \times w}\)</span>)는 사전학습된 SD의 auto-encoder를 사용해 각 프레임 별로 latent code <span class="math notranslate nohighlight">\(z_0^{1:f}\)</span>로 인코딩된다. 이 latent code는 Eq. 1 과 같이 정의된 diffusion schedule에 따라 노이즈가 추가(forward process)된다.</p>
-<div class="math notranslate nohighlight">
-\[
-z_t^{1:f}=\sqrt{\bar \alpha_t}z_0^{1:f}+\sqrt{1-\bar\alpha_t}\epsilon^{1:f} \tag{6}
-\]</div>
-<p>모델의 입력은 노이즈가 추가된 latent codes와 이 쌍이되는 text prompts이며, 모델은 forward process에서 추가된 노이즈를 예측한다. AnimateDiff의 motion module을 위한 최종 training objective는 아래와 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal L=\Bbb E_{\mathcal E(x_0^{1:f}),y,\epsilon^{1:f}\sim\mathcal N(0,I),t}\Big[\|\epsilon-\epsilon_\theta(z_t^{1:f},t,\tau_\theta(y))\|^2_2\Big] \tag{7}
-\]</div>
-<p>각 모듈들(domain adapter, motion module, MotionLoRA)을 학습할때, 학습 타겟을 제외한 영역은 freeze 시킨뒤 학습했다.</p>
-</section>
-<section id="inference">
-<h4>Inference<a class="headerlink" href="#inference" title="Permalink to this heading">#</a></h4>
-<p>inference시에는 personalized T2I model는 앞서 설명한대로 inflate되며 motion module과 (optional) MotionLoRA를 더해 애니메이션을 생성한다.</p>
-<p>domain adapter의 경우 inference시 그냥 배제하지 않고 personalized T2I model에 injection하였으며 domain adapter의 영향력은 Eq. 4의 <span class="math notranslate nohighlight">\(\alpha\)</span>를 이용해 조절했다. Sec 5.3의 Ablation study에서 <span class="math notranslate nohighlight">\(\alpha\)</span>의 값에 따른 결과의 차이를 확인할 수 있다. 마지막으로 animation frames은 reverse diffusion process와 이를 통해 얻은 latent codes를 디코딩 함으로써 얻을수 있다.</p>
-</section>
-</section>
-</section>
-<section id="experiments">
-<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>SD 1.5에 AnimateDiff를 적용하여 실험을 진행했다. 또한 motion module을 학습할때는 WebVid 10M 데이터셋을 사용하였다. (자세한 사항은 supplementary material 확인해주세요)</p>
-<section id="qualitative-results">
-<h3>5.1 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-1.png"><img alt="experiments_1" class="bg-primary mb-1" src="../../_images/exp-1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 609 </span><span class="caption-text">qualitative results</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="quantitative-comparison">
-<h3>5.2 Quantitative Comparison<a class="headerlink" href="#quantitative-comparison" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-2.png"><img alt="experiments_2" class="bg-primary mb-1" src="../../_images/exp-2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 610 </span><span class="caption-text">quantitative results</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>User Study</p>
-<p>text, domain, smooth 3개 지표에 대한 개별 등수를 조사했다. Average User Ranking(AUR) 방법을 사용하여 높은 점수를 가지면 높은 품질을 의미하는 preference metric을 사용했다.</p>
-</li>
-<li><p>CLIP metric</p>
-<p>related paper에서 언급했던 이미지와 텍스트쌍을 동시에 학습한 CLIP 모델을 활용한 평가지표이다. 사전학습된 CLIP 모델을 사용하여 generated frames와 reference 사이 CLIP score를 계산한 것이다.</p>
-<p>+) CLIP score는 CLIP encoder를 통과한 벡터들 사이 코사인 유사도를 계산하는 방식</p>
-<ul class="simple">
-<li><p>Text</p>
-<ul>
-<li><p>각 프레임 임베딩과 주어진 텍스트 임베딩 사이 코사인 유사도</p></li>
-</ul>
-</li>
-<li><p>Domain</p>
-<ul>
-<li><p>원본 애니메이션이 없으므로 reference image와 생성된 영상 사이 CLIP score를 구함.</p></li>
-</ul>
-</li>
-<li><p>Smooth</p>
-<ul>
-<li><p>연속된 프레임 쌍의 이미지 임베딩의 코사인 유사도</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="ablation-study">
-<h3>5.3 Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
-<section id="domain-adapter">
-<h4>Domain Adapter<a class="headerlink" href="#domain-adapter" title="Permalink to this heading">#</a></h4>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-3.png"><img alt="effect_of_domain_adapter" class="bg-primary mb-1" src="../../_images/exp-3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 611 </span><span class="caption-text">scaler를 0으로 하면 domain adapter에 의한 효과를 제거한 것과 같다. 위의 그림은 모델이 생성한 animation clip의 첫번째 프레임이다.</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>domain adapter에 의한 효과를 제거했을때 전체적인 이미지 퀄리티가 높아 보이는데, 이는 domain adapter가 video dataset의 특성이라고 할 수 있는 watermark나 모션 블러 등을 학습했기 때문이다. 즉, domain adapter가 전체 학습과정에 도움이 되었음을 보여준다.</p>
-</section>
-<section id="motion-module-design">
-<h4>Motion module design<a class="headerlink" href="#motion-module-design" title="Permalink to this heading">#</a></h4>
-<p>AnimateDiff의 temporal Transformer구조와 전체 convolution인 구조의 모델과 비교했다. 두 방식 모두 비디오 생성 분야에서 자주 사용된다.</p>
-<p>temporal Transformer의 temporal attention부분을 1D temporal convolution으로 교체하여 두 모델의 파라미터가 유사하게 놓여있음을 확인했다. convolution motion module은 모든 프레임을 동일하게 놓았지만 Transformer 구조와 비교하여 움직임을 제대로 반영하지 못했다.</p>
-</section>
-<section id="efficiency-of-motionlora">
-<h4>Efficiency of MotionLoRA<a class="headerlink" href="#efficiency-of-motionlora" title="Permalink to this heading">#</a></h4>
-<p>parameter efficiency와 data efficiency 측면에서 MotionLoRA의 효율성을 시험해보았다. 이를 위해 parameter 개수와 data 개수를 조절해가며 여러 MotionLoRA를 학습시켰다.</p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-4.png"><img alt="experiments-4" class="bg-primary mb-1" src="../../_images/exp-4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 612 </span><span class="caption-text">Efficiency of MotionLoRA</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Parameter efficiency</p>
-<ul>
-<li><p>효율적인 모델학습을 위해도 모델의 배포를 위해서도 중요한 부분이다.</p></li>
-<li><p>AnimateDiff는 비교적 파라미터 개수가 적들때에도 괜찮은 애니메이션을 만들수 있다. 그림의 실험에서는 zoom-in 카메라 움직임을 새롭게 학습하는 능력을 본것이다.</p></li>
-</ul>
-</li>
-<li><p>Data efficiency</p>
-<ul>
-<li><p>특정 motion pattern을 위한 reference video 데이터를 수집하기 어렵기 때문에 모델을 실제로 적용하기 위해 중요한 부분이다.</p></li>
-<li><p>데이터의 개수가 적을때에도 학습하고자 하는 움직임은 학습할 수 있었으나 데이터의 개수가 극도로 적을 경우(N=5) 생성된 애니메이션 품질의 급격한 저하가 있었다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="controllable-generation">
-<h3>5.4 Controllable Generation<a class="headerlink" href="#controllable-generation" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-5.png"><img alt="experiments_5" class="bg-primary mb-1" src="../../_images/exp-5.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 613 </span><span class="caption-text">Controllability of AnimateDiff</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>visual content와 motion prior의 개별 학습을 통해 AnimateDiff가 existing content를 조절할 수 있도록 했다. 이 특성을 확인하기 위해 AnimateDiff를 ControlNet과 결합하여 영상 생성시 depth를 통해 조절할 수 있도록 했다.</p>
-<p>DDIM inversion을 통해 다듬어진 latent sequences를 얻고 이를 비디오 생성에 사용하는 최신 비디오 수정 연구들과 비교하여 AnimateDiff는 randomly sampled noise를 이용하여 애니메이션을 생성한다.</p>
-</section>
-</section>
-<section id="conclusion">
-<h2>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서는 애니메이션 생성을 위한 practical pipeline인 AnimateDiff를 제안한다. AnimateDiff를 통해 personalized text-to-image model을 바로 애니메이션 생성에 사용할 수 있다. 이를 위해 본 논문에서는 세가지 module을 디자인하였으며 이를 통해 AnimateDiff는 motion prior를 학습하고, visual quality를 유지할 수 있으며, MotionLoRA를 통해 가벼운 finetuning을 통해 원하는 motion으로 애니메이션을 생성할 수 있다.</p>
-<p>motion module은 한번 학습되면 다른 이미지를 animate시키고자 할때 사용할 수 있다. 다양한 실험 결과를 통해 AnimateDiff와 MotionLoRA의 효율성과 생성능력을 검증했다. 또 content-controllability측면에서도 추가적인 학습없이 본 논문의 방식을 사용할 수 있음을 보였다.</p>
-<p>AnimateDiff는 취향의 그림체, 캐릭터의 움직임, 카메라 워크에 맞게 이미지를 애니메이션화 시킬 수있는 효율적인 베이스 라인으로써 다양한 방면의 application에 큰 잠재력을 가지고 있다.</p>
-</section>
-<section id="id2">
-<h2>7. 실습<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h2>
-<p>아래 이미지들을 클릭하면 gif를 보실 수 있습니다.</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif"><img alt="hands_on_1" class="bg-primary mb-1" src="../../_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 614 </span><span class="caption-text">side-view-photo-of-17-year-old-girl-in-a-japanese-school<br />
-gpt로 생성한 그림을 input으로 사용함</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif"><img alt="hands_on_2" class="bg-primary mb-1" src="../../_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 615 </span><span class="caption-text">side-view-photo-of-man-in-black-padded-jumper<br />
-직접 촬영한 사진을 input으로 사용함<br />
-입력한 사진의 인물의 인종이 유지되지 않았는데 학습데이터 셋의 불균형 때문으로 사료됨</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif"><img alt="hands_on_3" class="bg-primary mb-1" src="../../_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 616 </span><span class="caption-text">image-of-a-man-with-blonde-hair-and-blue-eyes<br />
-gpt로 생성한 그림을 input으로 사용함</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<blockquote>
-<div><p>📌 실습 후 느낀점</p>
-<ul class="simple">
-<li><p>WebVid 10M이 애니메이션화에 적합한 데이터셋인지 잘 모르겠다.</p></li>
-<li><p>다양한 metric을 평가에 사용하지 않은 점이 아쉽다.</p></li>
-<li><p>특정 애니메이션 클립을 생성하고 싶다면 실질적으로 학습해야 하는 부분은 motionLoRA정도라 사용이 편리하다.</p></li>
-<li><p>reproduction이 매우 용이하다.</p></li>
-<li><p>AnimateDiff를 제대로 활용하기 위해서는 personalized T2I가 제일 중요한 부분이라고 할수 있는데, 원하는 스타일의 pretrained T2I 모델을 구하는 것이 어렵다. 그림체가 잘 맞지 않으면 애니메이션 클립 초반에 급격히 변화하는 부분이 자주 생긴다.</p></li>
-</ul>
-</div></blockquote>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="VideoLDM.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">VideoLDM</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Animate_Anyone.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Animate Anyone</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">2.1 Text-to-image diffusion models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#personalizing-t2i-models">2.2 Personalizing T2I models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animating-personalized-t2is">2.3 Animating personalized T2Is</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary">3. Preliminary</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion">3.1 Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation-lora">3.2 Low-Rank Adaptation(LoRA)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4. AnimateDiff</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#alleviate-negative-effects-from-training-data-with-domain-adapter">4.1 Alleviate Negative Effects from Training Data with Domain Adapter</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#learn-motion-priors-with-motion-module">4.2 Learn Motion Priors with Motion Module</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapt-to-new-motion-patterns-with-motionlora">4.3 Adapt to New Motion Patterns with MotionLoRA</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animatediff-in-practice">4.4 AnimateDiff in Practice</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training">Training</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#inference">Inference</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.1 Qualitative Results</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">5.2 Quantitative Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.3 Ablation Study</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#domain-adapter">Domain Adapter</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-module-design">Motion module design</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#efficiency-of-motionlora">Efficiency of MotionLoRA</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-generation">5.4 Controllable Generation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">7. 실습</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>AnimateDiff &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/AnimateDiff';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Animate Anyone" href="Animate_Anyone.html" />
+    <link rel="prev" title="VideoLDM" href="VideoLDM.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/AnimateDiff.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/AnimateDiff.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>AnimateDiff</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">2.1 Text-to-image diffusion models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#personalizing-t2i-models">2.2 Personalizing T2I models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animating-personalized-t2is">2.3 Animating personalized T2Is</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary">3. Preliminary</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion">3.1 Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation-lora">3.2 Low-Rank Adaptation(LoRA)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4. AnimateDiff</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#alleviate-negative-effects-from-training-data-with-domain-adapter">4.1 Alleviate Negative Effects from Training Data with Domain Adapter</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#learn-motion-priors-with-motion-module">4.2 Learn Motion Priors with Motion Module</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapt-to-new-motion-patterns-with-motionlora">4.3 Adapt to New Motion Patterns with MotionLoRA</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animatediff-in-practice">4.4 AnimateDiff in Practice</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training">Training</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#inference">Inference</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.1 Qualitative Results</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">5.2 Quantitative Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.3 Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#domain-adapter">Domain Adapter</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-module-design">Motion module design</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#efficiency-of-motionlora">Efficiency of MotionLoRA</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-generation">5.4 Controllable Generation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">7. 실습</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> AnimateDiff: Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning (ICLR 2024)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2307.04725">https://arxiv.org/abs/2307.04725</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/guoyww/AnimateDiff?tab=readme-ov-file">guoyww/AnimateDiff</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://animatediff.github.io">https://animatediff.github.io</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
+<li><p><strong>Last updated on June 11, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="animatediff">
+<h1>AnimateDiff<a class="headerlink" href="#animatediff" title="Permalink to this heading">#</a></h1>
+<blockquote>
+<div><p>📌 논문의 의의<br />
+In this paper, we present <strong>AnimateDiff</strong>, a practical framework for animating personalized T2I models without requiring model-specific tuning.<br />
+<strong>AnimateDiff = public personalized T2I models + domain adapter &amp; plug-and-play Motion Module + MotionLoRA</strong></p>
+</div></blockquote>
+<section id="abstract">
+<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<p>T2I diffusion model과 DreamBooth나 LoRA와 같은 개인화 기술이 발전함에 따라 사람들은 적절한 비용을 지불하여 고화질의 원하는 이미지를 얻을 수 있게 되었다. 하지만, 기존 고화질 이미지 생성 모델(personalized T2I)에 움직임을 추가하거나 애니메이션을 생성하도록 하는 것은 여전히 어렵다. <strong>본 논문에서는 추가적인 훈련(model-specific tuning)없이 기존 고화질 이미지 생성모델에 움직임을 추가하는 실용적인 프레임 워크를 제안한다.</strong> <strong>본 논문에서 제안하는 프레임 워크의 핵심은 plug-and-play motion module을 활용하는 것으로 이 motion module을 한번 학습하면, 어떤 이미지 생성 모델과도 융합할 수 있다.</strong> 본 논문에서 제안하는 학습 방법을 이용하면 motion module은 real-world 비디오로 부터 효과적으로 motion prior를 학습할 수 있다. 한번 학습된 motion module은 이미지 생성 모델에 덧붙여 애니메이션 생성 모델로 사용할 수 있다. 또한 AnimateDiff를 위한 간단한 파인튜닝 방식인 MotionLoRA를 제안한다. 이는 사전 학습된 motion module이 저비용으로 새로운 움직임 패턴을 학습할 수 있게 해준다. (ex. 촬영 기법) AnimateDiff와 MotionLoRA를 공개된 이미지 생성 모델에 부착하여 실험했으며 이를 통해 본 논문의 방식이 이미지 퀄리티와 다양한 움직임을 보전하면서도 자연스러운 애니메이션 클립을 생성할 수 있음을 보였다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/inference_pipeline.png"><img alt="inference_pipeline" class="bg-primary mb-1" src="../../_images/inference_pipeline.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 613 </span><span class="caption-text">inference pipeline</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Core Framework</strong></p>
+<ul>
+<li><p>public T2I models</p>
+<ul>
+<li><p>personalized T2Is from the same base T2I (SD1.5)</p>
+<ul>
+<li><p>can download finetuned T2I from <a class="reference external" href="https://civitai.com/">civitai</a> or hugging face</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>domain adapter</p>
+<ul>
+<li><p>LoRA기반 domain adapter를 base T2I 모델에 더해 video dataset을 학습할때 발생할수 있는 domain gap을 줄였다.</p></li>
+<li><p>여기서 말하는 domain gap이란 video의 각 프레임을 나누어 이미지로 봤을때 발생할 수 있는 motion blur, compression artifacts, watermarks등을 말한다.</p></li>
+</ul>
+</li>
+<li><p>training strategy of a plug-and-play motion module</p>
+<ul>
+<li><p>learns transferable motion priors from real-world videos through the proposed training strategy</p></li>
+<li><p>한번 학습하고 나면 다른 T2I모델과 결합해 animation generator로 사용할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>MotionLoRA</p>
+<ul>
+<li><p>adapt the pre-trained motion module to specific motion patterns</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>텍스트 프롬프트를 입력하여 이미지를 생성하는 디퓨전 모델(T2I diffusion models)의 발전으로 많은 예술가와 아마추어들이 시각 컨텐츠를 보다 쉽게 생성할 수 있게 되었다. 기존 T2I 모델의 생성능력(creativity)을 자극하기 위해 DreamBooth와 LoRA와 같은 가벼운 개인화 방식들이 제안되었다. 이러한 방식들은 작은 데이터셋과 적당한 하드웨어에서도 customized finetuning을 할 수 있게 해준다. 그로인해 사용자들이 적은 비용으로도 base T2I model을 새로운 domain에 적용하거나 시각적 퀄리티를 높일 수 있게 되었다. 그 결과 AI 아티스트와 아마추어 커뮤니티에서 상당량의 personalized models을 Civitai나 Hugging Face와 같은 플랫폼에 게시했다. 이러한 모델들이 상당히 좋은 수준의 이미지를 생성할 수 있지만, 정적인 이미지만 생성할 수 있다는 한계가 있다. 반면, 애니메이션을 생성하는 기술이 영화나 카툰과 같은 실산업에서 더 요구된다. 본 연구에서는 고화질 T2I 모델을 파인튜닝 없이 곧바로 애니메이션 생성 모델로 변환하는 것을 목표로 한다. 파인 튜닝을 위한 데이터 수집과 컴퓨팅 자원의 필요는 아마추어 사용자에게 걸림돌이 된다.</p>
+<p><strong>본 논문에서는 AnimateDiff를 제안하는데 이는 personalized T2I model의 능력을 보전하면서 애니메이션을 생성하는 문제를 해결할 수 있는 효과적인 파이프라인이다.</strong> AnimateDiff의 핵심은 비디오 데이터셋(WebVid-10M)으로부터 타당한 motion 정보를 plug-and-play motion module이 학습하는 것이다. motion module의 학습은 세가지 단계로 구성된다.</p>
+<ol class="arabic">
+<li><p><strong>domain adapter 파인튜닝</strong></p>
+<p>visual distribution of the target video dataset(이미지 품질차이, 동영상 워터마크, 압축으로 인한 artifacts)에 대한 부분은 이 모듈이 학습함으로써 이후 motion관련 모듈들이 motion에만 집중할 수 있도록 한다.</p>
+</li>
+<li><p><strong>새로운 motion module</strong></p>
+<p>비디오를 입력받을수 있게 inflate시킨 base T2I 모델에 domain adapter를 더한 모델에 모션 모델링을 위한 모션 모듈을 추가한다. 이 모듈을 학습할때는 domain adapter와 base model을 freeze한다. 이렇게 하면 motion module이 움직임에 대한 부분을 전반적으로 학습하여 모듈별 학습이 가능해진다. (다른 그림체를 원할경우 base T2I+domain adapter를 바꾸면 됨)</p>
+</li>
+<li><p><strong>(optional) MotionLoRA 학습</strong></p>
+<p>MotionLoRA의 경우 특정 motion을 적은 수의 reference videos와 학습횟수로 학습하는 것을 목표로하는 모듈이다. 이름과 같이 Low-Rank Adaptation (LoRA) (Hu et al., 2021)를 이용하는데  새로운 motion pattern을 적은수(50개)의 reference video만으로 학습시킬수 있다. 또한 차지하는 메모리도 적어 추가학습이나 모델을 공유,배포하는데에도 유리하다.</p>
+</li>
+</ol>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_pipeline.png"><img alt="training_pipeline" class="bg-primary mb-1" src="../../_images/training_pipeline.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 614 </span><span class="caption-text">training pipeline</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<section id="text-to-image-diffusion-models">
+<h3>2.1 Text-to-image diffusion models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Diffusion models</p>
+<ul>
+<li><p>GLIDE (<a class="reference external" href="https://arxiv.org/abs/2112.10741">Nichol et al., 2021</a>) 는 text condition을 통해 이미지를 생성하는 방법을 소개하고, classifier guidance를 조절하여 더 나은 이미지 결과물을 얻는 방법을 설명했다.</p>
+<ul>
+<li><p><strong>G</strong>uided <strong>L</strong>anguage to <strong>I</strong>mage <strong>D</strong>iffusion for Generation and <strong>E</strong>diting</p></li>
+</ul>
+</li>
+<li><p>DALL-E2 (Ramesh et al., 2022)는 CLIP을 이용하여 text-image 일관성을 향상시켰다.</p></li>
+<li><p>Imagen (Saharia et al., 2022)은 LLM과 cascade 구조를 이용하여 photorealistic한 결과물을 얻고자 했다.</p></li>
+<li><p><strong>Stable Diffusion (Rombach et al., 2022)</strong>은 auto-encoder의 latent space에서 diffusion 과정을 수행함으로써 효율을 높였다. (3.1 참고)</p></li>
+<li><p>eDiff-I (<a class="reference external" href="https://www.google.com/search?client=safari&amp;rls=en&amp;q=e+diff+i&amp;ie=UTF-8&amp;oe=UTF-8">Balaji et al., 2022</a>) 는 디퓨전 모델들을 앙상블 학습시켜 generation(denoising) 단계별로 적절한 디퓨전 모델로 denoise를 수행하고자 했다.</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="reference internal image-reference" href="../../_images/clip.png"><img alt="clip_pipeline" src="../../_images/clip.png" style="width: 200px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 615 </span><span class="caption-text">CLIP:Contrastive language-image pre-training</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="reference internal image-reference" href="../../_images/imagen.png"><img alt="imagen_pipeline" src="../../_images/imagen.png" style="width: 200px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 616 </span><span class="caption-text">Imagen</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="reference internal image-reference" href="../../_images/eDiff-I.png"><img alt="eDiff-I_pipeline" src="../../_images/eDiff-I.png" style="width: 200px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 617 </span><span class="caption-text">eDiff-I</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="personalizing-t2i-models">
+<h3>2.2 Personalizing T2I models<a class="headerlink" href="#personalizing-t2i-models" title="Permalink to this heading">#</a></h3>
+<p>사전학습된 T2I 모델을 활용하기 위해 효율적인 개인화(personalization)방법에 대한 연구가 뜨겁다. 여기서 개인화란 reference images를 통해 (새로운) concepts나 style을 사전학습된 모델에 추가하는 것을 말한다. (손을 잘그리는 모델, 눈을 잘그리는 모델, 특정 그림체를 학습한 모델 등) 이를 위한 가장 단순한 방법은 전체 모델을 fine-tuning 시키는 것이다. 이를 통해 전체적인 퀄리티가 높아질 수도 있지만, 기존 학습데이터를 잊는 catastrophic forgetting이 발생할 수도 있다는 문제가 있다. 이러한 문제는 특히 reference image 데이터가 적을때 발생한다.</p>
+<ul class="simple">
+<li><p>DreamBooth (<a class="reference external" href="https://dreambooth.github.io">Ruiz et al., 2023</a>) 은 매우 적은 이미지를 사용하면서도 preservation loss를 추가하여 전체 모델을 fine-tuning 시켰다.</p></li>
+<li><p>Textual Inversion (Gal et al., 2022) 은 새로운 concept 마다 token embedding을 최적화 하였다.</p></li>
+<li><p><strong>Low-Rank Adaptation (LoRA) (<a class="reference external" href="https://github.com/microsoft/LoRA">Hu et al., 2021</a>)</strong> 은 LoRA layer를 추가하여 이에 대해서만 fine-tuning을 수행했다. (Sec 3.2 참고)</p></li>
+<li><p>그외의 encoder-based approaches (<a class="reference external" href="https://tuning-encoder.github.io">Gal et al., 2023</a>; <a class="reference external" href="https://arxiv.org/abs/2304.02642">Jia et al., 2023</a>)</p></li>
+</ul>
+</section>
+<section id="animating-personalized-t2is">
+<h3>2.3 Animating personalized T2Is<a class="headerlink" href="#animating-personalized-t2is" title="Permalink to this heading">#</a></h3>
+<p>personalized T2I를 애니메이션화 시키는 연구가 많지는 않지만 아래의 연구들과 관련있다.</p>
+<ul>
+<li><p>Text2Cinemagraph <a class="reference external" href="https://text2cinemagraph.github.io/website/">(Mahapatra et al., 2023)</a> 는 flow prediction을 통해 cinemagraph를 생성하고자 했다.</p></li>
+<li><p>Align-Your-Latent <a class="reference external" href="https://www.notion.so/AnimateDiff-17444a06eda84e14abaca781b4f71344?pvs=21">(Blattmann et al., 2023)</a> 는 general video generator내의 frozen image layers가 personalizing이 가능함을 확인했다.</p></li>
+<li><p>video generation models  <strong>← 결과 비교시 사용</strong></p>
+<ul>
+<li><p><strong>Tune-a-Video</strong> <a class="reference external" href="https://github.com/showlab/Tune-A-Video">(Wu et al., 2023)</a> 는 단일 비디오로 적은 수의 파라미터만 파인튜닝하는 방식을 제안했다. (SD+temporal attn 구조를 가지고 있음)</p></li>
+<li><p><strong>Text2Video-Zero</strong> <a class="reference external" href="https://github.com/Picsart-AI-Research/Text2Video-Zero">(Khachatryan et al., 2023)</a> 는 사전학습한 T2I모델을 활용하여 추가적인 학습과정 없이 애니메이션화 할 수 있는 방법을 제안했다. 이는 사전에 정의된 affine matrix를 사용하여 latent warping을 하는 방식이다.</p></li>
+<li><p>pretrained T2I + temporal structures 관련 연구들이 많다.</p>
+<p>(Esser et al., 2023; Zhou et al., 2022a; Singer et al., 2022; Ho et al., 2022b,a; Ruan et al., 2023; Luo et al., 2023; Yin et al., 2023b,a; Wang et al., 2023b; Hong et al., 2022; Luo et al., 2023)</p>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="preliminary">
+<h2>3. Preliminary<a class="headerlink" href="#preliminary" title="Permalink to this heading">#</a></h2>
+<section id="stable-diffusion">
+<h3>3.1 Stable Diffusion<a class="headerlink" href="#stable-diffusion" title="Permalink to this heading">#</a></h3>
+<p>Stable Diffusion (<a class="reference external" href="https://github.com/CompVis/stable-diffusion">Rombach et al., 2022</a>), the base T2I model used in our work</p>
+<ul>
+<li><p>open-sourced, well-developed community, many high-quality personalized T2I models for eval</p></li>
+<li><p>사전 학습된 encoder(<span class="math notranslate nohighlight">\(\mathcal E\)</span>)와 decoder(<span class="math notranslate nohighlight">\(\mathcal D\)</span>)를 이용하여 latent space상에서 diffusion process를 수행</p></li>
+<li><p>인코딩된 이미지 <span class="math notranslate nohighlight">\(z_0=\mathcal E(x_0)\)</span> 의 경우 아래의 forward diffusion 과정을 통해 <span class="math notranslate nohighlight">\(z_t\)</span>로 변환됨</p></li>
+<li><p>Forward diffusion for <span class="math notranslate nohighlight">\(t=1,2,…,T\)</span></p>
+<div class="math notranslate nohighlight">
+\[
+    z_t=\sqrt{\bar \alpha_t}z_0+\sqrt{1-\bar\alpha_t}\epsilon,\space \epsilon \sim \mathcal N(0,I)  \tag{1}
+    \]</div>
+<ul class="simple">
+<li><p>pre-defined <span class="math notranslate nohighlight">\(\barα_t\)</span> determines the noise strength at step <span class="math notranslate nohighlight">\(t\)</span></p></li>
+<li><p>The denoising network <span class="math notranslate nohighlight">\(ϵ_θ(·)\)</span> learns to reverse this process by predicting the added noise, encouraged by an MSE loss</p></li>
+</ul>
+</li>
+<li><p>MSE loss</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal L=\Bbb E_{\mathcal E(x_0),y,\epsilon \sim \mathcal N(0,I),t}\big [\| \epsilon-\epsilon_\theta(z_t,t,\tau_\theta(y))\|_2^2\big] \tag{2}
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(y\)</span> is the text prompt corresponding to <span class="math notranslate nohighlight">\(x_0\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(τ_θ(·)\)</span> is a text encoder mapping the prompt to a vector sequence.</p></li>
+<li><p>In SD, <span class="math notranslate nohighlight">\(ϵ_θ(·)\)</span> is implemented as a UNet (down×4, middle, up×4 blocks; ResNet, spatial self-attn, cross-attn)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="low-rank-adaptation-lora">
+<h3>3.2 Low-Rank Adaptation(LoRA)<a class="headerlink" href="#low-rank-adaptation-lora" title="Permalink to this heading">#</a></h3>
+<p>Low-Rank Adaptation(LoRA) (<a class="reference external" href="https://arxiv.org/pdf/2106.09685.pdf">Hu et al., 2021</a>), which helps understand the domain adapter (Sec. 4.1) and MotionLoRA (Sec. 4.3) in AnimateDiff</p>
+<ul>
+<li><p>language model에서 처음 등장한 개념으로 거대 모델의 fine-tuning을 빠르게 수행하기 위해 제안된 개념이다.</p></li>
+<li><p>LoRA는 모델의 전체 파라미터를 fine-tuning하지 않고, rank-decomposition 행렬 쌍을 추가하여 새롭게 추가된 weight만 최적화 시키는 것이다.</p></li>
+<li><p>기존 파라미터는 고정함으로써 finetuning시 발생할 수 있는 catastrophic forgetting(<a class="reference external" href="https://www.notion.so/AnimateDiff-17444a06eda84e14abaca781b4f71344?pvs=21">Kirkpatrick et al., 2017</a>)을 예방할 수 있다.</p></li>
+<li><p>The new model weight with LoRA</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal W'=\mathcal W+\Delta\mathcal W=\mathcal W+AB^T \tag{3}
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(A ∈ R ^{m×r}\)</span> , <span class="math notranslate nohighlight">\(B ∈ R ^{n×r}\)</span> are a pair of rank-decomposition matrices, <span class="math notranslate nohighlight">\(r\)</span> is a hyper-parameter, which is referred to as the rank of LoRA layers</p></li>
+<li><p>attention layer에서만 사용할수 있는 것은 아니지만 실제로는 주로 attention layer에서 사용된다. LoRA를 통해 fine-tuning시 cost와 storage를 절약할 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="id1">
+<h2>4. AnimateDiff<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<blockquote>
+<div><p>➕ <strong>Architecture Overall</strong><br />
+본 논문에서 제안하는 모델의 핵심은 비디오 데이터를 통해 transferable motion prior를 학습하는 것이다. 이렇게 학습한 motion module을 personalized T2I 모델에 곧바로 적용할 수 있다. <br />
+아래 그림의 하늘색 모델이 <strong>motion module</strong>이고, 초록색 영역이 optional <strong>MotionLoRA</strong>이다. AnimateDiff를 T2I모델에 삽입하여 animation generator로 사용할 수 있다. <br />
+이를 위한 AnimateDiff에는 <strong>학습해야 하는 3개의 모듈</strong>이 있다.</p>
+<ul class="simple">
+<li><p><strong>domain adapter</strong><br />
+- base T2I pre-training data와 our video training data간의 간극을 줄여주기 위한 것으로 학습과정에만 사용된다.</p></li>
+<li><p><strong>motion module</strong> <br />
+- motion prior를 학습하기 위한 모듈</p></li>
+<li><p><strong>MotionLoRA</strong>(optional)<br />
+- pretrained motion module을 새로운 움직임 패턴(카메라 워크)으로 조정하기 위한것</p></li>
+</ul>
+</div></blockquote>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/inference_pipeline.png"><img alt="inference_pipeline" class="bg-primary mb-1" src="../../_images/inference_pipeline.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 618 </span><span class="caption-text">inference pipeline</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<blockquote>
+<div><p>➕ <strong>Training Steps</strong><br />
+본 논문에서 제안하는 각 모듈은 따로따로 학습시키며 각각을 학습시킬때 나머지 영역은 freeze 시킨다. 학습시 사용하는 objective function은 SD와 거의 같다.</p>
+<ul class="simple">
+<li><p>Training step 1. Domain Adapter</p></li>
+<li><p>Training step 2. Motion Module</p></li>
+<li><p>Training step 3. Optional MotionLoRA</p></li>
+</ul>
+</div></blockquote>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_pipeline.png"><img alt="training_pipeline" class="bg-primary mb-1" src="../../_images/training_pipeline.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 619 </span><span class="caption-text">training pipeline</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="alleviate-negative-effects-from-training-data-with-domain-adapter">
+<h3>4.1 Alleviate Negative Effects from Training Data with Domain Adapter<a class="headerlink" href="#alleviate-negative-effects-from-training-data-with-domain-adapter" title="Permalink to this heading">#</a></h3>
+<p>비디오 데이터셋은 이미지 데이터셋에 비해 수집하기 어렵다. 동영상 데이터셋 WebVid (Bain et al., 2021)과 이미지 데이터셋 LAION-Aesthetic (Schuhmann et al., 2022)를 비교해보면, 품질차이도 큼을 알 수 있다.</p>
+<p>각 비디오 프레임을 개별 이미지로 다루게 되면 motion blur, compression artifacts, watermark등을 포함하고 있을 수도 있다. 따라서 T2I 모델을 훈련할 때 사용한 이미지 데이터셋에 비해 motion prior를 학습하기 위해 사용한 동영상 데이터 셋의 품질은 무시할 수 없을 만큼의 차이가 있다. 이 때문에 직접적으로 비디오 데이터셋을 이용하여 애니메이션 생성 모델을 학습할 경우, 생성한 애니메이션의 품질이 제한 될 수 있다.</p>
+<p>동영상 데이터의 낮은 품질로 인해 해당 특성을 motion module이 학습하는 것을 피하고 base T2I의 지식을 보전하기 위해, 네트워크를 분리하여 각 도메인(영상/이미지)의 정보에 맞게 피팅하는 방식(domain adapter)을 제안한다. inference 시에는 domain adapter를 제거하였으며 앞서 언급한 domain gap에 의한 부정적 영향을 제거하는데 효과적이라는 것을 보여준다. domain adapter layer는 LoRA를 활용했으며, self-, cross-attention layer들을 base T2I model에 Fig. 3과 같이 추가하였다. 아래 query projection을 예로 살펴보면,</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/lora.png"><img alt="lora" class="bg-primary mb-1" src="../../_images/lora.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 620 </span><span class="caption-text">LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<div class="math notranslate nohighlight">
+\[
+Q=\mathcal W^Qz+\text{AdapterLayer}(z)=\mathcal W^Qz+\alpha \cdot AB^Tz \tag{4}
+\]</div>
+<p><span class="math notranslate nohighlight">\(Q\)</span> 는 query, <span class="math notranslate nohighlight">\(z\)</span> 는 internal feature, <span class="math notranslate nohighlight">\(\alpha\)</span> 는 상수로 inference time에 domain adapter의 영향력을 조절한다. (기본값은 1 / domain adapter의 효과를 완전히 제거하고 싶다면 <span class="math notranslate nohighlight">\(\alpha\)</span>를 0으로) 나머지 모델의 파라미터는 freeze하고 domain adapter의 파라미터들만 비디오 데이터셋으로 부터 랜덤하게 샘플한 static frame들을 이용하여 최적화했다. 이때 objective function은 Eq. (2)를 사용했다. (아직까지는 이미지 생성 모델)</p>
+</section>
+<section id="learn-motion-priors-with-motion-module">
+<h3>4.2 Learn Motion Priors with Motion Module<a class="headerlink" href="#learn-motion-priors-with-motion-module" title="Permalink to this heading">#</a></h3>
+<p>motion dynamics를 사전학습된 T2I 모델과 공유하는 dimension상의 시간축으로 모델링 하기 위해 2가지 단계가 필요하다.</p>
+<ol class="arabic simple">
+<li><p>2d diffusion model을 3d 비디오 데이터에 맞게 확장시켜야 한다. (<strong>Network Inflation</strong>)</p></li>
+<li><p>시간축상으로 효율적인 정보의 흐름을 만들기 위해 sub-module이 필요하다. (<strong>Sub-module Design</strong>)</p></li>
+</ol>
+<p><strong>Network Inflation</strong></p>
+<p>사전학습된 T2I 모델의 이미지 레이어는 고품질의 그림 사전지식(content prior)을 포착할수 있다. 이 지식을 활용(유지)하기 위해서 동일 모델로 video를 다루고자 할 때는 기존 이미지 레이어는 독립적으로 내버려두고, network를 확장시키는 방향이 선호된다. 이를 위해 기존 연구 (Ho et al., 2022b; Wu et al., 2023; Blattmann et al., 2023)를 참고하여, 5d tensor <span class="math notranslate nohighlight">\(x\in \Bbb R^{b\times c \times f\times h\times w}\)</span> 를 입력으로 받도록 모델을 수정했다.  <span class="math notranslate nohighlight">\(b\)</span>는 batch, <span class="math notranslate nohighlight">\(f\)</span>는 frame을 뜻한다. 내부 feature map이 이미지 레이어를 지나갈때는 시간 축을 의미하는 <span class="math notranslate nohighlight">\(f\)</span>는 <span class="math notranslate nohighlight">\(b\)</span>축으로 reshaping을 통해 무시한다.</p>
+<p>(5d tensor → 4d tensor <span class="math notranslate nohighlight">\(x \in \Bbb R^{bf\times c \times h\times w}\)</span> → (기존 이미지 레이어) → 4d tensor → 5d tensor)</p>
+<p>이를 통해 각 프레임을 개별 이미지 처럼 독립적으로 처리할 수 있다. 반면에 새롭게 추가된 motion module은 공간축(<span class="math notranslate nohighlight">\(h,w\)</span>)을 reshaping하여 무시한다. (5d tensor → 3d tensor <span class="math notranslate nohighlight">\(x \in \Bbb R^{bhw\times c \times f}\)</span> → (motion module) → 3d tensor → 5d tensor)</p>
+<p><strong>Module Design</strong></p>
+<p>최근 비디오 생성 연구들은 temporal modeling의 다양한 방식을 탐구하고 있다. AnimateDiff에서는 Transformer 구조를 차용하여 시간축상에서 동작하도록 작은 수정을 거쳐 motion module을 design했다. (이하 temporal Transformer) 실험을 통해 해당 구조가 motion prior를 모델링하는데 적합하다는 것을 발견했다. Fig.3을 보면 temporal Transformer가 시간축에서 동작하는 여러 self-attn block으로 이루어진것을 볼수 있다. 또한 sinusoidal position encoding을 통해 애니메이션상의 각 프레임의 시간적 위치정보를 나타내고자 했다. 앞서 언급한 대로 motion module의 입력크기는 feature map을 reshaping하여 조절하였다. (<span class="math notranslate nohighlight">\(x \in \Bbb R^{bhw\times c \times f}\)</span>) feature map을 시간축으로 다시 펼치고자 할때는 다음과 같은 길이 <span class="math notranslate nohighlight">\(f\)</span>, 크기 <span class="math notranslate nohighlight">\(z_1, ...,z_f;z_i \in \Bbb R^{(b\times h\times w)\times c}\)</span>의 vector sequence로 다룰수 있다. 해당 크기의 벡터가 self-attn block을 통과하면 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+z_{\text{out}}=\text{Attention}(Q,K,V)=\text{Softmax}(QK^T/\sqrt{c})\cdot V \tag{5}
+\]</div>
+<p><span class="math notranslate nohighlight">\(Q=W^Qz, K=W^Kz, V=W^Vz\)</span> 이며, 각각 분리된 세 projection을 의미한다. attention mechanism을 통해 현 프레임의 생성에 다른 프레임으로 부터 추출된 정보를 반영하는 것이 가능하다. 결과적으로 각 프레임을 개별적으로 생성하는 것이 아닌, T2I 모델을 확장하여 motion module을 추가한 AnimateDiff가 시간에 따른 visual content의 변화를 잘 포착하기 위해 학습하여 motion dynamics를 이용해 animation clip을 제작하도록 한다. self-attn block 전에 sinusoidal position encoding을 더하는 것을 잊어서는 안된다. 그렇지 않으면 motion module은 frame의 순서를 알 수 없기 때문이다.</p>
+<p>추가적인 모듈을 넣음으로 인해 발생할수 있는 문제들을 피하기 위해 temporal Transformer의 output projection 레이어의 파라미터는 0으로 초기화 하였으며 residual connection을 추가하여 훈련 시작시에 motion module이 identity mapping으로 동작하도록 했다.</p>
+</section>
+<section id="adapt-to-new-motion-patterns-with-motionlora">
+<h3>4.3 Adapt to New Motion Patterns with MotionLoRA<a class="headerlink" href="#adapt-to-new-motion-patterns-with-motionlora" title="Permalink to this heading">#</a></h3>
+<p>전반적인 motion 지식을 motion module이 사전학습하더라도 새로운 동작 패턴에 대한 적용에 대한 문제는 발생한다.
+ex. zooming, panning, rolling.</p>
+<p>높은 사전학습을 위한 비용을 감당할 수 없어 motion module을 특정 액션에 맞춰 튜닝하고자 하는 사용자를 위해 적은 참고 비디오(reference video)나 적은 훈련 횟수로도 효율적으로 모델을 적용할 수 있도록 하는 것이 중요하다. 이를 위해 AnimateDiff에 MotionLoRA를 마지막으로 적용했다. Motion Module의 구조와 제한된 참고 비디오를 고려하여, self-attn layers에 LoRA layers를 inflated model에 추가하여 motion personalization을 위한 효율적인 파인튜닝 방법을 제안한다.</p>
+<p>몇 종의 촬영 방식으로 실험을 진행하였으며 rule-based augmentation을 통해 reference videos를 얻었다. 예를 들어 zooming 비디오를 얻기 위해 시간에 따라 비디오 프레임의 cropping 영역을 점차 줄이거나(zoom-in) 늘려가며(zoom-out) augmentation을 진행했다. AnimateDiff의 MotionLoRA는 20~50개 정도의 적은 참고 비디오, 2000번의 훈련횟수로 파인튜닝했을때도 괜찮은 결과를 보였다. low-rank property로 인해 MotionLoRA 또한 composition capability를 가지고 있다. 학습된 MotionLoRA 모델 각각이 inference time상의 motion effect를 융합하기위해 협력(combine)할 수 있음을 말한다.</p>
+</section>
+<section id="animatediff-in-practice">
+<h3>4.4 AnimateDiff in Practice<a class="headerlink" href="#animatediff-in-practice" title="Permalink to this heading">#</a></h3>
+<section id="training">
+<h4>Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h4>
+<p>Fig. 3을 보면 AnimateDiff에는 학습 가능한 모듈이 3개 있다. 각 모듈의 objective는 약간씩 다르다. domain adapter는 SD의 MSE loss인 Eq. 2 objective function을 통해 학습한다. 애니메이션을 만드는 역할을 하는 motion module과 motion LoRA의 경우 video data에 대한 차원을 더 많이 수용하기 위해 약간 수정된 objective를 사용한다. video data batch ( <span class="math notranslate nohighlight">\(x_0^{1:f}\in \Bbb R^{b\times c \times f \times h \times w}\)</span>)는 사전학습된 SD의 auto-encoder를 사용해 각 프레임 별로 latent code <span class="math notranslate nohighlight">\(z_0^{1:f}\)</span>로 인코딩된다. 이 latent code는 Eq. 1 과 같이 정의된 diffusion schedule에 따라 노이즈가 추가(forward process)된다.</p>
+<div class="math notranslate nohighlight">
+\[
+z_t^{1:f}=\sqrt{\bar \alpha_t}z_0^{1:f}+\sqrt{1-\bar\alpha_t}\epsilon^{1:f} \tag{6}
+\]</div>
+<p>모델의 입력은 노이즈가 추가된 latent codes와 이 쌍이되는 text prompts이며, 모델은 forward process에서 추가된 노이즈를 예측한다. AnimateDiff의 motion module을 위한 최종 training objective는 아래와 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal L=\Bbb E_{\mathcal E(x_0^{1:f}),y,\epsilon^{1:f}\sim\mathcal N(0,I),t}\Big[\|\epsilon-\epsilon_\theta(z_t^{1:f},t,\tau_\theta(y))\|^2_2\Big] \tag{7}
+\]</div>
+<p>각 모듈들(domain adapter, motion module, MotionLoRA)을 학습할때, 학습 타겟을 제외한 영역은 freeze 시킨뒤 학습했다.</p>
+</section>
+<section id="inference">
+<h4>Inference<a class="headerlink" href="#inference" title="Permalink to this heading">#</a></h4>
+<p>inference시에는 personalized T2I model는 앞서 설명한대로 inflate되며 motion module과 (optional) MotionLoRA를 더해 애니메이션을 생성한다.</p>
+<p>domain adapter의 경우 inference시 그냥 배제하지 않고 personalized T2I model에 injection하였으며 domain adapter의 영향력은 Eq. 4의 <span class="math notranslate nohighlight">\(\alpha\)</span>를 이용해 조절했다. Sec 5.3의 Ablation study에서 <span class="math notranslate nohighlight">\(\alpha\)</span>의 값에 따른 결과의 차이를 확인할 수 있다. 마지막으로 animation frames은 reverse diffusion process와 이를 통해 얻은 latent codes를 디코딩 함으로써 얻을수 있다.</p>
+</section>
+</section>
+</section>
+<section id="experiments">
+<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>SD 1.5에 AnimateDiff를 적용하여 실험을 진행했다. 또한 motion module을 학습할때는 WebVid 10M 데이터셋을 사용하였다. (자세한 사항은 supplementary material 확인해주세요)</p>
+<section id="qualitative-results">
+<h3>5.1 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-1.png"><img alt="experiments_1" class="bg-primary mb-1" src="../../_images/exp-1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 621 </span><span class="caption-text">qualitative results</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="quantitative-comparison">
+<h3>5.2 Quantitative Comparison<a class="headerlink" href="#quantitative-comparison" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-2.png"><img alt="experiments_2" class="bg-primary mb-1" src="../../_images/exp-2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 622 </span><span class="caption-text">quantitative results</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>User Study</p>
+<p>text, domain, smooth 3개 지표에 대한 개별 등수를 조사했다. Average User Ranking(AUR) 방법을 사용하여 높은 점수를 가지면 높은 품질을 의미하는 preference metric을 사용했다.</p>
+</li>
+<li><p>CLIP metric</p>
+<p>related paper에서 언급했던 이미지와 텍스트쌍을 동시에 학습한 CLIP 모델을 활용한 평가지표이다. 사전학습된 CLIP 모델을 사용하여 generated frames와 reference 사이 CLIP score를 계산한 것이다.</p>
+<p>+) CLIP score는 CLIP encoder를 통과한 벡터들 사이 코사인 유사도를 계산하는 방식</p>
+<ul class="simple">
+<li><p>Text</p>
+<ul>
+<li><p>각 프레임 임베딩과 주어진 텍스트 임베딩 사이 코사인 유사도</p></li>
+</ul>
+</li>
+<li><p>Domain</p>
+<ul>
+<li><p>원본 애니메이션이 없으므로 reference image와 생성된 영상 사이 CLIP score를 구함.</p></li>
+</ul>
+</li>
+<li><p>Smooth</p>
+<ul>
+<li><p>연속된 프레임 쌍의 이미지 임베딩의 코사인 유사도</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="ablation-study">
+<h3>5.3 Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
+<section id="domain-adapter">
+<h4>Domain Adapter<a class="headerlink" href="#domain-adapter" title="Permalink to this heading">#</a></h4>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-3.png"><img alt="effect_of_domain_adapter" class="bg-primary mb-1" src="../../_images/exp-3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 623 </span><span class="caption-text">scaler를 0으로 하면 domain adapter에 의한 효과를 제거한 것과 같다. 위의 그림은 모델이 생성한 animation clip의 첫번째 프레임이다.</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>domain adapter에 의한 효과를 제거했을때 전체적인 이미지 퀄리티가 높아 보이는데, 이는 domain adapter가 video dataset의 특성이라고 할 수 있는 watermark나 모션 블러 등을 학습했기 때문이다. 즉, domain adapter가 전체 학습과정에 도움이 되었음을 보여준다.</p>
+</section>
+<section id="motion-module-design">
+<h4>Motion module design<a class="headerlink" href="#motion-module-design" title="Permalink to this heading">#</a></h4>
+<p>AnimateDiff의 temporal Transformer구조와 전체 convolution인 구조의 모델과 비교했다. 두 방식 모두 비디오 생성 분야에서 자주 사용된다.</p>
+<p>temporal Transformer의 temporal attention 부분을 1D temporal convolution으로 교체하고, 두 모델의 파라미터 수가 비슷하도록 맞추었다. convolution motion module은 모든 프레임을 거의 동일하게 만들 뿐, Transformer 구조와 달리 움직임을 제대로 반영하지 못했다.</p>
+</section>
+<section id="efficiency-of-motionlora">
+<h4>Efficiency of MotionLoRA<a class="headerlink" href="#efficiency-of-motionlora" title="Permalink to this heading">#</a></h4>
+<p>parameter efficiency와 data efficiency 측면에서 MotionLoRA의 효율성을 시험해보았다. 이를 위해 parameter 개수와 data 개수를 조절해가며 여러 MotionLoRA를 학습시켰다.</p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-4.png"><img alt="experiments-4" class="bg-primary mb-1" src="../../_images/exp-4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 624 </span><span class="caption-text">Efficiency of MotionLoRA</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Parameter efficiency</p>
+<ul>
+<li><p>효율적인 모델 학습을 위해서도, 모델의 배포를 위해서도 중요한 부분이다.</p></li>
+<li><p>AnimateDiff는 비교적 파라미터 개수가 적을 때에도 괜찮은 애니메이션을 만들 수 있다. 그림의 실험에서는 zoom-in 카메라 움직임을 새롭게 학습하는 능력을 본 것이다.</p></li>
+</ul>
+</li>
+<li><p>Data efficiency</p>
+<ul>
+<li><p>특정 motion pattern을 위한 reference video 데이터를 수집하기 어렵기 때문에 모델을 실제로 적용하기 위해 중요한 부분이다.</p></li>
+<li><p>데이터의 개수가 적을때에도 학습하고자 하는 움직임은 학습할 수 있었으나 데이터의 개수가 극도로 적을 경우(N=5) 생성된 애니메이션 품질의 급격한 저하가 있었다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="controllable-generation">
+<h3>5.4 Controllable Generation<a class="headerlink" href="#controllable-generation" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/exp-5.png"><img alt="experiments_5" class="bg-primary mb-1" src="../../_images/exp-5.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 625 </span><span class="caption-text">Controllability of AnimateDiff</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>visual content와 motion prior의 개별 학습을 통해 AnimateDiff가 existing content를 조절할 수 있도록 했다. 이 특성을 확인하기 위해 AnimateDiff를 ControlNet과 결합하여 영상 생성시 depth를 통해 조절할 수 있도록 했다.</p>
+<p>DDIM inversion을 통해 다듬어진 latent sequences를 얻고 이를 비디오 생성에 사용하는 최신 비디오 수정 연구들과 달리, AnimateDiff는 randomly sampled noise로부터 애니메이션을 생성한다.</p>
+</section>
+</section>
+<section id="conclusion">
+<h2>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서는 애니메이션 생성을 위한 practical pipeline인 AnimateDiff를 제안한다. AnimateDiff를 통해 personalized text-to-image model을 바로 애니메이션 생성에 사용할 수 있다. 이를 위해 본 논문에서는 세가지 module을 디자인하였으며 이를 통해 AnimateDiff는 motion prior를 학습하고, visual quality를 유지할 수 있으며, MotionLoRA를 통해 가벼운 finetuning을 통해 원하는 motion으로 애니메이션을 생성할 수 있다.</p>
+<p>motion module은 한번 학습되면 다른 이미지를 animate시키고자 할때 사용할 수 있다. 다양한 실험 결과를 통해 AnimateDiff와 MotionLoRA의 효율성과 생성능력을 검증했다. 또 content-controllability측면에서도 추가적인 학습없이 본 논문의 방식을 사용할 수 있음을 보였다.</p>
+<p>AnimateDiff는 취향의 그림체, 캐릭터의 움직임, 카메라 워크에 맞게 이미지를 애니메이션화시킬 수 있는 효율적인 베이스라인으로서 다양한 방면의 application에 큰 잠재력을 가지고 있다.</p>
+</section>
+<section id="id2">
+<h2>7. 실습<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h2>
+<p>아래 이미지들을 클릭하면 gif를 보실 수 있습니다.</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif"><img alt="hands_on_1" class="bg-primary mb-1" src="../../_images/0-side-view-photo-of-17-year-old-girl-in-a-japanese-school_edit.gif" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 626 </span><span class="caption-text">side-view-photo-of-17-year-old-girl-in-a-japanese-school<br />
+gpt로 생성한 그림을 input으로 사용함</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif"><img alt="hands_on_2" class="bg-primary mb-1" src="../../_images/0-side-view-photo-of-man-in-black-padded-jumper,-hallway,_edit.gif" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 627 </span><span class="caption-text">side-view-photo-of-man-in-black-padded-jumper<br />
+직접 촬영한 사진을 input으로 사용함<br />
+입력한 사진의 인물의 인종이 유지되지 않았는데 학습데이터 셋의 불균형 때문으로 사료됨</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif"><img alt="hands_on_3" class="bg-primary mb-1" src="../../_images/6-image-of-a-man-with-blonde-hair-and-blue-eyes,_edit.gif" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 628 </span><span class="caption-text">image-of-a-man-with-blonde-hair-and-blue-eyes<br />
+gpt로 생성한 그림을 input으로 사용함</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<blockquote>
+<div><p>📌 실습 후 느낀점</p>
+<ul class="simple">
+<li><p>WebVid 10M이 애니메이션화에 적합한 데이터셋인지 잘 모르겠다.</p></li>
+<li><p>다양한 metric을 평가에 사용하지 않은 점이 아쉽다.</p></li>
+<li><p>특정 애니메이션 클립을 생성하고 싶다면 실질적으로 학습해야 하는 부분은 motionLoRA정도라 사용이 편리하다.</p></li>
+<li><p>reproduction이 매우 용이하다.</p></li>
+<li><p>AnimateDiff를 제대로 활용하기 위해서는 personalized T2I가 제일 중요한 부분이라고 할수 있는데, 원하는 스타일의 pretrained T2I 모델을 구하는 것이 어렵다. 그림체가 잘 맞지 않으면 애니메이션 클립 초반에 급격히 변화하는 부분이 자주 생긴다.</p></li>
+</ul>
+</div></blockquote>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="VideoLDM.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">VideoLDM</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Animate_Anyone.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Animate Anyone</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">2.1 Text-to-image diffusion models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#personalizing-t2i-models">2.2 Personalizing T2I models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animating-personalized-t2is">2.3 Animating personalized T2Is</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary">3. Preliminary</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stable-diffusion">3.1 Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation-lora">3.2 Low-Rank Adaptation(LoRA)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4. AnimateDiff</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#alleviate-negative-effects-from-training-data-with-domain-adapter">4.1 Alleviate Negative Effects from Training Data with Domain Adapter</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#learn-motion-priors-with-motion-module">4.2 Learn Motion Priors with Motion Module</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapt-to-new-motion-patterns-with-motionlora">4.3 Adapt to New Motion Patterns with MotionLoRA</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#animatediff-in-practice">4.4 AnimateDiff in Practice</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training">Training</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#inference">Inference</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.1 Qualitative Results</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">5.2 Quantitative Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.3 Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#domain-adapter">Domain Adapter</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-module-design">Motion module design</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#efficiency-of-motionlora">Efficiency of MotionLoRA</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-generation">5.4 Controllable Generation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">7. 실습</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Animate_Anyone.html b/docs/review/Animate_Anyone.html
old mode 100644
new mode 100755
index 1e1d7cb4..5bf37796
--- a/docs/review/Animate_Anyone.html
+++ b/docs/review/Animate_Anyone.html
@@ -1,1080 +1,1100 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Animate Anyone &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Animate_Anyone';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DreaMoving" href="DreaMoving.html" />
-    <link rel="prev" title="AnimateDiff" href="AnimateDiff.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Animate_Anyone.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Animate_Anyone.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Animate Anyone</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-image-generation">2.1 Diffusion Model for Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-video-generation">2.2 Diffusion Model for Video Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-human-image-animation">2.3 Diffusion Model for Human Image Animation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1 Preliminary: Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">3.2 Network Architecture</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">3.3 Training Strategy</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementations">4.1 Implementations</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.2 Qualitative Results</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">4.3 Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation study</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">5. Limitations</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2311.17117">https://arxiv.org/abs/2311.17117</a></p></li>
-<li><p>Code:</p>
-<ul>
-<li><p><a class="reference external" href="https://github.com/HumanAIGC/AnimateAnyone">Official</a></p></li>
-<li><p><a class="reference external" href="https://github.com/guoqincode/Open-AnimateAnyone">NonOfficial</a></p></li>
-</ul>
-</li>
-<li><p>Project Page : <a class="reference external" href="https://humanaigc.github.io/animate-anyone/">https://humanaigc.github.io/animate-anyone/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Geonhak Song</p></li>
-<li><p><strong>Last updated on Mar. 13, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="animate-anyone">
-<h1>Animate Anyone<a class="headerlink" href="#animate-anyone" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/title_fig.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/title_fig.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 617 </span><span class="caption-text">Animate Anyone Example Figure</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Diffusion 모델들이 visual generation 연구에 주류가 되었지만, image-to-video 영역에서는 어려움이 있다. 특히, character animation에서 캐릭터의 상세 정보의 일관성을 유지하는 것은 큰 문제이다.</p></li>
-<li><p>reference image의 복잡한 appearance 특징의 일관성을 유지하기 위해서 spatial attention feature와 통합할 <strong>ReferenceNet</strong> 설계</p></li>
-<li><p>controllability와 continuity를 위해서 효과적인 <strong>pose guider</strong> 도입.</p></li>
-<li><p>비디오 프레임간 부드러운 전이를 위해 효과적인 <strong>temporal modeling</strong> 도입</p></li>
-<li><p>이를 통해 어떠한 임의의 캐릭터에 대해서도 animate할 수 있고 우월성을 보임</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p><strong>Character Animation History</strong></p>
-<ul class="simple">
-<li><p>Character Animation은 source character 이미지로부터 사실적인 비디오를 animate하는 작업으로 GAN을 시작으로 많은 연구가 진행되어왔다.</p></li>
-<li><p>그러나 생성된 이미지 또는 비디오는 local distortion, blurred details, semantic inconsistency,  temporal instability 문제가 있어 널리 사용되기에는 어려움이 있어왔다.</p></li>
-</ul>
-<p><strong>Diffusion 기반 image-to-video 예시</strong></p>
-<ul class="simple">
-<li><p>최근 diffusion model의 우수성에 따라 image-to-video task에 diffusion model을 활용하려는 연구들이 보였다.</p></li>
-<li><p>DreamPose (23.04)</p>
-<ul>
-<li><p>Stable Diffusion을 확장하여 fashion image-to-video 합성을 가능하게 하는 데 초점을 맞췄다.</p></li>
-<li><p>본 모델은 CLIP과 VAE feature를 통합한 adapter module을 제안했다.</p></li>
-<li><p>그러나 consistent 결과를 위해서 input sample에 대해 추가 finetuning이 필요하고 운용 효율이 떨어진다.</p></li>
-</ul>
-</li>
-<li><p>DisCo (23.07)</p>
-<ul>
-<li><p>Stable Diffusion을 수정하여 human dance generation 진행</p></li>
-<li><p>CLIP과 ControlNet을 활용한 통합 모델 구축</p></li>
-<li><p>그러나 character detail 보존에 어려움을 겪고 frame간 jittering issue 존재</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>Character Animation 관점에서의 Text-to-image generation 한계</strong></p>
-<ul class="simple">
-<li><p>text-to-image generation &amp; video generation에 시각적 품질과 다양성에 큰 진전이 있어왔지만, 복잡한 detail을 잘 살리는 것이 어렵고 정확도 측면에서도 부정확한 부분이 있다.</p></li>
-<li><p>더욱이, 실질적 character 움직임을 다룰 때, 일관성 측면에서 안정적이고 연속적인 영상을 만들어내는 것이 어렵다.</p></li>
-<li><p>현재는 일반성과 일관성을 동시에 만족하는 character animation 방법을 찾을 수 없어 본 논문에서 Animate Anyone 방법을 제안한다.</p></li>
-</ul>
-<p><strong>Animate Anyone 모델 구조 요약</strong></p>
-<ul class="simple">
-<li><p>appearance consistency를 위한 <strong>ReferenceNet</strong> 도입.</p>
-<ul>
-<li><p>spatial attention을 사용하는 UNet으로 ReferenceNet feature와 통합</p></li>
-<li><p>이는 모델로 하여금 일관된 feature space에서 reference image의 관계성을 종합적으로 학습하게 함</p></li>
-</ul>
-</li>
-<li><p>pose controllability를 위한 <strong>lightweight pose guider</strong> 도입.</p>
-<ul>
-<li><p>효과적인 pose control signal을 denoising 절차에 통합함.</p></li>
-</ul>
-</li>
-<li><p>temporal stability를 위한 <strong>temporal layer</strong> 도입</p>
-<ul>
-<li><p>연속적이고 부드러운 temporal motion process와 동시에 고해상도 detail quality 보존을 위한 frame간 관계성 학습</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>제안 모델의 결과</strong></p>
-<ul class="simple">
-<li><p>5K character video clip 인터넷 데이터 세트로 훈련</p></li>
-<li><p>장점 1) character appearance의 spatial &amp; temporal consistency를 효과적으로 유지</p></li>
-<li><p>장점 2) temporal jitter &amp; flickering과 같은 문제 없는 높은 신뢰도의 비디오 생성</p></li>
-<li><p>장점 3) 어떠한 character image에도 animation video 생성 가능</p></li>
-<li><p>benchmark에 대한 결과 또한 우수성 증명</p></li>
-</ul>
-</section>
-<section id="related-works">
-<h2>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h2>
-<section id="diffusion-model-for-image-generation">
-<h3>2.1 Diffusion Model for Image Generation<a class="headerlink" href="#diffusion-model-for-image-generation" title="Permalink to this heading">#</a></h3>
-<p>T2I model</p>
-<ol class="arabic simple">
-<li><p>LDM : latent space에서의 denoising 진행.</p></li>
-<li><p>ControlNet, T2I-Adapter : pose, mask, edge, depth와 같은 추가 조건부 생성을 위한 추가 encoding layer 사용</p></li>
-</ol>
-<p>IP-Adapter : image prompt 기반의 content 결과 생성</p>
-<p>ObjectStitch, Paint-by-Example : CLIP을 활용한 image editing 방법</p>
-<p>TryonDiffusion : virtual apparel try on을 위한 parallel u-net 구조 도입</p>
-</section>
-<section id="diffusion-model-for-video-generation">
-<h3>2.2 Diffusion Model for Video Generation<a class="headerlink" href="#diffusion-model-for-video-generation" title="Permalink to this heading">#</a></h3>
-<p>T2V Model : T2I 모델 기반 inter-frame attention modeling을 통한 연구가 많이 이뤄짐.</p>
-<p>Video LDM : temporal layer를 삽입한 T2I 모델 기반 video generation model</p>
-<p>AnimateDiff : personalized T2I model을 활용한 motion module을 많은 video data로 학습시킨 모델</p>
-<p>→ Animate Anyone에서는 본 temporal modeling에 영향을 받아 해당 방법론 사용</p>
-<p>I2V Model</p>
-<p>VideoComposer : conditional control</p>
-<p>AnimateDiff : image latent과 random noise 간 weight mixing</p>
-<p>VideoCrafter : CLIP의 textual &amp; visual feature를 통합하여 cross-attention에 주입</p>
-<p>그러나 해당 방법들 모두 안정적인 사람 video 생성에는 어려움이 존재.</p>
-</section>
-<section id="diffusion-model-for-human-image-animation">
-<h3>2.3 Diffusion Model for Human Image Animation<a class="headerlink" href="#diffusion-model-for-human-image-animation" title="Permalink to this heading">#</a></h3>
-<p>Image Animation</p>
-<p>PIDM, LFDM, LEO,</p>
-<p>DreamPose, DisCo</p>
-</section>
-</section>
-<section id="methods">
-<h2>3. Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
-<p>목표 : character animation을 위한 pose-guided image-to-video 합성</p>
-<section id="preliminary-stable-diffusion">
-<h3>3.1 Preliminary: Stable Diffusion<a class="headerlink" href="#preliminary-stable-diffusion" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/eq_1.png"><img alt="eq_1" class="bg-primary mb-1" src="../../_images/eq_1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 618 </span><span class="caption-text">Eq (1) Stable Diffusion Objective</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\epsilon_\theta\)</span> : UNet func</p>
-<p><span class="math notranslate nohighlight">\(c\)</span> : conditional embedding</p>
-<p><span class="math notranslate nohighlight">\(z\)</span> : image latent</p>
-<p><span class="math notranslate nohighlight">\(t\)</span> : timestep</p>
-<p><span class="math notranslate nohighlight">\(z_t\)</span> : noise latent</p>
-<p>CLIP ViT-L/14 text encoder</p>
-<p>denoising UNet : 4 downsample layers , 1 middle layer, 4 upsample layers.</p>
-<p>각 Res-Trans block별 2D convolution, self-attention, cross-attention로 구성</p>
-</section>
-<section id="network-architecture">
-<h3>3.2 Network Architecture<a class="headerlink" href="#network-architecture" title="Permalink to this heading">#</a></h3>
-<p><strong>Overview</strong></p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_2.png"><img alt="figure_2" class="bg-primary mb-1" src="../../_images/figure_2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 619 </span><span class="caption-text">Figure 2 Animate Anyone Overview</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>3가지 중요 요소 통합</p>
-<ol class="arabic simple">
-<li><p>ReferenceNet : reference image로부터 character의 appearance features encoding</p></li>
-<li><p>Pose Guider : 제어가능한 character movements를 위한 motion control signal encoding</p></li>
-<li><p>Temporal layer : character motion 연속성을 위한 temporal relationship encoding</p></li>
-</ol>
-<p><strong>ReferenceNet</strong></p>
-<ul class="simple">
-<li><p>text보다 image가 더 low-level detailed feature를 통한 일관성 유지 정보를 내포함.</p></li>
-<li><p>이에 따라 최근 CLIP image encoder가 text encoder보다 많이 사용되었지만, detail consistency에는 역부족</p>
-<ul>
-<li><p>이유 1: CLIP image encoder는 224x224의 저해상도 이미지들로 구성되어 중요한 세부정보 손실이 있을 수 있다.</p></li>
-<li><p>이유 2: CLIP은 text에 더욱 부합하게 훈련되어 high-level feature matching에 강조되고 이에 따라 feature encoding에 있어 detail feature에 부족함이 존재</p></li>
-</ul>
-</li>
-<li><p>이에 따라 reference image feature extraction network인 ReferenceNet 고안 (이때 temporal layer 제외)</p></li>
-<li><p>ReferenceNet은 SD로 초기화하고 각각 독립적으로 update 수행하고 UNet과 통합</p></li>
-<li><p>self-attention layer를 spatial attention layer로 변경</p></li>
-<li><p>Feature map : <span class="math notranslate nohighlight">\(x_1 \in \mathcal{R}^{t \times h \times w \times c }\)</span> (UNet ), <span class="math notranslate nohighlight">\(x_2 \in \mathcal{R}^{h \times w \times c }\)</span> (ReferenceNet) 이 주어졌을 때, <span class="math notranslate nohighlight">\(x_2\)</span>를 t번 복사해 w축을 따라 <span class="math notranslate nohighlight">\(x_1\)</span>과 concat</p></li>
-<li><p>self-attention을 수행하고 feature map의 반을 결과로 뽑음.</p></li>
-<li><p>2가지 장점</p>
-<ul>
-<li><ol class="arabic simple">
-<li><p>사전 학습된 image feature model SD를 사용함에 따라 <strong>초기값이 잘 정의</strong>된 것 사용가능.</p></li>
-</ol>
-</li>
-<li><ol class="arabic simple" start="2">
-<li><p>UNet과 ReferenceNet의 초기값이 공유되고 동일한 네트워크 구조를 가짐에 따라 UNet은 (동일한 feature space에 상관관계가 있는) ReferenceNet feature 중 선별적으로 feature 학습이 가능</p></li>
-</ol>
-</li>
-</ul>
-</li>
-<li><p>CLIP image encoder를 cross-attention에 도입</p>
-<ul>
-<li><p>reference image의 semantic feature를 제공함에 따라 신속한 전체 네트워크 훈련 초기값 설정 가능.</p></li>
-</ul>
-</li>
-<li><p>ControlNet은 target image와 공간적으로 align된 정보를 활용 → 부적합</p></li>
-<li><p>본 방법에서는 reference image와 target image가 공간적으로는 관계되어있지만, align되지 않음.</p></li>
-<li><p>타 diffusion 기반 video generation에서는 모든 video frame에 대해 denoising을 진행</p></li>
-<li><p>ReferenceNet은 feature 추출할 때 한 번만 필요</p></li>
-<li><p>효과 : inference 단계에서 계산량이 증가하지 않는다.</p></li>
-</ul>
-<p><strong>Pose Guider</strong></p>
-<ul class="simple">
-<li><p>ControlNet은 robust한 conditional 생성을 입증해왔지만, 추가 Fine-tuning이 필요했었다.</p></li>
-<li><p>저자들은 추가적인 계산량 증가를 막기위해 추가적인 control network를 통합하지 않고 lightweight Pose Guider 도입</p></li>
-<li><p>noise latent와 동일 해상도를 가지는 pose 이미지 align을 위해 four convolution layers (4×4 kernels, 2×2 strides, using 16,32,64,128 channels) 사용</p></li>
-<li><p>Gaussian weights 초기화, final projection layer에서 zero convolution 도입.</p></li>
-</ul>
-<p><strong>Temporal Layer</strong></p>
-<ul class="simple">
-<li><p>이미 많은 곳에서 T2I 모델에 temporal layer를 통합했을 때 frame간 temporal dependency가 가능함을 보임.</p></li>
-<li><p>본 방법에서는 U-Net 내 Res-Trans block 안에 있는 spatial-attention과 cross-attention 진행 후에 temporal layer 추가</p></li>
-<li><p>순서 1) reshape : <span class="math notranslate nohighlight">\(x \in \mathcal{R}^{b \times t \times h \times w \times c }\)</span> → <span class="math notranslate nohighlight">\(x \in \mathcal{R}^{(b \times h \times w) \times t \times c }\)</span></p></li>
-<li><p>순서 2) temporal attention 수행 → residual connection</p></li>
-<li><p>효과 : appearance details에 대한 temporal smoothness &amp; continuity</p></li>
-</ul>
-</section>
-<section id="training-strategy">
-<h3>3.3 Training Strategy<a class="headerlink" href="#training-strategy" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>훈련 두 단계</p></li>
-<li><p>첫 번째 단계</p>
-<ul>
-<li><p>temporal layer를 제외한 single-frame noise를 입력으로 받는 모델 학습</p></li>
-<li><p>ReferenceNet &amp; Pose Guider</p></li>
-<li><p>reference 이미지는 전체 비디오 클립에서 랜덤으로 선택</p></li>
-<li><p>초기 weight는 사전학습된 SD weight</p></li>
-<li><p>Pose Guider는 마지막 projection layer를 제외한 모든 layer gaussian weight 초기화</p></li>
-<li><p>VAE Encoder, Decoder, CLIP image encoder 는 그대로</p></li>
-</ul>
-</li>
-<li><p>두 번째 단계</p>
-<ul>
-<li><p>첫 번째 단계에서 훈련한 모델 속 temporal layer만 훈련</p></li>
-<li><p>temporal layer 초기값 : AnimateDiff pretrained weight</p></li>
-<li><p>입력 : 24frame video clip</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<section id="implementations">
-<h3>4.1 Implementations<a class="headerlink" href="#implementations" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Data : 5K character video clips (2-10 seconds long) 인터넷에서 다운로드</p></li>
-<li><p>Pose Estimation Model : DWPose(Distillation for Whole-body Pose estimator) (23.07) <a class="github reference external" href="https://github.com/IDEA-Research/DWPose">IDEA-Research/DWPose</a>
-(the student’s head with only 20% training time as a plug-and-play training strategy)</p></li>
-<li><p>GPU : 4 NVIDIA A100 GPUs</p></li>
-<li><p>첫 번째 훈련 단계 : 768×768 해상도 video frame sampled, resized, and center-cropped 30,000 steps, batch size 64.</p></li>
-<li><p>두 번째 훈련 단계 : temporal layer 10,000 steps 24-frame video sequences, batch size 4.</p></li>
-<li><p>learning rates : 1e-5.</p></li>
-<li><p>Inference 단계 : reference image의 캐릭터 skeleton의 길이에 근사하기 위해서 유도된 pose skeleton의 길이 rescale</p></li>
-<li><p>DDIM sampler, 20 steps</p></li>
-<li><p>긴 영상 생성을 위해 temporal aggregation method 채택</p></li>
-<li><p>Evaluation : benchmark dataset 2개(UBC fashion video dataset, Tik-Tok dataset) 사용</p></li>
-</ul>
-</section>
-<section id="qualitative-results">
-<h3>4.2 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_3.png"><img alt="figure_3" class="bg-primary mb-1" src="../../_images/figure_3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 620 </span><span class="caption-text">Figure 3 Qualitative Results</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>전신이 나오는 임의의 characters, 절반 길이의 portraits, cartoon characters, humanoid characters에 대해 animation</p></li>
-<li><p>reference image와 유사한 temporal consistency를 보이는 사실적인 결과 생성</p></li>
-</ul>
-</section>
-<section id="comparisons">
-<h3>4.3 Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>SSIM, PSNR, LPIPS, FVD(Fréchet <em><strong>Video</strong></em> Distance)</p></li>
-</ul>
-<p><strong>Fashion Video Synthesis</strong></p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table1.png"><img alt="table1" class="bg-primary mb-1" src="../../_images/table1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 621 </span><span class="caption-text">Table 1 Quantitative Comparison for fashion video synthesis</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Quantitative comparison - Table 1</p>
-<ul>
-<li><p>UBC fashion video dataset
-(500 training &amp; 100 testing videos로 구성, 각 video 약 500 frames)</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_4.png"><img alt="figure_4" class="bg-primary mb-1" src="../../_images/figure_4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 622 </span><span class="caption-text">Figure 4 Qualitative comparison for fashion video synthesis</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DreamPose &amp; BDMM은 옷의 일관성을 잃어버리는 문제. 색과 섬세한 구조적 요소에 대한 error 발생</p></li>
-<li><p>반면, 제안 방법은 옷의 세부 내용까지 일관성있게 보존됨.</p></li>
-</ul>
-<p><strong>Human Dance Generation</strong></p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table2.png"><img alt="table2" class="bg-primary mb-1" src="../../_images/table2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 623 </span><span class="caption-text">Table 2 Quantitative comparison for human dance generation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Quantitative comparison - Table 2</p>
-<ul>
-<li><p>TikTok dataset
-(340 training &amp; 100 testing single human dancing videos (10-15s))</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_5.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 624 </span><span class="caption-text">Figure 5 Qualitative comparison between DisCo and Animate Anyone method</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DisCo에서는 인물 foreground mask를 위해 SAM 활용하는 pipeline 활용</p></li>
-<li><p>그러나 본 방법에서는 masking 없이 모델 자체가 subject motion으로부터 전경과 배경의 구분 가능</p></li>
-<li><p>복잡한 dance sequence에서도 시각적으로 연속적인 motion을 보여줌. robustness</p></li>
-</ul>
-<p><strong>General Image-to-Video Methods</strong></p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_6.png"><img alt="figure_6" class="bg-primary mb-1" src="../../_images/figure_6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 625 </span><span class="caption-text">Figure 6 Qualitative comparison with image-to-video methods</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>비교 모델 : AnimateDiff &amp; Gen-2</p></li>
-<li><p>reference image에 대한 외관 신뢰도만 비교</p></li>
-<li><p>기존 image-to-video 방법은 얼굴을 일관되게 유지하지 못하는 문제에 봉착한 반면, 제안 모델은 다른 모델 대비 긴 시간 동안 appearance consistency 유지</p></li>
-</ul>
-</section>
-<section id="ablation-study">
-<h3>4.4 Ablation study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_7.png"><img alt="figure_7" class="bg-primary mb-1" src="../../_images/figure_7.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 626 </span><span class="caption-text">Figure 7 Ablation study of different design</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_3.png"><img alt="table_3" class="bg-primary mb-1" src="../../_images/table_3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 627 </span><span class="caption-text">Table 3 Quantitative comparison for ablation study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>ReferenceNet design 효과성 증명을 위한 Ablation study</p>
-<ul>
-<li><p>(1) CLIP image encoder만 사용</p></li>
-<li><p>(2) 초기 finetuning SD 이후 reference image 기반 ControlNet training</p></li>
-<li><p>(3) 위 2 방법론 통합</p></li>
-</ul>
-</li>
-<li><p>결론 : ReferenceNet를 사용하는 것이 모든 방법 대비 가장 좋았다.</p></li>
-</ul>
-</section>
-</section>
-<section id="limitations">
-<h2>5. Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><ol class="arabic simple">
-<li><p>손의 안정적인 움직임을 보이는 것에 어려움을 보임. 가끔 왜곡, motion blur 발생</p></li>
-</ol>
-</li>
-<li><ol class="arabic simple" start="2">
-<li><p>제공하는 이미지는 한 측면만 보이기 때문에 보이지 않은 부분에 대해서는 ill-posed problem으로 불안정</p></li>
-</ol>
-</li>
-<li><ol class="arabic simple" start="3">
-<li><p>DDPM 활용에 따른 non-diffusion 기반 모델 대비 낮은 operational efficiency</p></li>
-</ol>
-</li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="AnimateDiff.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">AnimateDiff</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DreaMoving.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DreaMoving</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-image-generation">2.1 Diffusion Model for Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-video-generation">2.2 Diffusion Model for Video Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-human-image-animation">2.3 Diffusion Model for Human Image Animation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1 Preliminary: Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">3.2 Network Architecture</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">3.3 Training Strategy</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementations">4.1 Implementations</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.2 Qualitative Results</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">4.3 Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation study</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">5. Limitations</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Animate Anyone &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Animate_Anyone';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DreaMoving" href="DreaMoving.html" />
+    <link rel="prev" title="AnimateDiff" href="AnimateDiff.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Animate_Anyone.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Animate_Anyone.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Animate Anyone</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-image-generation">2.1 Diffusion Model for Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-video-generation">2.2 Diffusion Model for Video Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-human-image-animation">2.3 Diffusion Model for Human Image Animation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1 Preliminary: Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">3.2 Network Architecture</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">3.3 Training Strategy</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementations">4.1 Implementations</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.2 Qualitative Results</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">4.3 Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation study</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">5. Limitations</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Animate Anyone: Consistent and Controllable Image-to-Video Synthesis for Character Animation</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2311.17117">https://arxiv.org/abs/2311.17117</a></p></li>
+<li><p>Code:</p>
+<ul>
+<li><p><a class="reference external" href="https://github.com/HumanAIGC/AnimateAnyone">Official</a></p></li>
+<li><p><a class="reference external" href="https://github.com/guoqincode/Open-AnimateAnyone">NonOfficial</a></p></li>
+</ul>
+</li>
+<li><p>Project Page : <a class="reference external" href="https://humanaigc.github.io/animate-anyone/">https://humanaigc.github.io/animate-anyone/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Geonhak Song</p></li>
+<li><p><strong>Last updated on March 13, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="animate-anyone">
+<h1>Animate Anyone<a class="headerlink" href="#animate-anyone" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/title_fig.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/title_fig.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 629 </span><span class="caption-text">Animate Anyone Example Figure</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Diffusion 모델들이 visual generation 연구에 주류가 되었지만, image-to-video 영역에서는 어려움이 있다. 특히, character animation에서 캐릭터의 상세 정보의 일관성을 유지하는 것은 큰 문제이다.</p></li>
+<li><p>reference image의 복잡한 appearance 특징의 일관성을 유지하기 위해서 spatial attention feature와 통합할 <strong>ReferenceNet</strong> 설계</p></li>
+<li><p>controllability와 continuity를 위해서 효과적인 <strong>pose guider</strong> 도입.</p></li>
+<li><p>비디오 프레임간 부드러운 전이를 위해 효과적인 <strong>temporal modeling</strong> 도입</p></li>
+<li><p>이를 통해 어떠한 임의의 캐릭터에 대해서도 animate할 수 있고 우월성을 보임</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p><strong>Character Animation History</strong></p>
+<ul class="simple">
+<li><p>Character Animation은 source character 이미지로부터 사실적인 비디오를 animate하는 작업으로 GAN을 시작으로 많은 연구가 진행되어왔다.</p></li>
+<li><p>그러나 생성된 이미지 또는 비디오는 local distortion, blurred details, semantic inconsistency,  temporal instability 문제가 있어 널리 사용되기에는 어려움이 있어왔다.</p></li>
+</ul>
+<p><strong>Diffusion 기반 image-to-video 예시</strong></p>
+<ul class="simple">
+<li><p>최근 diffusion model의 우수성에 따라 image-to-video task에 diffusion model을 활용하려는 연구들이 보였다.</p></li>
+<li><p>DreamPose (23.04)</p>
+<ul>
+<li><p>Stable Diffusion을 확장하여 fashion image-to-video 합성을 가능하게 하는 데 초점을 맞췄다.</p></li>
+<li><p>본 모델은 CLIP과 VAE feature를 통합한 adapter module을 제안했다.</p></li>
+<li><p>그러나 consistent 결과를 위해서 input sample에 대해 추가 finetuning이 필요하고 운용 효율이 떨어진다.</p></li>
+</ul>
+</li>
+<li><p>DisCO (23.07)</p>
+<ul>
+<li><p>Stable Diffusion을 수정하여 human dance generation 진행</p></li>
+<li><p>CLIP과 ControlNet을 활용한 통합 모델 구축</p></li>
+<li><p>그러나 character detail 보존에 어려움을 겪고 frame간 jittering issue 존재</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Character Animation 관점에서의 Text-to-image generation 한계</strong></p>
+<ul class="simple">
+<li><p>text-to-image generation &amp; video generation에 시각적 품질과 다양성에 큰 진전이 있어왔지만, 복잡한 detail을 잘 살리는 것이 어렵고 정확도 측면에서도 부정확한 부분이 있다.</p></li>
+<li><p>더욱이, 실질적 character 움직임을 다룰 때, 일관성 측면에서 안정적이고 연속적인 영상을 만들어내는 것이 어렵다.</p></li>
+<li><p>현재는 일반성과 일관성을 동시에 만족하는 character animation 방법을 찾을 수 없어 본 논문에서 Animate Anyone 방법을 제안한다.</p></li>
+</ul>
+<p><strong>Animate Anyone 모델 구조 요약</strong></p>
+<ul class="simple">
+<li><p>appearance consistency를 위한 <strong>ReferenceNet</strong> 도입.</p>
+<ul>
+<li><p>spatial attention를 사용하는 UNet으로 ReferenceNet feature과 통합</p></li>
+<li><p>이는 모델로 하여금 일관된 feature space에서 reference image의 관계성을 종합적으로 학습하게 함</p></li>
+</ul>
+</li>
+<li><p>pose controllability를 위한 <strong>lightweight pose guider</strong> 도입.</p>
+<ul>
+<li><p>효과적인 pose control signal을 denoising 절차에 통합함.</p></li>
+</ul>
+</li>
+<li><p>temporal stability를 위한 <strong>temporal layer</strong> 도입</p>
+<ul>
+<li><p>연속적이고 부드러운 temporal motion process와 동시에 고해상도 detail quality 보존을 위한 frame간 관계성 학습</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>제안 모델의 결과</strong></p>
+<ul class="simple">
+<li><p>5K character video clip 인터넷 데이터 세트로 훈련</p></li>
+<li><p>장점 1) character appearance의 spatial &amp; temporal consistency을 효과적으로 유지</p></li>
+<li><p>장점 2) temporal jitter &amp; flickering과 같은 문제 없는 높은 신뢰도의 비디오 생성</p></li>
+<li><p>장점 3) 어떠한 character image에도 animation video 생성 가능</p></li>
+<li><p>benchmark에 대한 결과 또한 우수성 증명</p></li>
+</ul>
+</section>
+<section id="related-works">
+<h2>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h2>
+<section id="diffusion-model-for-image-generation">
+<h3>2.1 Diffusion Model for Image Generation<a class="headerlink" href="#diffusion-model-for-image-generation" title="Permalink to this heading">#</a></h3>
+<p>T2I model</p>
+<ol class="arabic simple">
+<li><p>LDM : latent space에서의 denoising 진행.</p></li>
+<li><p>ControlNet, T2I-Adapter : pose, mask, edge, depth와 같은 추가 조건부 생성을 위한 추가 encoding layer 사용</p></li>
+</ol>
+<p>IP-Adapter : image prompt 기반의 content 결과 생성</p>
+<p>ObjectStitch, Paint-by-Example : CLIP을 활용한 image editing 방법</p>
+<p>TryonDiffusion : virtual apparel try on을 위한 parallel u-net 구조 도입</p>
+</section>
+<section id="diffusion-model-for-video-generation">
+<h3>2.2 Diffusion Model for Video Generation<a class="headerlink" href="#diffusion-model-for-video-generation" title="Permalink to this heading">#</a></h3>
+<p>T2V Model : T2I 모델 기반 inter-frame attention modeling을 통한 연구가 많이 이뤄짐.</p>
+<p>Video LDM : temporal layer를 삽입한 T2I 모델 기반 video generation model</p>
+<p>AnimateDiff : personalized T2I model을 활용한 motion module을 많은 video data로 학습시킨 모델</p>
+<p>→ Animate Anyone에서는 본 temporal modeling에 영향을 받아 해당 방법론 사용</p>
+<p>I2V Model</p>
+<p>VideoComposer : conditional control</p>
+<p>AnimateDiff : image latent과 random noise 간 weight mixing</p>
+<p>VideoCrafter : CLIP의 textual &amp; visual feature를 통합하여 cross-attention에 주입</p>
+<p>그러나 해당 방법들 모두 안정적인 사람 video 생성에는 어려움이 존재.</p>
+</section>
+<section id="diffusion-model-for-human-image-animation">
+<h3>2.3 Diffusion Model for Human Image Animation<a class="headerlink" href="#diffusion-model-for-human-image-animation" title="Permalink to this heading">#</a></h3>
+<p>Image Animation</p>
+<p>PIDM, LFDM, LEO,</p>
+<p>DreamPose, DisCo</p>
+</section>
+</section>
+<section id="methods">
+<h2>3. Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
+<p>목표 : character animation을 위한 pose-guided image-to-video 합성</p>
+<section id="preliminary-stable-diffusion">
+<h3>3.1 Preliminary: Stable Diffusion<a class="headerlink" href="#preliminary-stable-diffusion" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/eq_1.png"><img alt="eq_1" class="bg-primary mb-1" src="../../_images/eq_1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 630 </span><span class="caption-text">Eq (1) Stable Diffusion Objective</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\epsilon_\theta\)</span> : UNet func</p>
+<p><span class="math notranslate nohighlight">\(c\)</span> : conditional embedding</p>
+<p><span class="math notranslate nohighlight">\(z\)</span> : image latent</p>
+<p><span class="math notranslate nohighlight">\(t\)</span> : timestep</p>
+<p><span class="math notranslate nohighlight">\(z_t\)</span> : noise latent</p>
+<p>CLIP ViT-L/14 text encoder</p>
+<p>denoising UNet : 4 downsample layers , 1 middle layer, 4 upsample layers.</p>
+<p>각 Res-Trans block별 2D convolution, self-attention, cross-attention로 구성</p>
+</section>
+<section id="network-architecture">
+<h3>3.2 Network Architecture<a class="headerlink" href="#network-architecture" title="Permalink to this heading">#</a></h3>
+<p><strong>Overview</strong></p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_2.png"><img alt="figure_2" class="bg-primary mb-1" src="../../_images/figure_2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 631 </span><span class="caption-text">Figure 2 Animate Anyone Overview</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>3가지 중요 요소 통합</p>
+<ol class="arabic simple">
+<li><p>ReferenceNet : reference image로부터 character의 appearance features encoding</p></li>
+<li><p>Pose Guider : 제어가능한 character movements를 위한 motion control signal encoding</p></li>
+<li><p>Temporal layer : character motion 연속성을 위한 temporal relationship encoding</p></li>
+</ol>
+<p><strong>ReferenceNet</strong></p>
+<ul class="simple">
+<li><p>text보다 image가 더 low-level detailed feature를 통한 일관성 유지 정보를 내포함.</p></li>
+<li><p>이에 따라 최근 CLIP image encoder가 text encoder보다 많이 사용되었지만, detail consistency에는 역부족</p>
+<ul>
+<li><p>이유 1: CLIP image encoder는 224x224의 저해상도 이미지들로 구성되어 중요한 세부정보 손실이 있을 수 있다.</p></li>
+<li><p>이유 2: CLIP은 text에 더욱 부합하게 훈련되어 high-level feature matching에 강조되고 이에 따라 feature encoding에 있어 detail feature에 부족함이 존재</p></li>
+</ul>
+</li>
+<li><p>이에 따라 reference image feature extraction network인 ReferenceNet 고안 (이때 temporal layer 제외)</p></li>
+<li><p>ReferenceNet은 SD로 초기화하고 각각 독립적으로 update 수행하고 UNet과 통합</p></li>
+<li><p>self-attention layer를 spatial attention layer로 변경</p></li>
+<li><p>Feature map : <span class="math notranslate nohighlight">\(x_1 \in \mathcal{R}^{t \times h \times w \times c }\)</span> (UNet), <span class="math notranslate nohighlight">\(x_2 \in \mathcal{R}^{h \times w \times c }\)</span> (ReferenceNet) 이 주어졌을 때, <span class="math notranslate nohighlight">\(x_2\)</span>를 t번 복사해 w축에 따라 <span class="math notranslate nohighlight">\(x_1\)</span>과 concat</p></li>
+<li><p>self-attention을 수행하고 feature map의 반을 결과로 뽑음.</p></li>
+<li><p>2가지 장점</p>
+<ul>
+<li><ol class="arabic simple">
+<li><p>사전 학습된 image feature model SD를 사용함에 따라 <strong>초기값이 잘 정의</strong>된 것 사용가능.</p></li>
+</ol>
+</li>
+<li><ol class="arabic simple" start="2">
+<li><p>UNet과 ReferenceNet의 초기값이 공유되고 동일한 네트워크 구조를 가짐에 따라 UNet은 (동일한 feature space에 상관관계가 있는) ReferenceNet feature 중 선별적으로 feature 학습이 가능</p></li>
+</ol>
+</li>
+</ul>
+</li>
+<li><p>CLIP image encoder를 cross-attention에 도입</p>
+<ul>
+<li><p>reference image의 semantic feature를 제공함에 따라 신속한 전체 네트워크 훈련 초기값 설정 가능.</p></li>
+</ul>
+</li>
+<li><p>ControlNet은 target image와 공간적으로 align된 정보를 활용 → 부적합</p></li>
+<li><p>본 방법에서는 reference image와 target image가 공간적으로는 관계되어있지만, align되지 않음.</p></li>
+<li><p>타 diffusion 기반 video generation에서는 모든 video frame에 대해 denoising을 진행</p></li>
+<li><p>ReferenceNet은 feature 추출할 때 한 번만 필요</p></li>
+<li><p>효과 : inference 단계에서 계산량이 증가하지 않는다.</p></li>
+</ul>
+<p><strong>Pose Guider</strong></p>
+<ul class="simple">
+<li><p>ControlNet은 robust한 conditional 생성을 입증해왔지만, 추가 Fine-tuning이 필요했었다.</p></li>
+<li><p>저자들은 추가적인 계산량 증가를 막기위해 추가적인 control network를 통합하지 않고 lightweight Pose Guider 도입</p></li>
+<li><p>noise latent와 동일 해상도를 가지는 pose 이미지 align을 위해 four convolution layers (4×4 kernels, 2×2 strides, using 16,32,64,128 channels) 사용</p></li>
+<li><p>Gaussian weights 초기화, final projection layer에서 zero convolution 도입.</p></li>
+</ul>
+<p><strong>Temporal Layer</strong></p>
+<ul class="simple">
+<li><p>이미 많은 곳에서 T2I 모델에 temporal layer를 통합했을 때 frame간 temporal dependency가 가능함을 보임.</p></li>
+<li><p>본 방법에서는 U-Net 내 Res-Trans block 안에 있는 spatial-attention과 cross-attention 진행 후에 temporal layer 추가</p></li>
+<li><p>순서 1) reshape : <span class="math notranslate nohighlight">\(x \in \mathcal{R}^{b \times t \times h \times w \times c }\)</span> → <span class="math notranslate nohighlight">\(x \in \mathcal{R}^{(b \times h \times w) \times t \times c }\)</span></p></li>
+<li><p>순서 2) temporal attention 수행 → residual connection</p></li>
+<li><p>효과 : appearance details에 대한 temporal smoothness &amp; continuity</p></li>
+</ul>
+</section>
+<section id="training-strategy">
+<h3>3.3 Training Strategy<a class="headerlink" href="#training-strategy" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>훈련 두 단계</p></li>
+<li><p>첫 번째 단계</p>
+<ul>
+<li><p>temporal layer를 제외한 single-frame noise를 입력으로 받는 모델 학습</p></li>
+<li><p>ReferenceNet &amp; Pose Guider</p></li>
+<li><p>reference 이미지는 전체 비디오 클립에서 랜덤으로 선택</p></li>
+<li><p>초기 weight는 사전학습된 SD weight</p></li>
+<li><p>Pose Guider는 마지막 projection layer를 제외한 모든 layer gaussian weight 초기화</p></li>
+<li><p>VAE Encoder, Decoder, CLIP image encoder 는 그대로</p></li>
+</ul>
+</li>
+<li><p>두 번째 단계</p>
+<ul>
+<li><p>첫 번째 단계에서 훈련한 모델 속 temporal layer만 훈련</p></li>
+<li><p>temporal layer 초기값 : AnimateDiff pretrained weight</p></li>
+<li><p>입력 : 24frame video clip</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<section id="implementations">
+<h3>4.1 Implementations<a class="headerlink" href="#implementations" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Data : 5K character video clips (2-10 seconds long) 인터넷에서 다운로드</p></li>
+<li><p>Pose Estimation Model : DWPose(Distillation for Whole-body Pose estimator) (23.07) <a class="github reference external" href="https://github.com/IDEA-Research/DWPose">IDEA-Research/DWPose</a>
+(the student’s head with only 20% training time as a plug-and-play training strategy)</p></li>
+<li><p>GPU : 4 NVIDIA A100 GPUs</p></li>
+<li><p>첫 번째 훈련 단계 : 768×768 해상도 video frame sampled, resized, and center-cropped 30,000 steps, batch size 64.</p></li>
+<li><p>두 번째 훈련 단계 : temporal layer 10,000 steps 24-frame video sequences, batch size 4.</p></li>
+<li><p>learning rates : 1e-5.</p></li>
+<li><p>Inference 단계 : reference image의 캐릭터 skeleton의 길이에 근사하기 위해서 유도된 pose skeleton의 길이 rescale</p></li>
+<li><p>DDIM sampler, 20 steps</p></li>
+<li><p>긴 영상 생성을 위해 temporal aggregation method 채택</p></li>
+<li><p>Evaluation : benchmark dataset 2개(UBC fashion video dataset, Tik-Tok dataset) 사용</p></li>
+</ul>
+</section>
+<section id="qualitative-results">
+<h3>4.2 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_3.png"><img alt="figure_3" class="bg-primary mb-1" src="../../_images/figure_3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 632 </span><span class="caption-text">Figure 3 Qualitative Results</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>전신이 나오는 임의의 characters, 절반 길이의 portraits, cartoon characters, humanoid characters에 대해 animation</p></li>
+<li><p>reference image와 유사한 temporal consistency를 보이는 사실적인 결과 생성</p></li>
+</ul>
+</section>
+<section id="comparisons">
+<h3>4.3 Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>SSIM, PSNR, LPIPS, FVD(Fréchet <em><strong>Video</strong></em> Distance)</p></li>
+</ul>
+<p><strong>Fashion Video Synthesis</strong></p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table1.png"><img alt="table1" class="bg-primary mb-1" src="../../_images/table1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 633 </span><span class="caption-text">Table 1 Quantitative Comparison for fashion video synthesis</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Quantitative comparison - Table 1</p>
+<ul>
+<li><p>UBC fashion video dataset
+(500 training &amp; 100 testing videos로 구성, 각 video 약 350 frames)</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_4.png"><img alt="figure_4" class="bg-primary mb-1" src="../../_images/figure_4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 634 </span><span class="caption-text">Figure 4 Qualitative comparison for fashion video synthesis</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DreamPose &amp; BDMM은 옷의 일관성을 잃어버리는 문제. 색과 섬세한 구조적 요소에 대한 error 발생</p></li>
+<li><p>반면, 제안 방법은 옷의 세부 내용까지 일관성있게 보존됨.</p></li>
+</ul>
+<p><strong>Human Dance Generation</strong></p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table2.png"><img alt="table2" class="bg-primary mb-1" src="../../_images/table2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 635 </span><span class="caption-text">Table 2 Quantitative comparison for human dance generation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Quantitative comparison - Table 2</p>
+<ul>
+<li><p>TikTok dataset
+(340 training &amp; 100 testing single human dancing videos (10-15s))</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_5.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 636 </span><span class="caption-text">Figure 5 Qualitative comparison between DisCo and Animate Anyone method</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DisCo에서는 인물 foreground mask를 위해 SAM 활용하는 pipeline 활용</p></li>
+<li><p>그러나 본 방법에서는 masking 없이 모델 자체가 subject motion으로부터 전경과 배경의 구분 가능</p></li>
+<li><p>복잡한 dance sequence에서도 시각적으로 연속적인 motion을 보여줌. robustness</p></li>
+</ul>
+<p><strong>General Image-to-Video Methods</strong></p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_6.png"><img alt="figure_6" class="bg-primary mb-1" src="../../_images/figure_6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 637 </span><span class="caption-text">Figure 6 Qualitative comparison with image-to-video methods</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>비교 모델 : AnimateDiff &amp; Gen-2</p></li>
+<li><p>reference image에 대한 외관 신뢰도만 비교</p></li>
+<li><p>image-to-video 방법은 얼굴이 일관되게 유지되지 않는 문제에 봉착한 상황 속에서 다른 모델 대비 제안 모델이 긴 시간 동안 appearance consistency 유지</p></li>
+</ul>
+</section>
+<section id="ablation-study">
+<h3>4.4 Ablation study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_7.png"><img alt="figure_7" class="bg-primary mb-1" src="../../_images/figure_7.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 638 </span><span class="caption-text">Figure 7 Ablation study of different design</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_3.png"><img alt="table_3" class="bg-primary mb-1" src="../../_images/table_3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 639 </span><span class="caption-text">Table 3 Quantitative comparison for ablation study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>ReferenceNet design 효과성 증명을 위한 Ablation study</p>
+<ul>
+<li><p>(1) CLIP image encoder만 사용</p></li>
+<li><p>(2) 초기 finetuning SD 이후 reference image 기반 ControlNet training</p></li>
+<li><p>(3) 위 2 방법론 통합</p></li>
+</ul>
+</li>
+<li><p>결론 : ReferenceNet를 사용하는 것이 모든 방법 대비 가장 좋았다.</p></li>
+</ul>
+</section>
+</section>
+<section id="limitations">
+<h2>5. Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><ol class="arabic simple">
+<li><p>손의 안정적인 움직임을 보이는 것에 어려움을 보임. 가끔 왜곡, motion blur 발생</p></li>
+</ol>
+</li>
+<li><ol class="arabic simple" start="2">
+<li><p>제공하는 이미지는 한 측면만 보이기 때문에 보이지 않은 부분에 대해서는 ill-posed problem으로 불안정</p></li>
+</ol>
+</li>
+<li><ol class="arabic simple" start="3">
+<li><p>DDPM 활용에 따른 non-diffusion 기반 모델 대비 낮은 operational efficiency</p></li>
+</ol>
+</li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="AnimateDiff.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">AnimateDiff</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DreaMoving.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DreaMoving</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-image-generation">2.1 Diffusion Model for Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-video-generation">2.2 Diffusion Model for Video Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-for-human-image-animation">2.3 Diffusion Model for Human Image Animation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1 Preliminary: Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">3.2 Network Architecture</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">3.3 Training Strategy</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementations">4.1 Implementations</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.2 Qualitative Results</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">4.3 Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation study</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">5. Limitations</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/BBDM.html b/docs/review/BBDM.html
old mode 100644
new mode 100755
index a8e0aaf6..3dd76eb1
--- a/docs/review/BBDM.html
+++ b/docs/review/BBDM.html
@@ -1,1520 +1,1540 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>BBDM &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/BBDM';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Your Diffusion Model is Secretly a Zero-Shot Classifier" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html" />
-    <link rel="prev" title="GLIDE" href="GLIDE.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/BBDM.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/BBDM.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>BBDM</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> {BBDM: Image-to-image Translation with Brownian Bridge Diffusion Models}, {CVPR 2023}</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2205.07680">https://arxiv.org/abs/2205.07680</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/xuekt98/BBDM">xuekt98/BBDM</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
-<li><p><strong>Edited by:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
-<li><p><strong>Related Youtube:</strong> <a href="https://youtu.be/WbeofpQpj3M">Youtube video</a></p></li>
-<li><p><strong>Last updated on Nov. 13, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="bbdm">
-<h1>BBDM<a class="headerlink" href="#bbdm" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p><strong>BBDM</strong></p>
-<ul>
-<li><p>BBDM 은 Brownian Bridge 를 Diffusion Model 에 도입한 최초의 모델</p></li>
-<li><p>Image to Image Translation 분야에서 Conditional Diffusion Models 의 한계를 극복함</p></li>
-</ul>
-</li>
-</ul>
-<p><br>BBDM 을 이해하기 위해서는 Brownian motion process 와 Brownian Bridge 를 이해해야함. Brownian motion process 는 stochastic process 에 해당함.<br></p>
-<ul>
-<li><p><strong>Stochastic Process</strong></p>
-<ul class="simple">
-<li><p>시간의 흐름에 따라 불확실성을 가지고 변하는 확률 변수들의 집합</p></li>
-<li><p>Stochastic process 는 <span class="math notranslate nohighlight">\(X_t\)</span> 와 같이 나타낼 수 있는데, <br>
-여기서 X 는 확률 변수를,<br>
-t 는 확률 변수가 관찰된 시간을 나타냄</p></li>
-<li><p>X 와 t 는 각각 Discrete 혹은 Continuous 로 구분할 수 있음</p>
-<ul>
-<li><p>Discrete RANDOM VARIABLE &amp; Discrete TIME</p></li>
-<li><p>Discrete RANDOM VARIABLE &amp; Continuous TIME</p></li>
-<li><p><strong>Continuous RANDOM VARIABLE &amp; Discrete TIME</strong></p></li>
-<li><p><strong>Continuous RANDOM VARIABLE &amp; Continuous TIME</strong></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Brownian Motion Process (Wiener Process) 소개</strong></p>
-<ul>
-<li><p><strong>Brownian Motion</strong></p>
-<ul class="simple">
-<li><p>유체의 미소입자가 불규칙하게 운동하는 현상</p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_00.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_00.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 335 </span><span class="caption-text">굴뚝에서 퍼져나간 연기 사진을 오른쪽으로 90도 회전시킨 사진</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p><br> 위 사진으로부터 Brownian motion process 를 직관적으로 이해해볼 수 있음.<br></p>
-<ul>
-<li><p><strong>Brownian Motion Process (Wiener Process)</strong></p>
-<ul class="simple">
-<li><p>Brownian Motion 을 연속 시간 확률 과정으로 모델링한 것</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_01.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_01.png" style="width: 2000px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 336 </span><span class="caption-text"><span class="math notranslate nohighlight">\(W_0\)</span> = 0 이고 max time T=1000 인 Wiener Process 를 100번 Sampling 한 결과</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Brownian Motion Process (Wiener Process)</strong> 는<br>
-<strong>Continuous RANDOM VARIABLE &amp; Continuous TIME 를 갖는 Stochastic Process</strong> 로,<br>
-<span class="math notranslate nohighlight">\(W_t\)</span> 와 같이 나타낸다.</p></li>
-</ul>
-</li>
-<li><p><strong>Brownian Motion Process (Wiener Process) 를 이해해보자</strong></p>
-<ul>
-<li><p><strong>가정해보자</strong></p>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(t = 0 → W_t = W_0 = 0\)</span> 이라고 하자.</p></li>
-<li><p>쉽게 이해하기 위해, TIME t 가 Discrete 하다고 가정해보자.<br>
-(BBDM 은 t 를 정수 0~1000 으로 설정)</p></li>
-</ol>
-</li>
-<li><p><strong>Requirements</strong></p>
-<ol class="arabic simple">
-<li><p>Brownian Motion Process 는 Stochastic Process 이다. <br>
-<strong>TIME t 마다 stochasticity 가 부여되어야</strong> 한다.</p></li>
-<li><p><strong>시간 간격과 W 의 변화량이 비례해야 한다.</strong><br>
-(즉, 더 오래 지났을수록 더 많이 변한다.)</p></li>
-</ol>
-</li>
-<li><p><strong>Notation</strong></p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_02.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_02.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 337 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> = 시간 간격</p></li>
-<li><p>n = 살펴보고자 하는 시간 간격의 수</p></li>
-<li><p><span class="math notranslate nohighlight">\(T = n * \Delta t\)</span></p></li>
-<li><p>i.i.d <span class="math notranslate nohighlight">\(\epsilon_t \sim N(0, 1)\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(\Delta W_t\)</span> = t 시점에서 그 다음 시간 간격까지 증가한 W 의 값
-<span class="math notranslate nohighlight">\(= W_{t+\Delta t} - W_t\)</span>
-= <span class="math notranslate nohighlight">\(\epsilon_t \sqrt {\Delta t}\)</span></p></li>
-</ul>
-</li>
-<li><p><strong>이해</strong></p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\Delta W_t = W_{t+\Delta t} - W_t = \epsilon_t \sqrt {\Delta t}\)</span> 라고 정의해 본 근거를
-위의 Requirements 에서 찾아보면..</p>
-<ul>
-<li><p><strong>확률 변수 <span class="math notranslate nohighlight">\(\epsilon\)</span> 를 도입함으로써 stochasticity 부여</strong></p></li>
-<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 를 도입함으로써 <strong>시간 간격도 고려 가능</strong></p></li>
-</ul>
-</li>
-<li><p><strong>그렇다면 왜 하필 <span class="math notranslate nohighlight">\(\sqrt {\Delta t}\)</span> 를 곱했을까?</strong></p>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 가 0 에 가까워질 때, <span class="math notranslate nohighlight">\(\sqrt{\Delta t}\)</span> 는 천천히 0 에 수렴함.
-<strong>만약 TIME t 가 continuous 하다면, <span class="math notranslate nohighlight">\(\Delta t\)</span> 는 매우 작은 값</strong>이 됨.
-<strong><span class="math notranslate nohighlight">\(\Delta W_t = \epsilon_t {\Delta t}\)</span> 라면 <span class="math notranslate nohighlight">\(\Delta W_t\)</span> 가 너무 작아짐.</strong></p></li>
-<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 가 커질 때, <span class="math notranslate nohighlight">\(\sqrt{\Delta t}\)</span> 는 천천히 커짐</p></li>
-</ol>
-</li>
-<li><p><strong>주의할 사항</strong></p>
-<ul>
-<li><p>i.i.d <span class="math notranslate nohighlight">\(\epsilon_t \sim N(0, 1)\)</span> 이므로,
-<span class="math notranslate nohighlight">\(\Delta W_t = \epsilon_t \sqrt {\Delta t}\)</span> 에서 <span class="math notranslate nohighlight">\(\Delta W_0\)</span> 와 <span class="math notranslate nohighlight">\(\Delta W_1\)</span> 은 서로 독립인 것이 맞지만,
-<strong><span class="math notranslate nohighlight">\(W_0\)</span> 과 <span class="math notranslate nohighlight">\(W_1\)</span> 이 독립이라는 말은 아님.</strong></p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(\Delta W_0 = \epsilon_0 \sqrt {\Delta t}\)</span> 이므로,
-<span class="math notranslate nohighlight">\(W_{\Delta t} = W_0 + \epsilon_0 \sqrt {\Delta t} = 0 + \epsilon_0 \sqrt {\Delta t} = \epsilon_0 \sqrt {\Delta t}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(\Delta W_{\Delta t} = \epsilon_{\Delta t} \sqrt {\Delta t}\)</span> 이므로,
-<span class="math notranslate nohighlight">\(W_{2\Delta t} = W_{\Delta t} + \epsilon_{\Delta t} \sqrt {\Delta t} = (\epsilon_0 +  \epsilon_{\Delta t}) * \sqrt {\Delta t}\)</span></p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(Var(\Delta W_{\Delta t}) = Var(\epsilon_{\Delta t} \sqrt {\Delta t}) = Var(\epsilon_{\Delta t}) * \sqrt {\Delta t}^2 = 1 * \sqrt {\Delta t}^2 = \Delta t\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(\mathbb{E}(\Delta W_{\Delta t}) = \mathbb{E}(\epsilon_{\Delta t} \sqrt {\Delta t}) = \mathbb{E}(\epsilon_{\Delta t}) * \sqrt {\Delta t} = 0 * \sqrt {\Delta t} = 0\)</span></p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(\Delta W_{T-\Delta t} = \epsilon_{T-\Delta t} \sqrt {\Delta t}\)</span>
-<span class="math notranslate nohighlight">\(W_T = (\epsilon_0 +  \epsilon_{\Delta t} + \epsilon_{2\Delta t} + ... + \epsilon_{T-\Delta t}) * \sqrt {\Delta t}\)</span></p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\mathbb{E}(W_T) = 0\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(Var(W_T) = n * \Delta t = T\)</span> (각각의 <span class="math notranslate nohighlight">\(\epsilon\)</span> 은 서로 i.i.d 이므로 공분산은 0)</p></li>
-<li><p>즉, <span class="math notranslate nohighlight">\(W_T \sim N(0,T)\)</span></p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_03.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_03.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 338 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>파란색 점들은, Brownian Motion Process 를 1번 Sampling 한 결과 (one representation) 를 나타냄<br></p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_04.png"><img alt="img_04" class="bg-primary mb-1" src="../../_images/img_04.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 339 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>t=0 부터 t=T 까지 Wiener Process 를 수행하면,<br>
-<span class="math notranslate nohighlight">\(W_t\)</span> 는 <span class="math notranslate nohighlight">\(W_T - W_0\)</span> 만큼 변한다.</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\((W_T - W_0) \sim N(0, T-0)\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\((W_{t_2}-W_{t_1}) \sim N(0,t_2-t_1)\)</span></p>
-<ul>
-<li><p>ex. 5분 에서 10분으로 Wiener Process 를 진행하면, <span class="math notranslate nohighlight">\(W_5\)</span> 는 0 이 아닐 수 있으나, 그 변화량 <span class="math notranslate nohighlight">\((W_{10}-W_{5})\)</span> 은 N(0, 10 - 5) 를 따른다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Brownian Bridge</strong></p>
-<ul class="simple">
-<li><p>X 가 Standard Wiener Process 라고 하자. <br>
-0 시점과 T 시점의 X 값을 알고,<br>
-0&lt;t&lt;T 일 때,
-두 점을 선형으로 연결하는 Linear Bridge X(t) 는 다음과 같다.</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_05.png"><img alt="img_05" class="bg-primary mb-1" src="../../_images/img_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 340 </span><span class="caption-text">Brownian Bridge 이해를 위한 Linear Bridge</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Brownian Bridge 는 Standard Wiener Process 의 Conditional Probability Distribution 이다.<br>
-Starting state W(0) 과 Ending state W(T) 의 값에 Conditioned 되어 있다.<br>
-아래와 같이 정의될 수 있다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_06.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 341 </span><span class="caption-text">Brownian Bridge</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><br>아래의 그림을 보면, 0 이라는 시작값과 123 이라는 마지막 값에 conditioned 되어 있는 것을 확인할 수 있다. <br> Brownian Bridge 의 분산은 0 에서 시작해서 증가하다가, T/2 시점에서 최대가 되었다가, 이후로는 감소하여 마지막엔 0 에 수렴하게된다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_08.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_08.png" style="width: 2000px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 342 </span><span class="caption-text"><span class="math notranslate nohighlight">\(W_0\)</span> = 0 에서 <span class="math notranslate nohighlight">\(W_{1000}\)</span> = 123 까지 100개의 Brownian Bridge 를 샘플링한 결과</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Abstract</strong></p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_09.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 343 </span><span class="caption-text">Conditional Diffusion Models 와 BBDM 의 비교</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>기존의 Diffusion 모델</strong>들은,  <br>
-Image-to-Image 변환을 <strong>Conditional generation process</strong> 로 다룸. <br>
-이로 인해, <strong>매우 상이한 도메인 사이의 변환</strong>에는 <strong>어려움</strong>이 있음.</p></li>
-<li><p>이를 <strong>해결하기 위해</strong>, <br>
-본 논문은 <strong>Brownian Bridge</strong> <strong>에 기반한 Image-to-Image 변환 방법을 제시</strong>함</p></li>
-<li><p><strong>BBDM</strong> 은 Conditional generation process 가 아닌 <br>
-<strong>Stochastic Brownian Bridge Process</strong> 로 두 도메인 사이의 변환을 모델링하므로, <br><strong>Bidirectional Diffusion Process</strong> 임.</p></li>
-<li><p>Brownian Bridge diffusion process 를 Image-to-Image 변환에 접목한 최초의 논문임</p></li>
-<li><p>BBDM 모델의 훌륭한 성능을 실험적으로 증명함<br></p></li>
-</ul>
-</li>
-</ul>
-<ol class="arabic">
-<li><p><strong>Introduction</strong></p>
-<ul>
-<li><p>I2I 변환에서 <strong>Non-diffusion models 의 한계</strong></p>
-<ul class="simple">
-<li><p>Pix2Pix 와 같은 <strong>conditional GANs</strong> 는 <strong>fidelity 가 높았으나,</strong>
-학습이 어렵고, <strong>Diversity 가 떨어진다.</strong></p>
-<ul>
-<li><p>Diversity 가 떨어지는 이유 : conditional GANs 는 input image 를 output image 에 one-to-one mapping 하는 방법을 학습하기 때문</p></li>
-</ul>
-</li>
-<li><p><strong>VAE</strong> 같은 <strong>생성형 모델</strong>들은 GANs 만큼의 I2I 성능이 안나오고,
-<strong>Applicability</strong> 가 GANs 보다 <strong>떨어진다.</strong></p></li>
-</ul>
-</li>
-<li><p>I2I 변환에서 <strong>conditional diffusion models 의 한계</strong></p>
-<ul class="simple">
-<li><p>conditional diffusion models 는 <strong>reference image</strong> 의 encoded feature 를 <strong>직접 U-Net 에 통합</strong>시킴으로써 diffusion models 의 reverse process 를 guide 함</p></li>
-<li><p>하지만 이렇게 <strong>생성된 결과가 desired conditional distribution 을 추론해낸다는 명료한 이론적 근거가 없음</strong></p></li>
-<li><p>대부분의 <strong>conditional diffusion models 는 generalization 이 잘 안되므로,</strong>
-conditional input domain 과 output domain 이 유사한
-몇몇 applications 에서만 잘 활용될 수 있음</p>
-<ul>
-<li><p>ex. inpainting 혹은 super-resolution</p></li>
-</ul>
-</li>
-<li><p><strong>LDM</strong> 이 latent space 에서 diffusion process 를 수행함으로써
-<strong>generalization 을 개선</strong>하긴 했으나 <strong>여전히 conditional generation process</strong> 임</p></li>
-<li><p><strong>LDM</strong> 의 경우, <strong>복잡한 attention mechanism 으로 multi-modal condition</strong> 이 주어지므로, <strong>이론적 근거를 제시하기가 더 힘듦</strong></p></li>
-</ul>
-</li>
-<li><p><strong>본 논문에서 제안하는 BBDM 모델</strong></p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_10.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 344 </span><span class="caption-text">BBDM 의 아키텍쳐</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>BBDM</strong> 모델은 <strong>input 과 output 도메인 간의 mapping</strong> 을
-<strong>Brownian Bridge stochastic process 를 통해 구축</strong>함</p></li>
-<li><p>가속을 위해 Latent space 에서 diffusion process 를 수행함<br></p></li>
-</ul>
-</li>
-</ul>
-<ol class="arabic">
-<li><p><strong>Related Work</strong><br></p>
-<ul>
-<li><p><strong>2.1. Image-to-Image Translation</strong></p>
-<ul class="simple">
-<li><p>introduction 참고<br></p></li>
-</ul>
-</li>
-<li><p><strong>2.2. Diffusion Models</strong><br></p>
-<ul class="simple">
-<li><p><strong>Diffusion Models</strong> 의 simplified <strong>objective</strong> 를 잠깐 살펴보면, 다음과 같음.</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_11.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_11.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 345 </span><span class="caption-text">Diffusion Models 의 Simplified objective</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>대부분의 <strong>conditional Diffusion Models</strong> 는 <strong>condition 을 objective 에 직접 “주입”</strong>.<br>
-아래의 그림을 보면, conditional input image y 가 삽입된 것을 볼 수 있음.</p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_12.png"><img alt="img_12" class="bg-primary mb-1" src="../../_images/img_12.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 346 </span><span class="caption-text">Conditional Diffusion Models 의 Simplified objective</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(p(x_t|y)\)</span> 가 objective 에 드러나 있지 않으므로,
-<strong>desired conditional distribution 에 도달할 수 있을 것</strong>이라는 <strong>이론적 보장이 없음</strong><br></p></li>
-</ul>
-</li>
-<li><p><strong>2.3. Brownian Bridge</strong><br></p>
-<ul class="simple">
-<li><p><strong>Brownian Bridge</strong> 는 <strong>diffusion process 동안의 확률 분포가</strong>
-<strong>starting state (t=0)</strong> 와 <strong>ending state (t=T)</strong> 에 <strong>conditioned 되어 있는,</strong>
-<strong>time stochastic model</strong> 임</p></li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_13.png"><img alt="img_13" class="bg-primary mb-1" src="../../_images/img_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 347 </span><span class="caption-text">식(3)</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>앞서 보았던 Brownian Bridge 의 평균과 분산을 구해보자.<br>
-위의 식과 같은 의미임을 알 수 있다.<br></p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_06.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 348 </span><span class="caption-text">Brownian Bridge</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-  <br>
-</li>
-</ul>
-</li>
-<li><p><strong>Method</strong><br></p>
-<ul>
-<li><p><strong>3.1. Brownian Bridge Diffusion Model (BBDM)</strong><br></p>
-<ul class="simple">
-<li><p><strong>Conditional diffusion models</strong> : <strong>Gaussian noise 를 향해 Forward process 진행</strong></p></li>
-<li><p><strong>BBDM : conditional input y 자체를 향해 Brownian Bridge process 진행</strong><br></p></li>
-</ul>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_09.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 349 </span><span class="caption-text">Conditional Diffusion Models 와 BBDM 의 비교</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-  <br>
-<ul class="simple">
-<li><p>VQGAN 의 latent space 에서 diffusion process 를 수행</p></li>
-<li><p><strong>x</strong> 가 <strong>A 도메인 영상의 latent features</strong> 이고,<br>
-<strong>y</strong> 가 <strong>B 도메인 영상의 latent features</strong> 일 때, <br>
-<strong>Forward diffusion process 는 다음과 같이 정의</strong>됨</p></li>
-</ul>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 350 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>T</strong> 는 diffusion process 의 <strong>total steps</strong> 이다.</p></li>
-<li><p><span class="math notranslate nohighlight">\(δ_t\)</span> 는 <strong>분산</strong>이다.</p></li>
-<li><p>식 (3) 에 나타난 분산 <span class="math notranslate nohighlight">\(δ_t={t(T −t)\over T}\)</span> 를 사용하게 되면,
-<strong>가능한 최대 분산값</strong>은, <strong>middle step 인 <span class="math notranslate nohighlight">\(T\over 2\)</span> 에서의 분산값인 <span class="math notranslate nohighlight">\(δ_{T\over 2} = {T \over 4}\)</span> 가 됨</strong></p></li>
-<li><p>T 값이 커지면, 최대 분산값도 커지는데, <strong>이 분산 값은 다루기에 너무 큼</strong></p></li>
-<li><p><span class="math notranslate nohighlight">\(x_0,y \sim N(0,I)\)</span> 이면서 서로 독립일 때,
-Brownian Bridge diffusion process 를 위한 <strong>분산 scheduling</strong> 을
-다음과 같이 해볼 수 있다.</p></li>
-</ul>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_15.png"><img alt="img_15" class="bg-primary mb-1" src="../../_images/img_15.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 351 </span><span class="caption-text">Brownian Bridge diffusion process 를 위한 분산 Scheduling</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>만약 t 는 양의 정수의 discrete time 이고, 그 최댓값인 T=1000 이라면
-<span class="math notranslate nohighlight">\(\delta_t\)</span> 는 아래 그림과 같게 된다.</p></li>
-</ul>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_16.png"><img alt="img_16" class="bg-primary mb-1" src="../../_images/img_16.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 352 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\delta_t\)</span> 를 시각화한 결과</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-  <br>
-<p><span class="math notranslate nohighlight">\(m_t = {t \over T}\)</span> 이고, <span class="math notranslate nohighlight">\(\delta_t = 2(m_t - m_t^2)\)</span> 이므로,<br></p>
-<ul class="simple">
-<li><p>diffusion process 가 시작하는 <strong>t = 0 에서는, <span class="math notranslate nohighlight">\(m_0\)</span> = 0</strong> 이고,
-<strong>평균은 <span class="math notranslate nohighlight">\(x_0\)</span></strong> 이며
-<strong>분산은 0</strong> 이 된다.<br></p></li>
-<li><p>diffusion process 가 끝나는 <strong>t = T 에서는,</strong>
-<span class="math notranslate nohighlight">\(m_T\)</span> <strong>= 1</strong> 이고,
-<strong>평균은 y</strong> 이고,
-<strong>분산은 0</strong> 이 된다.<br></p></li>
-<li><p><strong>분산이,</strong>
-diffusion process 의 <strong>중간 지점까지는 최대 0.5 까지 증가</strong>하다가,<br>
-중간 지점부터 <strong>끝나는 지점까지는 0 으로 감소</strong></p></li>
-<li><p><strong>Brownian Bridge diffusion process</strong> 에서의 <strong>sampling diversity</strong> 는
-<strong>최대 분산값,<br> 즉 middle step 인 <span class="math notranslate nohighlight">\(t = {T\over 2}\)</span> 에서의 분산값에 의해 결정</strong>됨</p></li>
-<li><p><strong>분산을 스케일링하는 변수 s</strong> <strong>를 두어</strong> <strong>sampling diversity 를 조절</strong>할 수 있다.</p></li>
-</ul>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_17.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 353 </span><span class="caption-text">식(5) : sampling diversity 조절을 위한 계수 s 가 포함된 분산 scheduling</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>이 논문에서 <strong>s 의 디폴트 값은 1</strong>
-<br></p></li>
-</ul>
-</li>
-<li><p><strong>3.1.1 Forward Process</strong><br></p>
-<ul class="simple">
-<li><p><strong>식 (4)</strong> 에서는 <strong>step t 에서의 marginal distribution 만 제공</strong></p></li>
-<li><p><strong>training 과 inference process 를 위해</strong>서는 <strong>forward transition probability</strong> 인 <span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1}, y)\)</span> 를 알아야함</p></li>
-<li><p><strong>식 (4) 에 의해, <span class="math notranslate nohighlight">\(x_0\)</span> 와 <span class="math notranslate nohighlight">\(y\)</span> 가 주어졌을 때의 <span class="math notranslate nohighlight">\(x_t\)</span> 와</strong> <span class="math notranslate nohighlight">\(x_{t-1}\)</span> 은 다음과 같이 쓸 수 있음</p></li>
-</ul>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 354 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id21">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_18.png"><img alt="img_18" class="bg-primary mb-1" src="../../_images/img_18.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 355 </span><span class="caption-text">식(6) &amp; 식(7)</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>참고. 위 식 (7) 의 <span class="math notranslate nohighlight">\(m_ty\)</span> 는 <span class="math notranslate nohighlight">\(m_{t-1}y\)</span> 로 쓰는 것이 옳음</p></li>
-</ul>
-<figure class="align-default" id="id22">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_19.png"><img alt="img_19" class="bg-primary mb-1" src="../../_images/img_19.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 356 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\epsilon\)</span> 은 i.i.d 하게 N(0, I) 를 따른다</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>식 (6) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 를 식 (7) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 로 대체</strong>하면,
-<strong>Forward transition probability <span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1}, y)\)</span></strong> 가 아래의 <strong>식 (8)</strong> 과 같이 유도됨</p></li>
-</ul>
-<figure class="align-default" id="id23">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_20.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_20.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 357 </span><span class="caption-text">식(8)</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>증명</p>
-<ul>
-<li><p>식(7) 을 다음과 같이 쓸 수 있음</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(x_0 = {x_{t-1}-m_{t-1}y-\sqrt {\delta_{t-1}} \epsilon_{t-1} \over 1-m_{t-1}}\)</span></p></li>
-</ul>
-</li>
-<li><p>식(6) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 에 위의 <span class="math notranslate nohighlight">\(x_0\)</span> 를 대입</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(x_t = {(1-m_t)x_{t-1} \over (1-m_{t-1})} - {(1-m_t)m_{t-1}y \over (1-m_{t-1})} - {(1-m_t)\sqrt {\delta_{t-1}}\epsilon_{t-1} \over (1-m_{t-1})} + m_ty + \sqrt{\delta_t} \epsilon_t\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= {(1-m_t)x_{t-1} \over (1-m_{t-1})} + y(m_t - {(1-m_t) \over (1-m_{t-1})}m_{t-1}) + \sqrt {\delta_t}\epsilon_t - {(1-m_t)\sqrt {\delta_{t-1}}\epsilon_{t-1} \over (1-m_{t-1})}\)</span></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>이후, <span class="math notranslate nohighlight">\(Var(x_t)\)</span> 를 구하면, 아래의 <span class="math notranslate nohighlight">\(\delta_{t|t-1}\)</span> 와 같이 유도됨</p></li>
-</ul>
-<figure class="align-default" id="id24">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_21.png"><img alt="img_21" class="bg-primary mb-1" src="../../_images/img_21.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 358 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\delta_{t|t-1}\)</span> 식</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>t=T 가 될 때 <span class="math notranslate nohighlight">\(m_T = 1\)</span> 인데, 이때 식(8) 에 의해 <span class="math notranslate nohighlight">\(x_T = y\)</span> 임.
-↓
-“아, Forward diffusion process 는 확실히..
-A 도메인으로부터 B 도메인으로의 fixed mapping 을 정의하는구나”</p></li>
-</ul>
-</li>
-<li><p><strong>3.1.2 Reverse Process</strong><br></p>
-<ul class="simple">
-<li><p><strong>conditional diffusion models</strong> 의 <strong>reverse process</strong> 는, <br>
-<strong>Gaussian noise 로부터 시작</strong>하며, <br>
-매 스텝마다 조금씩 noise 를 제거해나감<br></p></li>
-<li><p>반면, <strong>BBDM 의 Brownian Bridge process 는 <span class="math notranslate nohighlight">\(x_T = y\)</span> 로 둠으로써, <br>
-conditional input 그 자체에서 Reverse process 를 시작</strong>함</p></li>
-</ul>
-<figure class="align-default" id="id25">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_22.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_22.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 359 </span><span class="caption-text">식(9)</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\mu_\theta (x_t,t)\)</span> 는 U-Net 에 의해 예측된 노이즈 평균값이며, <span class="math notranslate nohighlight">\(\tilde{\delta_t}\)</span> 는 노이즈의 분산</p></li>
-<li><p>DDPM 처럼, 임의의 parameters <span class="math notranslate nohighlight">\(\theta\)</span> 를 갖는 신경망 <strong>U-Net 은 <span class="math notranslate nohighlight">\(\mu_\theta (x_t,t)\)</span> 를 학습</strong></p></li>
-</ul>
-</li>
-<li><p><strong>3.1.3. Training Objective</strong></p>
-<ul class="simple">
-<li><p><strong>참고.</strong></p>
-<ul>
-<li><p>예전 <strong>DDPM 의 Loss</strong> 는 다음과 같았음.</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id26">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_23.png"><img alt="img_23" class="bg-primary mb-1" src="../../_images/img_23.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 360 </span><span class="caption-text">DDPM 의 Loss</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>그리고, 이로부터 simplified 된 <strong>objective</strong> 는 다음과 같음</p></li>
-</ul>
-<figure class="align-default" id="id27">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_11.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_11.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 361 </span><span class="caption-text">DDPM 의 simplified objective</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Brownian Bridge diffusion process</strong> 의 <strong>ELBO</strong></p></li>
-</ul>
-<figure class="align-default" id="id28">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_24.png"><img alt="img_24" class="bg-primary mb-1" src="../../_images/img_24.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 362 </span><span class="caption-text">식(10) : BBDM 의 ELBO</span><a class="headerlink" href="#id28" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>첫 번째 term :</strong> <span class="math notranslate nohighlight">\(x_T\)</span> 가 곧 y 이므로 무시할 수 있음</p></li>
-<li><p><strong>세 번째 term</strong> : 매우 작은 값이 되므로 무시할 수 있음</p></li>
-<li><p><strong>베이즈 정리와 Markov chain property 를 식 (4) 와 식 (8) 에 적용</strong>하여,
-다음과 같이 <strong>식 (11) 이 도출</strong>된다.</p></li>
-<li><p>참고. Markovian Chain</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(q(x_t|x_{t-1}) = q(x_t|x_{t-1}, x_{t-2}, … , x_0)\)</span></p></li>
-<li><p>Markov chain property 에 의해,<br>
-<span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1},y) = q_{BB}(x_t|x_{t-1},x_0,y)\)</span> 가 성립됨을 활용</p></li>
-</ul>
-</li>
-<li><p>식(4)</p></li>
-</ul>
-<figure class="align-default" id="id29">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 363 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id29" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>식(8)</p></li>
-</ul>
-<figure class="align-default" id="id30">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_20.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_20.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 364 </span><span class="caption-text">식(8)</span><a class="headerlink" href="#id30" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>식(11) &amp; 식(13)</p></li>
-</ul>
-<figure class="align-default" id="id31">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_25.png"><img alt="img_25" class="bg-primary mb-1" src="../../_images/img_25.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 365 </span><span class="caption-text">식(11)</span><a class="headerlink" href="#id31" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id32">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_26.png"><img alt="img_26" class="bg-primary mb-1" src="../../_images/img_26.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 366 </span><span class="caption-text">식(13)</span><a class="headerlink" href="#id32" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>증명</p></li>
-<li><p><span class="math notranslate nohighlight">\({q_{BB}(x_{t}|x_{t-1},y)q_{BB}(x_{t-1}|x_{0},y)\over q_{BB}(x_{t}|x_{0},y)}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= {{q_{BB}(x_{t},x_{t-1},y) \over q_{BB}(x_{t-1},y)} {q_{BB}(x_{t-1},x_{0},y) \over q_{BB}(x_{0},y)} \over {q_{BB}(x_{t},x_{0},y)\over  q_{BB}(x_{0},y)}}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t}|x_{t-1},y){q_{BB}(x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t}|x_{t-1},x_{0},y){q_{BB}(x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= {q_{BB}(x_{t},x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t-1}|x_{t},x_{0},y)\)</span></p></li>
-<li><p>위 식 (11) 의 평균은, 식 (12) 와 같이 정리됨</p></li>
-</ul>
-<figure class="align-default" id="id33">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_27.png"><img alt="img_27" class="bg-primary mb-1" src="../../_images/img_27.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 367 </span><span class="caption-text">식(12)</span><a class="headerlink" href="#id33" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>식(4) 와 식(12) 를 통합하고 Reparameterization method 를 사용해서
-<span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 를 다음과 같이 변형할 수 있음</p></li>
-</ul>
-<figure class="align-default" id="id34">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_28.png"><img alt="img_28" class="bg-primary mb-1" src="../../_images/img_28.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 368 </span><span class="caption-text">식(12) 의 변형</span><a class="headerlink" href="#id34" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>참고. 식(4)</p></li>
-</ul>
-<figure class="align-default" id="id35">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 369 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id35" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>하지만, 실제로 U-Net 은 전체 <span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 를 예측하는 것이 아니라,
-노이즈를 예측하도록 학습됨.</p></li>
-<li><p>이 내용을 식에 명시하기 위해,
-<strong>식(9) 에 명시된 <span class="math notranslate nohighlight">\(\mu_\theta\)</span> 를 식(14) 와 같이 다시 써볼 수 있음.<br>
-<span class="math notranslate nohighlight">\(x_t\)</span> 와 y, 그리고 예측된 노이즈 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span> 의 linear combination 으로 다시 써보는</strong> 것임.</p></li>
-</ul>
-<figure class="align-default" id="id36">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_22.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_22.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 370 </span><span class="caption-text">식(9)</span><a class="headerlink" href="#id36" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id37">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_29.png"><img alt="img_29" class="bg-primary mb-1" src="../../_images/img_29.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 371 </span><span class="caption-text">식(14)</span><a class="headerlink" href="#id37" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>그런데, 아래 그림을 참고해보면 우리는 <span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 에 근사하도록 <span class="math notranslate nohighlight">\(\mu_\theta\)</span> 를 학습시켜야함.</p></li>
-</ul>
-<figure class="align-default" id="id38">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_30.png"><img alt="img_30" class="bg-primary mb-1" src="../../_images/img_30.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 372 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\tilde {\mu}_t\)</span> 의 정리된 식</span><a class="headerlink" href="#id38" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>즉, <span class="math notranslate nohighlight">\(\epsilon_\theta (x_t,t)\)</span> 가 <span class="math notranslate nohighlight">\(m_t(y-x_0)+\sqrt {\delta_t}\epsilon\)</span> 을 근사하도록 학습되어야하는 것임.</p></li>
-<li><p>ELBO 의 두 번째 term 을 다시 살펴보면,<br></p>
-<ul class="simple">
-<li><p><strong>두 번째 term</strong> : <span class="math notranslate nohighlight">\(D_{KL}(q_{BB}(x_{t-1}|x_t, x_0, y)||p_\theta (x_{t-1}|x_t,y))\)</span><br></p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(arg \space min_\theta \space D_{KL}(q_{BB}(x_{t-1}|x_t, x_0, y)||p_\theta (x_{t-1}|x_t,y))\)</span>
-=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (\tilde {\mu}_t(x_t,y) - \mu_\theta (x_t,y,t))\)</span>
-=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (c_{\epsilon_t}(m_t(y-x_0) + \sqrt {\delta_t}\epsilon) - c_{\epsilon_t}\epsilon_\theta(x_t,t))\)</span>
-=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (c_{\epsilon_t} (m_t(y-x_0) + \sqrt {\delta_t}\epsilon - \epsilon_\theta(x_t,t)))\)</span><br></p></li>
-</ul>
-</li>
-<li><p>따라서, ELBO 는 다음과 같이 단순화될 수 있음</p></li>
-</ul>
-<figure class="align-default" id="id39">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_31.png"><img alt="img_31" class="bg-primary mb-1" src="../../_images/img_31.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 373 </span><span class="caption-text">BBDM 의 Simplified ELBO</span><a class="headerlink" href="#id39" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Training Algorithm 정리</strong></p></li>
-</ul>
-<figure class="align-default" id="id40">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_32.png"><img alt="img_32" class="bg-primary mb-1" src="../../_images/img_32.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 374 </span><span class="caption-text">Algorithm 1 : Training. 마치 DDPM 에서 그러했듯이, BBDM 도 실제 Loss 에는 Simplified ELBO 에서의 계수 <span class="math notranslate nohighlight">\(C_{\epsilon_t}\)</span> 가 빠진 것을 확인할 수 있다.</span><a class="headerlink" href="#id40" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>3.2. Accelerated Sampling Processes</strong><br></p>
-<ul class="simple">
-<li><p><strong>DDIM 과 비슷하게, BBDM 의 inference processes</strong> 도
-<strong>non-Markovian process 를 사용해서 가속시킬 수 있음</strong></p></li>
-<li><p>Sampling steps 의 길이를 S 라고 두었을 때,
-<strong>inference process</strong> 는 <strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong> 에 의해 다음과 같이 정의됨</p></li>
-<li><p><strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong></p></li>
-</ul>
-<figure class="align-default" id="id41">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_33.png"><img alt="img_33" class="bg-primary mb-1" src="../../_images/img_33.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 375 </span><span class="caption-text"><strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong></span><a class="headerlink" href="#id41" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>inference process</strong></p></li>
-</ul>
-<figure class="align-default" id="id42">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_34.png"><img alt="img_34" class="bg-primary mb-1" src="../../_images/img_34.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 376 </span><span class="caption-text">inference process</span><a class="headerlink" href="#id42" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Sampling Algorithm</strong></p></li>
-</ul>
-<figure class="align-default" id="id43">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_35.png"><img alt="img_35" class="bg-primary mb-1" src="../../_images/img_35.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 377 </span><span class="caption-text">Algorithm 2 : Sampling</span><a class="headerlink" href="#id43" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>본 논문에서는 <strong>S 값의 디폴트</strong>를 <strong>200</strong> 으로 두었음<br></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Experiments</strong><br></p>
-<ul>
-<li><p><strong>4.1. Experiment Setup</strong><br></p>
-<ul>
-<li><p><strong>모델 &amp; 하이퍼파라미터</strong></p>
-<ul class="simple">
-<li><p>BBDM 프레임워크는 pretrained VQGAN 과 BBDM 으로 이루어짐</p></li>
-<li><p><strong>Latent Diffusion Model 에서 사용된 것과 같은 pretrained VQGAN 을 사용</strong></p></li>
-<li><p>training stage 에서의 time steps 는 1,000</p></li>
-<li><p>inference stage 에서의 sampling steps 는 200<br></p></li>
-</ul>
-</li>
-<li><p><strong>Evaluation</strong></p>
-<ul class="simple">
-<li><p>FID 와 LPIPS 사용</p></li>
-<li><p>생성물의 diversity 를 평가하기 위해서,
-하나의 conditional input y 마다 5개의 샘플을 생성하고,
-각 픽셀 마다의 표준편차의 평균을 구함.
-그 후 전체 test 데이터셋에 대해서 평균 냄.<br></p></li>
-</ul>
-</li>
-<li><p><strong>Datasets</strong></p>
-<ul class="simple">
-<li><p>BBDM 의 I2I 변환 능력을 평가하기 위해서, 여러 task 로 실험함<br></p></li>
-</ul>
-<ol class="arabic simple">
-<li><p><strong>Semantic Synthesis 능력</strong>을 CelebAMask-HQ dataset 으로 실험</p>
-<ol class="arabic simple">
-<li><p>semantic layout 만 주고 photorealistic 한 images 를 생성해내는 능력 평가<br></p></li>
-</ol>
-</li>
-<li><p><strong>sketch-to-photo 능력</strong>을 edges2shoes 와 edges2handbags 로 실험</p>
-<ol class="arabic simple">
-<li><p>edges 만 주고 realistic images 생성해내는 능력 평가<br></p></li>
-</ol>
-</li>
-<li><p><strong>style transfer 능력</strong>을 faces2comics 로 실험</p>
-<ol class="arabic simple">
-<li><p>위 두 실험은 서로 상이한 domains 간의 변환 능력을 평가했다면,
-Style transfer 실험에서는 서로 비슷한 domains 간의 I2I 변환 능력을 평가<br></p></li>
-</ol>
-</li>
-</ol>
-</li>
-</ul>
-</li>
-<li><p><strong>4.2. Qualitative Comparison</strong><br></p>
-<figure class="align-default" id="id44">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_36.png"><img alt="img_36" class="bg-primary mb-1" src="../../_images/img_36.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 378 </span><span class="caption-text">Figure 3. CelebAMask-HQ 데이터셋에 대한 추론 결과</span><a class="headerlink" href="#id44" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id45">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_37.png"><img alt="img_37" class="bg-primary mb-1" src="../../_images/img_37.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 379 </span><span class="caption-text">Figure 4. 다른 Image-to-Image 변환 task 에 대한 추론 결과</span><a class="headerlink" href="#id45" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id46">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_38.png"><img alt="img_38" class="bg-primary mb-1" src="../../_images/img_38.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 380 </span><span class="caption-text">Figure 5. 다른 Image-to-Image 변환 task 에 대한 추론 결과</span><a class="headerlink" href="#id46" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Pix2Pix 는 지도 학습 방식으로 학습하므로, 괜찮은 결과를 냄</p></li>
-<li><p>반면 <strong>CycleGAN</strong> 은 <strong>작은 스케일의 데이터셋</strong>에서는 <strong>성능이 떨어짐</strong></p></li>
-<li><p>DRIT++ 은 GAN 기반 모델들 중에서는 좋은 성능을 냈으나,
-변환된 이미지들이 oversmoothed 되어 있었고,
-ground truth distribution 과는 거리가 멀었음</p></li>
-<li><p>conditional diffusion model 인 <strong>CDE</strong> 와 <strong>LDM</strong> 은
-GAN 기반 모델들보다는 <strong>좋은 성능</strong>을 냈으나,
-<strong>conditional information 에 큰 영향</strong>을 받음</p>
-<ul>
-<li><p><strong>Figure 3 의 첫 번째 줄</strong>을 보면 <strong>irregular occlusions</strong> 가 나타나는데,
-<strong>CDE 와 LDM 은 이에 큰 영향</strong>을 받음</p></li>
-</ul>
-</li>
-<li><p>반면 <strong>BBDM 은 두 도메인 간의 직접적인 diffusion process 를 학습</strong>하므로
-<strong>이러한 문제로부터 자유로움</strong></p></li>
-<li><p>또한 Brownian Bridge 의 stochastic 한 특성으로 인해
-fidelity 와 diversity 가 높은 이미지들을 생성해냄<br></p></li>
-</ul>
-</li>
-<li><p><strong>4.3. Quantitative Comparison</strong><br></p>
-<ul class="simple">
-<li><p>Table 1 과 2 를 보면, BBDM 이 모든 실험에서 가장 좋은 FID 값을 기록했으며, 훌륭한 LPIPS 값을 기록함</p></li>
-</ul>
-<figure class="align-default" id="id47">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_39.png"><img alt="img_39" class="bg-primary mb-1" src="../../_images/img_39.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 381 </span><span class="caption-text">Table 1. CelebAMask-HQ 데이터셋에 대한 FID, LPIPS 성능은 BBDM 이 가장 뛰어남</span><a class="headerlink" href="#id47" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id48">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_40.png"><img alt="img_40" class="bg-primary mb-1" src="../../_images/img_40.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 382 </span><span class="caption-text">Table 2. BBDM 은 FID, LPIPS 점수가 매우 뛰어났음</span><a class="headerlink" href="#id48" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>4.4. 다른 Translation Tasks</strong><br></p>
-<ul class="simple">
-<li><p><strong>BBDM 의 generalization 성능을 검증</strong>하기 위해서, 다른 tasks 에 대해서도 실험했음</p></li>
-<li><p>아래 그림과 같이, <strong>다른 tasks 에서도 comparable 한 성능을 기록</strong>함</p></li>
-</ul>
-<figure class="align-default" id="id49">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_41.png"><img alt="img_41" class="bg-primary mb-1" src="../../_images/img_41.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 383 </span><span class="caption-text">Figure 6. Face-to-label, 색상화, inpainting 등의 다른 tasks 에서도 뛰어난 성능을 기록함</span><a class="headerlink" href="#id49" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>4.5. Ablation Study</strong><br></p>
-<ul>
-<li><p><strong>pre-trained latent space 의 영향</strong></p>
-<figure class="align-default" id="id50">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_42.png"><img alt="img_42" class="bg-primary mb-1" src="../../_images/img_42.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 384 </span><span class="caption-text">Table 3. BBDM 은 LDM 에 비해 Downsampling factor 에 대해 robust 했음</span><a class="headerlink" href="#id50" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>BBDM 과 LDM</strong> 에 대해서,
-<strong>VQGAN downsampling factor</strong> 를 <strong>각각 4, 8, 16 으로 두고 성능 비교 실험 수행</strong></p></li>
-<li><p><strong>BBDM 은 down sampling factor 에 robust</strong> 했음<br></p></li>
-</ul>
-</li>
-<li><p><strong>Sampling steps 의 영향</strong></p>
-<ul class="simple">
-<li><p><strong>Sampling steps 가 작을 때 (200 이하) 는, 조금만 늘려도 성능이 크게 증가</strong><br></p></li>
-</ul>
-<figure class="align-default" id="id51">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_43.png"><img alt="img_43" class="bg-primary mb-1" src="../../_images/img_43.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 385 </span><span class="caption-text">Table 4. 200 이상의 Sampling Steps 에서는 Steps 를 키워도 성능 변화가 미미함</span><a class="headerlink" href="#id51" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-  <br>
-</li>
-<li><p><strong>Brownian Bridge 의 maximum variance 의 영향</strong></p>
-<figure class="align-default" id="id52">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_44.png"><img alt="img_44" class="bg-primary mb-1" src="../../_images/img_44.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 386 </span><span class="caption-text">Table 5. Sampling diversity 조절 계수에 의해 실제로 Diversity 가 조절 되었음</span><a class="headerlink" href="#id52" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>식 (5) 에 나타난 것처럼, <strong>scaling factor s 의 값을 변경</strong>함으로써,
-<strong>Brownian Bridge 의 최대 분산값 (t = T/2 일 때의 분산값) 조절 가능.</strong>
-<strong>이렇게 diversity 조절 가능.</strong></p></li>
-</ul>
-<figure class="align-default" id="id53">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_17.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 387 </span><span class="caption-text">식(5)</span><a class="headerlink" href="#id53" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Conclusion and Future Work</strong></p>
-<ul class="simple">
-<li><p><strong>Brownian Bridge 에 기반한 새로운 I2I 변환 방법 제시</strong></p></li>
-<li><p>이 방법은 기존의 conditional 한 방법과 달리,
-<strong>Brownian Bridge diffusion process 를 통해 두 도메인 간의 mapping 을 직접 학습</strong></p></li>
-<li><p><strong>여러 tasks 에서의 실험을 통해 BBDM 의 성능 검증</strong></p></li>
-<li><p>text-to-image 와 같은 multi-modal tasks 에도 BBDM 을 적용해볼 예정</p></li>
-</ul>
-</li>
-</ol>
-</li>
-</ol>
-<ul class="simple">
-<li><p><strong>참고 자료</strong></p>
-<ul>
-<li><p><a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></p></li>
-<li><p><a class="reference external" href="https://sine-qua-none.tistory.com/158">https://sine-qua-none.tistory.com/158</a></p></li>
-</ul>
-</li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="GLIDE.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">GLIDE</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Your Diffusion Model is Secretly a Zero-Shot Classifier</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>BBDM &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/BBDM';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Your Diffusion Model is Secretly a Zero-Shot Classifier" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html" />
+    <link rel="prev" title="GLIDE" href="GLIDE.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/BBDM.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/BBDM.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>BBDM</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> BBDM: Image-to-image Translation with Brownian Bridge Diffusion Models, CVPR 2023</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2205.07680">https://arxiv.org/abs/2205.07680</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/xuekt98/BBDM">xuekt98/BBDM</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
+<li><p><strong>Edited by:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
+<li><p><strong>Related Youtube:</strong> <a href="https://youtu.be/WbeofpQpj3M">Youtube video</a></p></li>
+<li><p><strong>Last updated on Nov. 13, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="bbdm">
+<h1>BBDM<a class="headerlink" href="#bbdm" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p><strong>BBDM</strong></p>
+<ul>
+<li><p>BBDM 은 Brownian Bridge 를 Diffusion Model 에 도입한 최초의 모델</p></li>
+<li><p>Image to Image Translation 분야에서 Conditional Diffusion Models 의 한계를 극복함</p></li>
+</ul>
+</li>
+</ul>
+<p><br>BBDM 을 이해하기 위해서는 Brownian motion process 와 Brownian Bridge 를 이해해야함. Brownian motion process 는 stochastic process 에 해당함.<br></p>
+<ul>
+<li><p><strong>Stochastic Process</strong></p>
+<ul class="simple">
+<li><p>시간의 흐름에 따라 불확실성을 가지고 변하는 확률 변수들의 집합</p></li>
+<li><p>Stochastic process 는 <span class="math notranslate nohighlight">\(X_t\)</span> 와 같이 나타낼 수 있는데, <br>
+여기서 X 는 확률 변수를,<br>
+t 는 확률 변수가 관찰된 시간을 나타냄</p></li>
+<li><p>X 와 t 는 각각 Discrete 혹은 Continuous 로 구분할 수 있음</p>
+<ul>
+<li><p>Discrete RANDOM VARIABLE &amp; Discrete TIME</p></li>
+<li><p>Discrete RANDOM VARIABLE &amp; Continuous TIME</p></li>
+<li><p><strong>Continuous RANDOM VARIABLE &amp; Discrete TIME</strong></p></li>
+<li><p><strong>Continuous RANDOM VARIABLE &amp; Continuous TIME</strong></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Brownian Motion Process (Wiener Process) 소개</strong></p>
+<ul>
+<li><p><strong>Brownian Motion</strong></p>
+<ul class="simple">
+<li><p>유체의 미소입자가 불규칙하게 운동하는 현상</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_00.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_00.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 335 </span><span class="caption-text">굴뚝에서 퍼져나간 연기 사진을 오른쪽으로 90도 회전시킨 사진</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p><br> 위 사진으로부터 Brownian motion process 를 직관적으로 이해해볼 수 있음.<br></p>
+<ul>
+<li><p><strong>Brownian Motion Process (Wiener Process)</strong></p>
+<ul class="simple">
+<li><p>Brownian Motion 을 연속 시간 확률 과정으로 모델링한 것</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_01.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_01.png" style="width: 2000px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 336 </span><span class="caption-text"><span class="math notranslate nohighlight">\(W_0\)</span> = 0 이고 max time T=1000 인 Wiener Process 를 100번 Sampling 한 결과</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Brownian Motion Process (Wiener Process)</strong> 는<br>
+<strong>Continuous RANDOM VARIABLE &amp; Continuous TIME 을 갖는 Stochastic Process</strong> 로,<br>
+<span class="math notranslate nohighlight">\(W_t\)</span> 와 같이 나타낸다.</p></li>
+</ul>
+</li>
+<li><p><strong>Brownian Motion Process (Wiener Process) 를 이해해보자</strong></p>
+<ul>
+<li><p><strong>가정해보자</strong></p>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(t = 0 → W_t = W_0 = 0\)</span> 이라고 하자.</p></li>
+<li><p>쉽게 이해하기 위해, TIME t 가 Discrete 하다고 가정해보자.<br>
+(BBDM 은 t 를 정수 0~1000 으로 설정)</p></li>
+</ol>
+</li>
+<li><p><strong>Requirements</strong></p>
+<ol class="arabic simple">
+<li><p>Brownian Motion Process 는 Stochastic Process 이다. <br>
+<strong>TIME t 마다 stochasticity 가 부여되어야</strong> 한다.</p></li>
+<li><p><strong>시간 간격과 W 의 변화량이 비례해야 한다.</strong><br>
+(즉, 더 오래 지났을수록 더 많이 변한다.)</p></li>
+</ol>
+</li>
+<li><p><strong>Notation</strong></p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_02.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_02.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 337 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> = 시간 간격</p></li>
+<li><p>n = 살펴보고자 하는 시간 간격의 수</p></li>
+<li><p><span class="math notranslate nohighlight">\(T = n * \Delta t\)</span></p></li>
+<li><p>i.i.d <span class="math notranslate nohighlight">\(\epsilon_t \sim N(0, 1)\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\Delta W_t\)</span> = t 시점에서 그 다음 시간 간격까지 증가한 W 의 값
+<span class="math notranslate nohighlight">\(= W_{t+\Delta t} - W_t\)</span>
+= <span class="math notranslate nohighlight">\(\epsilon_t \sqrt {\Delta t}\)</span></p></li>
+</ul>
+</li>
+<li><p><strong>이해</strong></p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\Delta W_t = W_{t+\Delta t} - W_t = \epsilon_t \sqrt {\Delta t}\)</span> 라고 정의해 본 근거를
+위의 Requirements 에서 찾아보면..</p>
+<ul>
+<li><p><strong>확률 변수 <span class="math notranslate nohighlight">\(\epsilon\)</span> 를 도입함으로써 stochasticity 부여</strong></p></li>
+<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 를 도입함으로써 <strong>시간 간격도 고려 가능</strong></p></li>
+</ul>
+</li>
+<li><p><strong>그렇다면 왜 하필 <span class="math notranslate nohighlight">\(\sqrt {\Delta t}\)</span> 를 곱했을까?</strong></p>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 가 0 에 가까워질 때, <span class="math notranslate nohighlight">\(\sqrt{\Delta t}\)</span> 는 천천히 0 에 수렴함.
+<strong>만약 TIME t 가 continuous 하다면, <span class="math notranslate nohighlight">\(\Delta t\)</span> 는 매우 작은 값</strong>이 됨.
+<strong><span class="math notranslate nohighlight">\(\Delta W_t = \epsilon_t {\Delta t}\)</span> 라면 <span class="math notranslate nohighlight">\(\Delta W_t\)</span> 가 너무 작아짐.</strong></p></li>
+<li><p><span class="math notranslate nohighlight">\(\Delta t\)</span> 가 커질 때, <span class="math notranslate nohighlight">\(\sqrt{\Delta t}\)</span> 는 천천히 커짐</p></li>
+</ol>
+</li>
+<li><p><strong>주의할 사항</strong></p>
+<ul>
+<li><p>i.i.d <span class="math notranslate nohighlight">\(\epsilon_t \sim N(0, 1)\)</span> 이므로,
+<span class="math notranslate nohighlight">\(\Delta W_t = \epsilon_t \sqrt {\Delta t}\)</span> 에서 <span class="math notranslate nohighlight">\(\Delta W_0\)</span> 와 <span class="math notranslate nohighlight">\(\Delta W_1\)</span> 은 서로 독립인 것이 맞지만,
+<strong><span class="math notranslate nohighlight">\(W_0\)</span> 과 <span class="math notranslate nohighlight">\(W_1\)</span> 이 독립이라는 말은 아님.</strong></p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(\Delta W_0 = \epsilon_0 \sqrt {\Delta t}\)</span> 이므로,
+<span class="math notranslate nohighlight">\(W_{\Delta t} = W_0 + \epsilon_0 \sqrt {\Delta t} = 0 + \epsilon_0 \sqrt {\Delta t} = \epsilon_0 \sqrt {\Delta t}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\Delta W_{\Delta t} = \epsilon_{\Delta t} \sqrt {\Delta t}\)</span> 이므로,
+<span class="math notranslate nohighlight">\(W_{2\Delta t} = W_{\Delta t} + \epsilon_{\Delta t} \sqrt {\Delta t} = (\epsilon_0 +  \epsilon_{\Delta t}) * \sqrt {\Delta t}\)</span></p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(Var(\Delta W_{\Delta t}) = Var(\epsilon_{\Delta t} \sqrt {\Delta t}) = Var(\epsilon_{\Delta t}) * \sqrt {\Delta t}^2 = 1 * \sqrt {\Delta t}^2 = \Delta t\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\mathbb{E}(\Delta W_{\Delta t}) = \mathbb{E}(\epsilon_{\Delta t} \sqrt {\Delta t}) = \mathbb{E}(\epsilon_{\Delta t}) * \sqrt {\Delta t} = 0 * \sqrt {\Delta t} = 0\)</span></p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(\Delta W_{T-\Delta t} = \epsilon_{T-\Delta t} \sqrt {\Delta t}\)</span>
+<span class="math notranslate nohighlight">\(W_T = (\epsilon_0 +  \epsilon_{\Delta t} + \epsilon_{2\Delta t} + ... + \epsilon_{T-\Delta t}) * \sqrt {\Delta t}\)</span></p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\mathbb{E}(W_T) = 0\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(Var(W_T) = n * \Delta t = T\)</span> (각각의 <span class="math notranslate nohighlight">\(\epsilon\)</span> 은 서로 i.i.d 이므로 공분산은 0)</p></li>
+<li><p>즉, <span class="math notranslate nohighlight">\(W_T \sim N(0,T)\)</span></p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_03.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_03.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 338 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>파란색 점들은 Brownian Motion Process 를 1번 Sampling 한 결과 (one representation) 를 나타냄<br></p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_04.png"><img alt="img_04" class="bg-primary mb-1" src="../../_images/img_04.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 339 </span><span class="caption-text">Source : <a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>t=0 부터 t=T 까지 Wiener Process 를 수행하면,<br>
+<span class="math notranslate nohighlight">\(W_t\)</span> 는 <span class="math notranslate nohighlight">\(W_T - W_0\)</span> 만큼 변한다.</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\((W_T - W_0) \sim N(0, T-0)\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\((W_{t_2}-W_{t_1}) \sim N(0,t_2-t_1)\)</span></p>
+<ul>
+<li><p>ex. 5분에서 10분으로 Wiener Process 를 진행하면, <span class="math notranslate nohighlight">\(W_5\)</span> 는 0 이 아닐 수 있으나, 그 변화량 <span class="math notranslate nohighlight">\((W_{10}-W_{5})\)</span> 은 N(0, 10 - 5) 를 따른다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Brownian Bridge</strong></p>
+<ul class="simple">
+<li><p>X 가 Standard Wiener Process 라고 하자. <br>
+0 시점과 T 시점의 X 값을 알고,<br>
+0&lt;t&lt;T 일 때,
+두 점을 선형으로 연결하는 Linear Bridge X(t) 는 다음과 같다.</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_05.png"><img alt="img_05" class="bg-primary mb-1" src="../../_images/img_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 340 </span><span class="caption-text">Brownian Bridge 이해를 위한 Linear Bridge</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Brownian Bridge 는 Standard Wiener Process 의 Conditional Probability Distribution 이다.<br>
+Starting state W(0) 과 Ending state W(T) 의 값에 Conditioned 되어 있다.<br>
+아래와 같이 정의될 수 있다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_06.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 341 </span><span class="caption-text">Brownian Bridge</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><br>아래의 그림을 보면, 0 이라는 시작값과 123 이라는 마지막 값에 conditioned 되어 있는 것을 확인할 수 있다. <br> Brownian Bridge 의 분산은 0 에서 시작해서 증가하다가, T/2 시점에서 최대가 되었다가, 이후로는 감소하여 마지막엔 0 에 수렴하게 된다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_08.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_08.png" style="width: 2000px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 342 </span><span class="caption-text"><span class="math notranslate nohighlight">\(W_0\)</span> = 0 에서 <span class="math notranslate nohighlight">\(W_{1000}\)</span> = 123 까지 100개의 Brownian Bridge 를 샘플링한 결과</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Abstract</strong></p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_09.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 343 </span><span class="caption-text">Conditional Diffusion Models 와 BBDM 의 비교</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>기존의 Diffusion 모델</strong>들은,  <br>
+Image-to-Image 변환을 <strong>Conditional generation process</strong> 로 다룸. <br>
+이로 인해, <strong>매우 상이한 도메인 사이의 변환</strong>에는 <strong>어려움</strong>이 있음.</p></li>
+<li><p>이를 <strong>해결하기 위해</strong>, <br>
+본 논문은 <strong>Brownian Bridge</strong> <strong>에 기반한 Image-to-Image 변환 방법을 제시</strong>함</p></li>
+<li><p><strong>BBDM</strong> 은 Conditional generation process 가 아닌 <br>
+<strong>Stochastic Brownian Bridge Process</strong> 로 두 도메인 사이의 변환을 모델링하므로, <br><strong>Bidirectional Diffusion Process</strong> 임.</p></li>
+<li><p>Brownian Bridge diffusion process 를 Image-to-Image 변환에 접목한 최초의 논문임</p></li>
+<li><p>BBDM 모델의 훌륭한 성능을 실험적으로 증명함<br></p></li>
+</ul>
+</li>
+</ul>
+<ol class="arabic">
+<li><p><strong>Introduction</strong></p>
+<ul>
+<li><p>I2I 변환에서 <strong>Non-diffusion models 의 한계</strong></p>
+<ul class="simple">
+<li><p>Pix2Pix 와 같은 <strong>conditional GANs</strong> 는 <strong>fidelity 가 높았으나,</strong>
+학습이 어렵고, <strong>Diversity 가 떨어진다.</strong></p>
+<ul>
+<li><p>Diversity 가 떨어지는 이유 : conditional GANs 는 input image 를 output image 에 one-to-one mapping 하는 방법을 학습하기 때문</p></li>
+</ul>
+</li>
+<li><p><strong>VAE</strong> 같은 <strong>생성형 모델</strong>들은 GANs 만큼의 I2I 성능이 안나오고,
+<strong>Applicability</strong> 가 GANs 보다 <strong>떨어진다.</strong></p></li>
+</ul>
+</li>
+<li><p>I2I 변환에서 <strong>conditional diffusion models 의 한계</strong></p>
+<ul class="simple">
+<li><p>conditional diffusion models 는 <strong>reference image</strong> 의 encoded feature 를 <strong>직접 U-Net 에 통합</strong>시킴으로써 diffusion models 의 reverse process 를 guide 함</p></li>
+<li><p>하지만 이렇게 <strong>생성된 결과가 desired conditional distribution 을 추론해낸다는 명료한 이론적 근거가 없음</strong></p></li>
+<li><p>대부분의 <strong>conditional diffusion models 는 generalization 이 잘 안되므로,</strong>
+conditional input domain 과 output domain 이 유사한
+몇몇 applications 에서만 잘 활용될 수 있음</p>
+<ul>
+<li><p>ex. inpainting 혹은 super-resolution</p></li>
+</ul>
+</li>
+<li><p><strong>LDM</strong> 이 latent space 에서 diffusion process 를 수행함으로써
+<strong>generalization 을 개선</strong>하긴 했으나 <strong>여전히 conditional generation process</strong> 임</p></li>
+<li><p><strong>LDM</strong> 의 경우, <strong>복잡한 attention mechanism 으로 multi-modal condition</strong> 이 주어지므로, <strong>이론적 근거를 제시하기가 더 힘듦</strong></p></li>
+</ul>
+</li>
+<li><p><strong>본 논문에서 제안하는 BBDM 모델</strong></p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_10.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 344 </span><span class="caption-text">BBDM 의 아키텍쳐</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>BBDM</strong> 모델은 <strong>input 과 output 도메인 간의 mapping</strong> 을
+<strong>Brownian Bridge stochastic process 를 통해 구축</strong>함</p></li>
+<li><p>가속을 위해 Latent space 에서 diffusion process 를 수행함<br></p></li>
+</ul>
+</li>
+</ul>
+<ol class="arabic">
+<li><p><strong>Related Work</strong><br></p>
+<ul>
+<li><p><strong>2.1. Image-to-Image Translation</strong></p>
+<ul class="simple">
+<li><p>introduction 참고<br></p></li>
+</ul>
+</li>
+<li><p><strong>2.2. Diffusion Models</strong><br></p>
+<ul class="simple">
+<li><p><strong>Diffusion Models</strong> 의 simplified <strong>objective</strong> 를 잠깐 살펴보면, 다음과 같음.</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_11.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_11.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 345 </span><span class="caption-text">Diffusion Models 의 Simplified objective</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>대부분의 <strong>conditional Diffusion Models</strong> 는 <strong>condition 을 objective 에 직접 “주입”</strong>.<br>
+아래의 그림을 보면, conditional input image y 가 삽입된 것을 볼 수 있음.</p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_12.png"><img alt="img_12" class="bg-primary mb-1" src="../../_images/img_12.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 346 </span><span class="caption-text">Conditional Diffusion Models 의 Simplified objective</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(p(x_t|y)\)</span> 가 objective 에 드러나 있지 않으므로,
+<strong>desired conditional distribution 에 도달할 수 있을 것</strong>이라는 <strong>이론적 보장이 없음</strong><br></p></li>
+</ul>
+</li>
+<li><p><strong>2.3. Brownian Bridge</strong><br></p>
+<ul class="simple">
+<li><p><strong>Brownian Bridge</strong> 는 <strong>diffusion process 동안의 확률 분포가</strong>
+<strong>starting state (t=0)</strong> 와 <strong>ending state (t=T)</strong> 에 <strong>conditioned 되어 있는,</strong>
+<strong>continuous-time stochastic model</strong> 임</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_13.png"><img alt="img_13" class="bg-primary mb-1" src="../../_images/img_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 347 </span><span class="caption-text">식(3)</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>앞서 보았던 Brownian Bridge 의 평균과 분산을 구해보자.<br>
+위의 식과 같은 의미임을 알 수 있다.<br></p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_06.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 348 </span><span class="caption-text">Brownian Bridge</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+  <br>
+</li>
+</ul>
+</li>
+<li><p><strong>Method</strong><br></p>
+<ul>
+<li><p><strong>3.1. Brownian Bridge Diffusion Model (BBDM)</strong><br></p>
+<ul class="simple">
+<li><p><strong>Conditional diffusion models</strong> : <strong>Gaussian noise 를 향해 Forward process 진행</strong></p></li>
+<li><p><strong>BBDM : conditional input y 자체를 향해 Brownian Bridge process 진행</strong><br></p></li>
+</ul>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_09.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 349 </span><span class="caption-text">Conditional Diffusion Models 와 BBDM 의 비교</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+  <br>
+<ul class="simple">
+<li><p>VQGAN 의 latent space 에서 diffusion process 를 수행</p></li>
+<li><p><strong>x</strong> 가 <strong>A 도메인 영상의 latent features</strong> 이고,<br>
+<strong>y</strong> 가 <strong>B 도메인 영상의 latent features</strong> 일 때, <br>
+<strong>Forward diffusion process 는 다음과 같이 정의</strong>됨</p></li>
+</ul>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 350 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>T</strong> 는 diffusion process 의 <strong>total steps</strong> 이다.</p></li>
+<li><p><span class="math notranslate nohighlight">\(\delta_t\)</span> 는 <strong>분산</strong>이다.</p></li>
+<li><p>식 (3) 에 나타난 분산 <span class="math notranslate nohighlight">\(\delta_t={t(T-t) \over T}\)</span> 를 사용하게 되면,
+<strong>가능한 최대 분산값</strong>은, <strong>middle step 인 <span class="math notranslate nohighlight">\({T \over 2}\)</span> 에서의 분산값인 <span class="math notranslate nohighlight">\(\delta_{T \over 2} = {T \over 4}\)</span> 가 됨</strong></p></li>
+<li><p>T 값이 커지면, 최대 분산값도 커지는데, <strong>이 분산 값은 다루기에 너무 큼</strong></p></li>
+<li><p><span class="math notranslate nohighlight">\(x_0,y \sim N(0,I)\)</span> 이면서 서로 독립일 때,
+Brownian Bridge diffusion process 를 위한 <strong>분산 scheduling</strong> 을
+다음과 같이 해볼 수 있다.</p></li>
+</ul>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_15.png"><img alt="img_15" class="bg-primary mb-1" src="../../_images/img_15.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 351 </span><span class="caption-text">Brownian Bridge diffusion process 를 위한 분산 Scheduling</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>만약 t 가 양의 정수 값을 갖는 discrete time 이고, 그 최댓값이 T=1000 이라면
+<span class="math notranslate nohighlight">\(\delta_t\)</span> 는 아래 그림과 같게 된다.</p></li>
+</ul>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_16.png"><img alt="img_16" class="bg-primary mb-1" src="../../_images/img_16.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 352 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\delta_t\)</span> 를 시각화한 결과</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+  <br>
+<p><span class="math notranslate nohighlight">\(m_t = {t \over T}\)</span> 이고, <span class="math notranslate nohighlight">\(\delta_t = 2(m_t - m_t^2)\)</span> 이므로,<br></p>
+<ul class="simple">
+<li><p>diffusion process 가 시작하는 <strong>t = 0 에서는, <span class="math notranslate nohighlight">\(m_0\)</span> = 0</strong> 이고,
+<strong>평균은 <span class="math notranslate nohighlight">\(x_0\)</span></strong> 이며
+<strong>분산은 0</strong> 이 된다.<br></p></li>
+<li><p>diffusion process 가 끝나는 <strong>t = T 에서는,</strong>
+<span class="math notranslate nohighlight">\(m_T\)</span> <strong>= 1</strong> 이고,
+<strong>평균은 y</strong> 이고,
+<strong>분산은 0</strong> 이 된다.<br></p></li>
+<li><p><strong>분산이,</strong>
+diffusion process 의 <strong>중간 지점까지는 최대 0.5 까지 증가</strong>하다가,<br>
+중간 지점부터 <strong>끝나는 지점까지는 0 으로 감소</strong></p></li>
+<li><p><strong>Brownian Bridge diffusion process</strong> 에서의 <strong>sampling diversity</strong> 는
+<strong>최대 분산값,<br> 즉 middle step 인 <span class="math notranslate nohighlight">\(t = {T\over 2}\)</span> 에서의 분산값에 의해 결정</strong>됨</p></li>
+<li><p><strong>분산을 스케일링하는 변수 s</strong> <strong>를 두어</strong> <strong>sampling diversity 를 조절</strong>할 수 있다.</p></li>
+</ul>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_17.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 353 </span><span class="caption-text">식(5) : sampling diversity 조절을 위한 계수 s 가 포함된 분산 scheduling</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이 논문에서 <strong>s 의 디폴트 값은 1</strong>
+<br></p></li>
+</ul>
+</li>
+<li><p><strong>3.1.1 Forward Process</strong><br></p>
+<ul class="simple">
+<li><p><strong>식 (4)</strong> 에서는 <strong>step t 에서의 marginal distribution 만 제공</strong></p></li>
+<li><p><strong>training 과 inference process 를 위해</strong>서는 <strong>forward transition probability</strong> 인 <span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1}, y)\)</span> 를 알아야함</p></li>
+<li><p><strong>식 (4) 에 의해, <span class="math notranslate nohighlight">\(x_0\)</span> 와 <span class="math notranslate nohighlight">\(y\)</span> 가 주어졌을 때의 <span class="math notranslate nohighlight">\(x_t\)</span> 와</strong> <span class="math notranslate nohighlight">\(x_{t-1}\)</span> 은 다음과 같이 쓸 수 있음</p></li>
+</ul>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 354 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id21">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_18.png"><img alt="img_18" class="bg-primary mb-1" src="../../_images/img_18.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 355 </span><span class="caption-text">식(6) &amp; 식(7)</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>참고. 위 식 (7) 의 <span class="math notranslate nohighlight">\(m_ty\)</span> 는 <span class="math notranslate nohighlight">\(m_{t-1}y\)</span> 로 쓰는 것이 옳음</p></li>
+</ul>
+<figure class="align-default" id="id22">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_19.png"><img alt="img_19" class="bg-primary mb-1" src="../../_images/img_19.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 356 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\epsilon\)</span> 은 i.i.d 하게 N(0, I) 를 따른다</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>식 (6) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 를 식 (7) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 로 대체</strong>하면,
+<strong>Forward transition probability <span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1}, y)\)</span></strong> 가 아래의 <strong>식 (8)</strong> 과 같이 유도됨</p></li>
+</ul>
+<figure class="align-default" id="id23">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_20.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_20.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 357 </span><span class="caption-text">식(8)</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>증명</p>
+<ul>
+<li><p>식(7) 을 다음과 같이 쓸 수 있음</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(x_0 = {x_{t-1}-m_{t-1}y-\sqrt {\delta_{t-1}} \epsilon_{t-1} \over 1-m_{t-1}}\)</span></p></li>
+</ul>
+</li>
+<li><p>식(6) 의 <span class="math notranslate nohighlight">\(x_0\)</span> 에 위의 <span class="math notranslate nohighlight">\(x_0\)</span> 를 대입</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(x_t = {(1-m_t)x_{t-1} \over (1-m_{t-1})} - {(1-m_t)m_{t-1}y \over (1-m_{t-1})} - {(1-m_t)\sqrt {\delta_{t-1}}\epsilon_{t-1} \over (1-m_{t-1})} + m_ty + \sqrt{\delta_t} \epsilon_t\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= {(1-m_t)x_{t-1} \over (1-m_{t-1})} + y(m_t - {(1-m_t) \over (1-m_{t-1})}m_{t-1}) + \sqrt {\delta_t}\epsilon_t - {(1-m_t)\sqrt {\delta_{t-1}}\epsilon_{t-1} \over (1-m_{t-1})}\)</span></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>이후, <span class="math notranslate nohighlight">\(Var(x_t)\)</span> 를 구하면, 아래의 <span class="math notranslate nohighlight">\(\delta_{t|t-1}\)</span> 와 같이 유도됨</p></li>
+</ul>
+<figure class="align-default" id="id24">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_21.png"><img alt="img_21" class="bg-primary mb-1" src="../../_images/img_21.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 358 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\delta_{t|t-1}\)</span> 식</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>t=T 가 될 때 <span class="math notranslate nohighlight">\(m_T = 1\)</span> 인데, 이때 식(8) 에 의해 <span class="math notranslate nohighlight">\(x_T = y\)</span> 임.
+↓
+“아, Forward diffusion process 는 확실히..
+A 도메인으로부터 B 도메인으로의 fixed mapping 을 정의하는구나”</p></li>
+</ul>
+</li>
+<li><p><strong>3.1.2 Reverse Process</strong><br></p>
+<ul class="simple">
+<li><p><strong>conditional diffusion models</strong> 의 <strong>reverse process</strong> 는, <br>
+<strong>Gaussian noise 로부터 시작</strong>하며, <br>
+매 스텝마다 조금씩 noise 를 제거해나감<br></p></li>
+<li><p>반면, <strong>BBDM 의 Brownian Bridge process 는 <span class="math notranslate nohighlight">\(x_T = y\)</span> 로 둠으로써, <br>
+conditional input 그 자체에서 Reverse process 를 시작</strong>함</p></li>
+</ul>
+<figure class="align-default" id="id25">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_22.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_22.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 359 </span><span class="caption-text">식(9)</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\mu_\theta (x_t,t)\)</span> 는 U-Net 에 의해 예측된 노이즈 평균값이며, <span class="math notranslate nohighlight">\(\tilde{\delta}_t\)</span> 는 노이즈의 분산</p></li>
+<li><p>DDPM 처럼, 임의의 parameters <span class="math notranslate nohighlight">\(\theta\)</span> 를 갖는 신경망 <strong>U-Net 은 <span class="math notranslate nohighlight">\(\mu_\theta (x_t,t)\)</span> 를 학습</strong></p></li>
+</ul>
+</li>
+<li><p><strong>3.1.3. Training Objective</strong></p>
+<ul class="simple">
+<li><p><strong>참고.</strong></p>
+<ul>
+<li><p>예전 <strong>DDPM 의 Loss</strong> 는 다음과 같았음.</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id26">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_23.png"><img alt="img_23" class="bg-primary mb-1" src="../../_images/img_23.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 360 </span><span class="caption-text">DDPM 의 Loss</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>그리고, 이로부터 simplified 된 <strong>objective</strong> 는 다음과 같음</p></li>
+</ul>
+<figure class="align-default" id="id27">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_11.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_11.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 361 </span><span class="caption-text">DDPM 의 simplified objective</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Brownian Bridge diffusion process</strong> 의 <strong>ELBO</strong></p></li>
+</ul>
+<figure class="align-default" id="id28">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_24.png"><img alt="img_24" class="bg-primary mb-1" src="../../_images/img_24.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 362 </span><span class="caption-text">식(10) : BBDM 의 ELBO</span><a class="headerlink" href="#id28" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>첫 번째 term :</strong> <span class="math notranslate nohighlight">\(x_T\)</span> 가 곧 y 이므로 무시할 수 있음</p></li>
+<li><p><strong>세 번째 term</strong> : 매우 작은 값이 되므로 무시할 수 있음</p></li>
+<li><p><strong>베이즈 이론과 Markov chain property 를 식 (4) 와 식 (8) 에 적용</strong>하여,
+다음과 같이 <strong>식 (11) 이 도출</strong>된다.</p></li>
+<li><p>참고. Markovian Chain</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(q(x_t|x_{t-1}) = q(x_t|x_{t-1}, x_{t-2}, … , x_0)\)</span></p></li>
+<li><p>Markov chain property 에 의해,<br>
+<span class="math notranslate nohighlight">\(q_{BB}(x_t|x_{t-1},y) = q_{BB}(x_t|x_{t-1},x_0,y)\)</span> 가 성립됨을 활용</p></li>
+</ul>
+</li>
+<li><p>식(4)</p></li>
+</ul>
+<figure class="align-default" id="id29">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 363 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id29" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>식(8)</p></li>
+</ul>
+<figure class="align-default" id="id30">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_20.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_20.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 364 </span><span class="caption-text">식(8)</span><a class="headerlink" href="#id30" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>식(11) &amp; 식(13)</p></li>
+</ul>
+<figure class="align-default" id="id31">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_25.png"><img alt="img_25" class="bg-primary mb-1" src="../../_images/img_25.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 365 </span><span class="caption-text">식(11)</span><a class="headerlink" href="#id31" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id32">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_26.png"><img alt="img_26" class="bg-primary mb-1" src="../../_images/img_26.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 366 </span><span class="caption-text">식(13)</span><a class="headerlink" href="#id32" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>증명</p></li>
+<li><p><span class="math notranslate nohighlight">\({q_{BB}(x_{t}|x_{t-1},y)q_{BB}(x_{t-1}|x_{0},y)\over q_{BB}(x_{t}|x_{0},y)}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= {{q_{BB}(x_{t},x_{t-1},y) \over q_{BB}(x_{t-1},y)} {q_{BB}(x_{t-1},x_{0},y) \over q_{BB}(x_{0},y)} \over {q_{BB}(x_{t},x_{0},y)\over  q_{BB}(x_{0},y)}}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t}|x_{t-1},y){q_{BB}(x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t}|x_{t-1},x_{0},y){q_{BB}(x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= {q_{BB}(x_{t},x_{t-1},x_{0},y)\over q_{BB}(x_{t},x_{0},y)}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(= q_{BB}(x_{t-1}|x_{t},x_{0},y)\)</span></p></li>
+<li><p>위 식 (11) 의 평균은, 식 (12) 와 같이 정리됨</p></li>
+</ul>
+<figure class="align-default" id="id33">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_27.png"><img alt="img_27" class="bg-primary mb-1" src="../../_images/img_27.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 367 </span><span class="caption-text">식(12)</span><a class="headerlink" href="#id33" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>식(4) 와 식(12) 를 통합하고 Reparameterization method 를 사용해서
+<span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 를 다음과 같이 변형할 수 있음</p></li>
+</ul>
+<figure class="align-default" id="id34">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_28.png"><img alt="img_28" class="bg-primary mb-1" src="../../_images/img_28.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 368 </span><span class="caption-text">식(12) 의 변형</span><a class="headerlink" href="#id34" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>참고. 식(4)</p></li>
+</ul>
+<figure class="align-default" id="id35">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_14.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 369 </span><span class="caption-text">식(4)</span><a class="headerlink" href="#id35" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>하지만, 실제로 U-Net 은 전체 <span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 를 예측하는 것이 아니라,
+노이즈를 예측하도록 학습됨.</p></li>
+<li><p>이 내용을 식에 명시하기 위해,
+<strong>식(9) 에 명시된 <span class="math notranslate nohighlight">\(\mu_\theta\)</span> 를 식(14) 와 같이 다시 써볼 수 있음.<br>
+<span class="math notranslate nohighlight">\(x_t\)</span> 와 y, 그리고 예측된 노이즈 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span> 의 linear combination 으로 다시 써보는</strong> 것임.</p></li>
+</ul>
+<figure class="align-default" id="id36">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_22.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_22.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 370 </span><span class="caption-text">식(9)</span><a class="headerlink" href="#id36" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id37">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_29.png"><img alt="img_29" class="bg-primary mb-1" src="../../_images/img_29.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 371 </span><span class="caption-text">식(14)</span><a class="headerlink" href="#id37" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>그런데, 아래 그림을 참고해보면 우리는 <span class="math notranslate nohighlight">\(\tilde {\mu_t}\)</span> 에 근사하도록 <span class="math notranslate nohighlight">\(\mu_\theta\)</span> 를 학습시켜야함.</p></li>
+</ul>
+<figure class="align-default" id="id38">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_30.png"><img alt="img_30" class="bg-primary mb-1" src="../../_images/img_30.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 372 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\tilde {\mu}_t\)</span> 의 정리된 식</span><a class="headerlink" href="#id38" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>즉, <span class="math notranslate nohighlight">\(\epsilon_\theta (x_t,t)\)</span> 가 <span class="math notranslate nohighlight">\(m_t(y-x_0)+\sqrt {\delta_t}\epsilon\)</span> 을 근사하도록 학습되어야하는 것임.</p></li>
+<li><p>ELBO 의 두 번째 term 을 다시 살펴보면,<br></p>
+<ul class="simple">
+<li><p><strong>두 번째 term</strong> : <span class="math notranslate nohighlight">\(D_{KL}(q_{BB}(x_{t-1}|x_t, x_0, y)||p_\theta (x_{t-1}|x_t,y))\)</span><br></p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(arg \space min_\theta \space D_{KL}(q_{BB}(x_{t-1}|x_t, x_0, y)||p_\theta (x_{t-1}|x_t,y))\)</span>
+=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (\tilde {\mu}_t(x_t,y) - \mu_\theta (x_t,y,t))\)</span>
+=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (c_{\epsilon_t}(m_t(y-x_0) + \sqrt {\delta_t}\epsilon) - c_{\epsilon_t}\epsilon_\theta(x_t,t))\)</span>
+=<span class="math notranslate nohighlight">\(arg \space min_\theta \space (c_{\epsilon_t} (m_t(y-x_0) + \sqrt {\delta_t}\epsilon - \epsilon_\theta(x_t,t)))\)</span><br></p></li>
+</ul>
+</li>
+<li><p>따라서, ELBO 는 다음과 같이 단순화될 수 있음</p></li>
+</ul>
+<figure class="align-default" id="id39">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_31.png"><img alt="img_31" class="bg-primary mb-1" src="../../_images/img_31.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 373 </span><span class="caption-text">BBDM 의 Simplified ELBO</span><a class="headerlink" href="#id39" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Training Algorithm 정리</strong></p></li>
+</ul>
+<figure class="align-default" id="id40">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_32.png"><img alt="img_32" class="bg-primary mb-1" src="../../_images/img_32.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 374 </span><span class="caption-text">Algorithm 1 : Training. 마치 DDPM 에서 그러했듯이, BBDM 도 실제 Loss 에는 Simplified ELBO 에서의 계수 <span class="math notranslate nohighlight">\(c_{\epsilon_t}\)</span> 가 빠진 것을 확인할 수 있다.</span><a class="headerlink" href="#id40" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>3.2. Accelerated Sampling Processes</strong><br></p>
+<ul class="simple">
+<li><p><strong>DDIM 과 비슷하게, BBDM 의 inference processes</strong> 도
+<strong>non-Markovian process 를 사용해서 가속시킬 수 있음</strong></p></li>
+<li><p>Sampling steps 의 길이를 S 라고 두었을 때,
+<strong>inference process</strong> 는 <strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong> 에 의해 다음과 같이 정의됨</p></li>
+<li><p><strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong></p></li>
+</ul>
+<figure class="align-default" id="id41">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_33.png"><img alt="img_33" class="bg-primary mb-1" src="../../_images/img_33.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 375 </span><span class="caption-text"><strong>latent variables <span class="math notranslate nohighlight">\(x_{1:T}\)</span> 의 subset</strong></span><a class="headerlink" href="#id41" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>inference process</strong></p></li>
+</ul>
+<figure class="align-default" id="id42">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_34.png"><img alt="img_34" class="bg-primary mb-1" src="../../_images/img_34.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 376 </span><span class="caption-text">inference process</span><a class="headerlink" href="#id42" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Sampling Algorithm</strong></p></li>
+</ul>
+<figure class="align-default" id="id43">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_35.png"><img alt="img_35" class="bg-primary mb-1" src="../../_images/img_35.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 377 </span><span class="caption-text">Algorithm 2 : Sampling</span><a class="headerlink" href="#id43" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>본 논문에서는 <strong>S 값의 디폴트</strong>를 <strong>200</strong> 으로 두었음<br></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Experiments</strong><br></p>
+<ul>
+<li><p><strong>4.1. Experiment Setup</strong><br></p>
+<ul>
+<li><p><strong>모델 &amp; 하이퍼파라미터</strong></p>
+<ul class="simple">
+<li><p>BBDM 프레임워크는 pretrained VQGAN 과 BBDM 으로 이루어짐</p></li>
+<li><p><strong>Latent Diffusion Model 에서 사용된 것과 같은 pretrained VQGAN 을 사용</strong></p></li>
+<li><p>training stage 에서의 time steps 는 1,000</p></li>
+<li><p>inference stage 에서의 sampling steps 는 200<br></p></li>
+</ul>
+</li>
+<li><p><strong>Evaluation</strong></p>
+<ul class="simple">
+<li><p>FID 와 LPIPS 사용</p></li>
+<li><p>생성물의 diversity 를 평가하기 위해서,
+하나의 conditional input y 마다 5개의 샘플을 생성하고,
+각 픽셀 마다의 표준편차의 평균을 구함.
+그 후 전체 test 데이터셋에 대해서 평균 냄.<br></p></li>
+</ul>
+</li>
+<li><p><strong>Datasets</strong></p>
+<ul class="simple">
+<li><p>BBDM 의 I2I 변환 능력을 평가하기 위해서, 여러 task 로 실험함<br></p></li>
+</ul>
+<ol class="arabic simple">
+<li><p><strong>Semantic Synthesis 능력</strong>을 CelebAMask-HQ dataset 으로 실험</p>
+<ol class="arabic simple">
+<li><p>semantic layout 만 주고 photorealistic 한 images 를 생성해내는 능력 평가<br></p></li>
+</ol>
+</li>
+<li><p><strong>sketch-to-photo 능력</strong>을 edges2shoes 와 edges2handbags 로 실험</p>
+<ol class="arabic simple">
+<li><p>edges 만 주고 realistic images 생성해내는 능력 평가<br></p></li>
+</ol>
+</li>
+<li><p><strong>style transfer 능력</strong>을 faces2comics 로 실험</p>
+<ol class="arabic simple">
+<li><p>위 두 실험은 서로 상이한 domains 간의 변환 능력을 평가했다면,
+Style transfer 실험에서는 서로 비슷한 domains 간의 I2I 변환 능력을 평가<br></p></li>
+</ol>
+</li>
+</ol>
+</li>
+</ul>
+</li>
+<li><p><strong>4.2. Qualitative Comparison</strong><br></p>
+<figure class="align-default" id="id44">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_36.png"><img alt="img_36" class="bg-primary mb-1" src="../../_images/img_36.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 378 </span><span class="caption-text">Figure 3. CelebAMask-HQ 데이터셋에 대한 추론 결과</span><a class="headerlink" href="#id44" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id45">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_37.png"><img alt="img_37" class="bg-primary mb-1" src="../../_images/img_37.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 379 </span><span class="caption-text">Figure 4. 다른 Image-to-Image 변환 task 에 대한 추론 결과</span><a class="headerlink" href="#id45" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id46">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_38.png"><img alt="img_38" class="bg-primary mb-1" src="../../_images/img_38.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 380 </span><span class="caption-text">Figure 5. 다른 Image-to-Image 변환 task 에 대한 추론 결과</span><a class="headerlink" href="#id46" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Pix2Pix 는 지도 학습 방식으로 학습하므로, 괜찮은 결과를 냄</p></li>
+<li><p>반면 <strong>CycleGAN</strong> 은 <strong>작은 스케일의 데이터셋</strong>에서는 <strong>성능이 떨어짐</strong></p></li>
+<li><p>DRIT++ 은 GAN 기반 모델들 중에서는 좋은 성능을 냈으나,
+변환된 이미지들이 oversmoothed 되어 있었고,
+ground truth distribution 과는 거리가 멀었음</p></li>
+<li><p>conditional diffusion model 인 <strong>CDE</strong> 와 <strong>LDM</strong> 은
+GAN 기반 모델들보다는 <strong>좋은 성능</strong>을 냈으나,
+<strong>conditional information 에 큰 영향</strong>을 받음</p>
+<ul>
+<li><p><strong>Figure 3 의 첫 번째 줄</strong>을 보면 <strong>irregular occlusions</strong> 가 나타나는데,
+<strong>CDE 와 LDM 은 이에 큰 영향</strong>을 받음</p></li>
+</ul>
+</li>
+<li><p>반면 <strong>BBDM 은 두 도메인 간의 직접적인 diffusion process 를 학습</strong>하므로
+<strong>이러한 문제로부터 자유로움</strong></p></li>
+<li><p>또한 Brownian Bridge 의 stochastic 한 특성으로 인해
+fidelity 와 diversity 가 높은 이미지들을 생성해냄<br></p></li>
+</ul>
+</li>
+<li><p><strong>4.3. Quantitative Comparison</strong><br></p>
+<ul class="simple">
+<li><p>Table 1 과 2 를 보면, BBDM 이 모든 실험에서 가장 좋은 FID 값을 기록했으며, 훌륭한 LPIPS 값을 기록함</p></li>
+</ul>
+<figure class="align-default" id="id47">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_39.png"><img alt="img_39" class="bg-primary mb-1" src="../../_images/img_39.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 381 </span><span class="caption-text">Table 1. CelebAMask-HQ 데이터셋에 대한 FID, LPIPS 성능은 BBDM 이 가장 뛰어남</span><a class="headerlink" href="#id47" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id48">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_40.png"><img alt="img_40" class="bg-primary mb-1" src="../../_images/img_40.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 382 </span><span class="caption-text">Table 2. BBDM 은 FID, LPIPS 점수가 매우 뛰어났음</span><a class="headerlink" href="#id48" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>4.4. 다른 Translation Tasks</strong><br></p>
+<ul class="simple">
+<li><p><strong>BBDM 의 generalization 성능을 검증</strong>하기 위해서, 다른 tasks 에 대해서도 실험했음</p></li>
+<li><p>아래 그림과 같이, <strong>다른 tasks 에서도 comparable 한 성능을 기록</strong>함</p></li>
+</ul>
+<figure class="align-default" id="id49">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_41.png"><img alt="img_41" class="bg-primary mb-1" src="../../_images/img_41.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 383 </span><span class="caption-text">Figure 6. Face-to-label, 색상화, inpainting 등의 다른 tasks 에서도 뛰어난 성능을 기록함</span><a class="headerlink" href="#id49" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>4.5. Ablation Study</strong><br></p>
+<ul>
+<li><p><strong>pre-trained latent space 의 영향</strong></p>
+<figure class="align-default" id="id50">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_42.png"><img alt="img_42" class="bg-primary mb-1" src="../../_images/img_42.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 384 </span><span class="caption-text">Table 3. BBDM 은 LDM 에 비해 Downsampling factor 에 대해 robust 했음</span><a class="headerlink" href="#id50" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>BBDM 과 LDM</strong> 에 대해서,
+<strong>VQGAN downsampling factor</strong> 를 <strong>각각 4, 8, 16 으로 두고 성능 비교 실험 수행</strong></p></li>
+<li><p><strong>BBDM 은 down sampling factor 에 robust</strong> 했음<br></p></li>
+</ul>
+</li>
+<li><p><strong>Sampling steps 의 영향</strong></p>
+<ul class="simple">
+<li><p><strong>Sampling steps 가 작을 때 (200 이하) 는, 조금만 늘려도 성능이 크게 증가</strong><br></p></li>
+</ul>
+<figure class="align-default" id="id51">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_43.png"><img alt="img_43" class="bg-primary mb-1" src="../../_images/img_43.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 385 </span><span class="caption-text">Table 4. 200 이상의 Sampling Steps 에서는 Steps 를 키워도 성능 변화가 미미함</span><a class="headerlink" href="#id51" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+  <br>
+</li>
+<li><p><strong>Brownian Bridge 의 maximum variance 의 영향</strong></p>
+<figure class="align-default" id="id52">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_44.png"><img alt="img_44" class="bg-primary mb-1" src="../../_images/img_44.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 386 </span><span class="caption-text">Table 5. Sampling diversity 조절 계수에 의해 실제로 Diversity 가 조절 되었음</span><a class="headerlink" href="#id52" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>식 (5) 에 나타난 것처럼, <strong>scaling factor s 의 값을 변경</strong>함으로써,
+<strong>Brownian Bridge 의 최대 분산값 (t = T/2 일 때의 분산값) 조절 가능.</strong>
+<strong>이렇게 diversity 조절 가능.</strong></p></li>
+</ul>
+<figure class="align-default" id="id53">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_17.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 387 </span><span class="caption-text">식(5)</span><a class="headerlink" href="#id53" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Conclusion and Future Work</strong></p>
+<ul class="simple">
+<li><p><strong>Brownian Bridge 에 기반한 새로운 I2I 변환 방법 제시</strong></p></li>
+<li><p>이 방법은 기존의 conditional 한 방법과 달리,
+<strong>Brownian Bridge diffusion process 를 통해 두 도메인 간의 mapping 을 직접 학습</strong></p></li>
+<li><p><strong>여러 tasks 에서의 실험을 통해 BBDM 의 성능 검증</strong></p></li>
+<li><p>text-to-image 와 같은 multi-modal tasks 에도 BBDM 을 적용해볼 예정</p></li>
+</ul>
+</li>
+</ol>
+</li>
+</ol>
+<ul class="simple">
+<li><p><strong>참고 자료</strong></p>
+<ul>
+<li><p><a class="reference external" href="https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB">https://www.youtube.com/watch?v=ld0rxwAJpkM&amp;ab_channel=finRGB</a></p></li>
+<li><p><a class="reference external" href="https://sine-qua-none.tistory.com/158">https://sine-qua-none.tistory.com/158</a></p></li>
+</ul>
+</li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="GLIDE.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">GLIDE</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Your Diffusion Model is Secretly a Zero-Shot Classifier</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/CM3leon.html b/docs/review/CM3leon.html
old mode 100644
new mode 100755
index bd9ea36b..d65b80b7
--- a/docs/review/CM3leon.html
+++ b/docs/review/CM3leon.html
@@ -1,1103 +1,1123 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>CM3leon &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/CM3leon';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Synthetic Data from Diffusion Models Improves ImageNet Classification" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html" />
-    <link rel="prev" title="HyperDreamBooth" href="HyperDreamBooth.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/CM3leon.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/CM3leon.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>CM3leon</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">CM3leon</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract-1-introduction">Abstract &amp; 1. Introduction</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#pretraining">2. Pretraining</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data">2.1 Data</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-tokenization">Image Tokenization</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#retrieval-augmentation">Retrieval Augmentation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#objective-function">2.2 Objective Function</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model">2.3 Model</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">2.4 Training</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-results">3. Text-To-Image Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#importance-of-decoding-strategies">3.1 Importance of Decoding Strategies</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temperatured-sampling">Temperatured Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#topp-sampling">TopP Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance-cfg">Classifier Free Guidance (CFG)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contrastive-decoding-topk-cd-k">Contrastive Decoding TopK (CD-K)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-evaluation">3.2 Quantitative Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#supervised-fine-tuning">4. Supervised Fine-Tuning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instructable-image-generation">4.1 Instructable Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-image-editing">Text-Guided Image Editing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-grounded-generation">Image-to-Image Grounded Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatially-grounded-image-generation">Spatially Grounded Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-captioning-visual-question-answering-task">Image captioning &amp; visual question answering task</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Scaling Autoregressive Multi-Modal Models: Pretraining and Instruction Tuning</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://scontent-gmp1-1.xx.fbcdn.net/v/t39.2365-6/358725877_789390529544546_1176484804732743296_n.pdf?_nc_cat=108&amp;ccb=1-7&amp;_nc_sid=3c67a6&amp;_nc_ohc=PLfU_UR_vYAAX_NagU8&amp;_nc_ht=scontent-gmp1-1.xx&amp;oh=00_AfDrHAHXv1PcF0LqicjIYnmOrpVCGEQ0eMv5_Ve2_Tncvg&amp;oe=652FF632">https://scontent-gmp1-1.xx.fbcdn.net/v/t39.2365-6/358725877_789390529544546_1176484804732743296_n.pdf?_nc_cat=108&amp;ccb=1-7&amp;_nc_sid=3c67a6&amp;_nc_ohc=PLfU_UR_vYAAX_NagU8&amp;_nc_ht=scontent-gmp1-1.xx&amp;oh=00_AfDrHAHXv1PcF0LqicjIYnmOrpVCGEQ0eMv5_Ve2_Tncvg&amp;oe=652FF632</a></p></li>
-<li><p>Code: X</p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
-<li><p><strong>Last updated on Oct. 15. 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="cm3leon">
-<h1>CM3leon<a class="headerlink" href="#cm3leon" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="cm3leon-result">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cm3leon_result.png"><img alt="cm3leon_result" class="bg-primary mb-1" src="../../_images/cm3leon_result.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 305 </span><span class="caption-text">CM3leon result</span><a class="headerlink" href="#cm3leon-result" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>복잡하게 구성된 객체(손, 텍스트)도 잘 생성한다.</p></li>
-</ul>
-<section id="abstract-1-introduction">
-<h2>Abstract &amp; 1. Introduction<a class="headerlink" href="#abstract-1-introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>CM3Leon</p>
-<ul>
-<li><p>텍스트와 이미지 둘 다 잘 생성하는 능력을 가진 검색-증강, 토큰 기반, 디코더 전용 멀티 모달 모델이다.</p></li>
-<li><p>CM3 멀티 모달 아키텍처를 사용하며 scaling up 및 다양한 구조적-스타일 데이터에 tuning 할 수 있는 능력을 가졌다.</p></li>
-</ul>
-</li>
-<li><p>Training</p>
-<ul>
-<li><p>처음에는 멀티 모달 모델을 “텍스트 기반” language 모델에 맞도록 학습했다. (large scale의 검색 증강 pretraining 단계를 포함한다.)</p>
-<ul>
-<li><p>데이터는 라이센스가 있는 Shutterstock의 large-scale로 학습한다.</p></li>
-</ul>
-</li>
-<li><p>그 후 supervised fine tuning (SFT) 단계로 진행했다.</p>
-<ul>
-<li><p>입력과 출력 모두 이미지와 텍스트 토큰을 섞을 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>기존 이미지 생성 모델은 텍스트 프롬프트에 맞는 이미지만 잘 생성하는데,</p>
-<ul>
-<li><p>CM3leon은 텍스트와 이미지 모두 잘 생성한다.</p>
-<ul>
-<li><p>이미지 생성</p>
-<ul>
-<li><p>고해상도 output을 생성할 수 있는 self-contained contrastive decoding 방법을 소개한다.</p></li>
-<li><p>text guided image editing 부터 image controlled generation, segmentation까지 가능하다.</p></li>
-</ul>
-</li>
-<li><p>텍스트 생성</p>
-<ul>
-<li><p>Shutterstock의 30억 개의 텍스트 토큰으로 학습했는데, image-to-text generation도 잘 수행한다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>학습 연산량을 기존 대비 1/5 수준으로 줄였다.</p></li>
-<li><p>zero shot COCO로 FID를 측정한 결과 4.88 점으로, Google의 Parti 모델의 성능과 비슷한 수준을 달성했다.</p></li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="pretraining">
-<h1>2. Pretraining<a class="headerlink" href="#pretraining" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>RA-CM3를 기반으로 T2I 도메인에서 토큰 기반 디코더 모델의 잠재력을 연구했다.</p></li>
-</ul>
-<section id="data">
-<h2>2.1 Data<a class="headerlink" href="#data" title="Permalink to this heading">#</a></h2>
-<section id="image-tokenization">
-<h3>Image Tokenization<a class="headerlink" href="#image-tokenization" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Gafni의 image tokenizer를 사용했다.</p>
-<ul>
-<li><p>이 tokenizer는 256x256 이미지를 8192개의 vocabulary에서 1024개의 토큰으로 인코딩을 진행한다.</p></li>
-</ul>
-</li>
-<li><p>텍스트에서는, Zhang의 커스텀 tokenizer(56320 vocabulary size)를 학습했다.</p></li>
-<li><p>추가로, 새로운 스페셜 토큰인 <strong><code class="docutils literal notranslate"><span class="pre">&lt;break&gt;</span></code></strong>을 소개한다.</p></li>
-</ul>
-<figure class="align-default" id="figure-8-9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_8_9.png"><img alt="figure_8_9" class="bg-primary mb-1" src="../../_images/figure_8_9.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 306 </span><span class="caption-text">Figure_8_9</span><a class="headerlink" href="#figure-8-9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><code class="docutils literal notranslate"><span class="pre">&lt;break&gt;</span></code> 토큰은 modality 간 transition을 나타낸다.</p></li>
-</ul>
-</section>
-<section id="retrieval-augmentation">
-<h3>Retrieval Augmentation<a class="headerlink" href="#retrieval-augmentation" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>목적: 입력 sequence에 맞춰 관련성이 높고 다양한 멀티 모달 문서(from memory bank)를 검색하는 것이다.</p>
-<ul>
-<li><p>dense retriever 와 retrieval strategy을 포함하고 있다.</p></li>
-</ul>
-</li>
-<li><p>dense retriever</p>
-<ul>
-<li><p>쿼리 <span class="math notranslate nohighlight">\(q\)</span> (예: input sequence)와 memory bank <span class="math notranslate nohighlight">\(\mathcal M\)</span> 로부터 후보 문서 <span class="math notranslate nohighlight">\(m\)</span> 를 가지고 관련성 점수<span class="math notranslate nohighlight">\(r(q, m)\)</span> 를 return 해준다.</p></li>
-<li><p>dense retriever 방법은 CLIP 기반인 bi-encoder 구조를 따랐다. (Karpukhin)</p>
-<ul>
-<li><p>멀티 모달 문서를 text / image 파트로 분리하고, 각각 CLIP 인코더(ViT-B-32)를 통해 인코딩을 한다.</p></li>
-<li><p>그 후 두 인코딩 결과를 평균 내어 문서의 vector representation으로 사용한다.</p></li>
-</ul>
-</li>
-<li><p>최종 검색은 관련성 점수에 따라 정렬된 후보 문서 목록을 얻기 위해 Maximum Inner Product Search로 수행한다.</p></li>
-</ul>
-</li>
-<li><p>학습 때 generator를 위한 유용한 검색 문서를 추출하기 위해 세 가지 요소를 고려했다.</p>
-<ul>
-<li><p>relevance</p>
-<ul>
-<li><p>검색된 문서는 입력 sequence에 관련있어야 한다.</p></li>
-<li><p>CLIP 기반 dense retriever 점수를 사용한다.</p></li>
-</ul>
-</li>
-<li><p>modality</p>
-<ul>
-<li><p>이미지와 텍스트로 구성된 멀티 모달 문서를 검색하는 것이 이미지 또는 텍스트만 검색하는 것보다 낫다.</p></li>
-</ul>
-</li>
-<li><p>diversity</p>
-<ul>
-<li><p>다양성은 검색된 문서에서 중복성을 피하기 위한 필수적인 절차다.</p></li>
-<li><p>단순하게 관련성 점수에 기반해 top K 문서만 가져온다면 중복이 발생할 수 있다.</p>
-<ul>
-<li><p>또한 downstream pretraining 에 안좋은 영향을 끼칠 수 있다.</p></li>
-</ul>
-</li>
-<li><p>실제로, 관련성 점수가 0.9 이하인 검색 문서만 사용했고,</p>
-<ul>
-<li><p>query dropout(검색에 사용된 쿼리의 일부 20% 토큰을 삭제)를 적용했다.</p></li>
-<li><p>이를 통해 다양성을 높이고 학습을 정규화했다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>이미지와 텍스트를 기반으로 각각 두 개의 문서를 검색한다.</p></li>
-<li><p>학습에서는 데이터셋의 모든 캡션-이미지 쌍에 대해 검색된 샘플 3개를 무작위로 선택한다.</p>
-<ul>
-<li><p>이는 사실상 사전 학습에서 사용할 수 있는 토큰 수의 4배이다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="objective-function">
-<h2>2.2 Objective Function<a class="headerlink" href="#objective-function" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>CM3 objective</p>
-<ul>
-<li><p>input</p>
-<ul>
-<li><p><code class="docutils literal notranslate"><span class="pre">&quot;Image</span> <span class="pre">of</span> <span class="pre">a</span> <span class="pre">chameleon:</span> <span class="pre">[image]&quot;</span></code> 을 변형시켜 <code class="docutils literal notranslate"><span class="pre">&quot;Image</span> <span class="pre">of</span> <span class="pre">&lt;mask&gt;:</span> <span class="pre">[image]</span> <span class="pre">&lt;infill&gt;</span> <span class="pre">a</span> <span class="pre">chameleon&quot;</span></code> 로 표현한다.
-: <code class="docutils literal notranslate"><span class="pre">&lt;mask&gt;,</span> <span class="pre">&lt;infill&gt;</span></code> 이 추가되었고, 단어의 재배치가 진행됐다.</p></li>
-</ul>
-</li>
-<li><p>학습에는 일반적인 다음 토큰을 예측하는 loss를 사용했다.</p>
-<ul>
-<li><p>그 결과 이미지, 텍스트 둘 다 생성하는 다용도 모델의 결과를 가져왔다.</p></li>
-</ul>
-</li>
-<li><p>caption-to-image generation에서는 CM3가 “Image of a chameleon:” 프롬프트로 부터 이미지를 생성하고,</p>
-<ul>
-<li><p>image-to-caption generation에서는 CM3는 <code class="docutils literal notranslate"><span class="pre">“Image</span> <span class="pre">of</span> <span class="pre">&lt;mask&gt;:</span> <span class="pre">[image]</span> <span class="pre">&lt;infill&gt;”</span></code> 프롬프트를 활용한다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="model">
-<h2>2.3 Model<a class="headerlink" href="#model" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>CM3Leon 모델은 디코더만 사용하는 transformer 아키텍쳐를 사용한다.</p></li>
-<li><p>Zhang에 비해 bias term, dropout, layer norm의 학습 가능한 파라미터를 제거했다.</p></li>
-<li><p>sequence length를 2048 → 4096까지 확장했다.</p></li>
-<li><p>weight 초기화: 평균 0, 표준 편차 0.006 인 truncated(표준 편차 3으로 잘린) normal distribution 사용했다.</p></li>
-<li><p>output layer: 0으로 초기화, 0에 가까운 표준 편차 0.0002로 positional embedding 초기화한다.</p></li>
-<li><p><a class="reference external" href="https://github.com/facebookresearch/metaseq">Metaseq</a>로 학습됐다.</p></li>
-</ul>
-</section>
-<section id="training">
-<h2>2.4 Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="training-result">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_result.png"><img alt="training_result" class="bg-primary mb-1" src="../../_images/training_result.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 307 </span><span class="caption-text">Training result</span><a class="headerlink" href="#training-result" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>세 가지 모델 사이즈(350M, 760M, 7B)로 학습 진행했다. (→ 1.4T(Trillion), 1.9T, 2.4T tokens)</p>
-<ul>
-<li><p>주요한 하이퍼 파라미터는 learning rate, batch size로 멀티모달 scaling 에 맞게 설정했다.</p></li>
-</ul>
-</li>
-<li><p>참고</p>
-<ul>
-<li><p>Perplexity, PPL: 언어 모델의 평가 방법 중 하나이다. (헷갈리는 정도, 값이 낮을 수록 좋다.)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="text-to-image-results">
-<h2>3. Text-To-Image Results<a class="headerlink" href="#text-to-image-results" title="Permalink to this heading">#</a></h2>
-<section id="importance-of-decoding-strategies">
-<h3>3.1 Importance of Decoding Strategies<a class="headerlink" href="#importance-of-decoding-strategies" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>autoregressive T2I 모델에서 decoding 알고리즘에 대해 상당한 연구가 진행되어 왔다.</p>
-<ul>
-<li><p>그 중 DALL-E는 최종 아웃풋의 퀄리티가 향상되는 결과를 가져왔다.</p>
-<ul>
-<li><p>DALL-E 는 temperature 샘플링과 512개 후보 프롬프트에 CLIP re-ranking 전략을 채택했다.</p></li>
-</ul>
-</li>
-<li><p>PARTI 와 Make-A-Scene 과 같은 모델은 토큰 기반의 classifier-free guidance로, re-ranking에 대해 오직 16 개의 샘플만 필요하게 됨으로써 후보의 수를 줄였다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="temperatured-sampling">
-<h3>Temperatured Sampling<a class="headerlink" href="#temperatured-sampling" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>autoregressive 모델에서 확률적 기술로 사용된다.</p>
-<ul>
-<li><p>이 방법은 샘플링에서 softmax의 temperature를 수정해 예측 무작위성을 제어한다.</p></li>
-<li><ul>
-<li><p>Classifier Free Guidance 적용했다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="topp-sampling">
-<h3>TopP Sampling<a class="headerlink" href="#topp-sampling" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>nucleus 샘플링으로도 불리고, 미리 정의한 임계값을 초과하는 누적 확률을 가진 가장 작은 상위 토큰 세트에서 샘플링을 포함한다.</p>
-<ul>
-<li><ul>
-<li><p>Classifier Free Guidance 적용했다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="classifier-free-guidance-cfg">
-<h3>Classifier Free Guidance (CFG)<a class="headerlink" href="#classifier-free-guidance-cfg" title="Permalink to this heading">#</a></h3>
-<div class="math notranslate nohighlight">
-\[\begin{split}
-\begin{aligned}
-&amp; \operatorname{logits}_{\text {cond }}=T\left(t_y \mid t_x\right), \text { logits }_{\text {uncond }}=T\left(t_y \mid&lt;\bf { mask }&gt;\right) \\
-&amp; \operatorname{logits}_{\mathrm{cf}}=\operatorname{logits}_{\text {uncond }}+\alpha_c \cdot\left(\text { logits }_{\text {cond }}-\text { logits }_{\text {uncond }}\right)
-\end{aligned}
-\end{split}\]</div>
-<ul class="simple">
-<li><p>CFG는 unconditional 샘플을 conditional 샘플에 맞도록 하는 것을 의미한다.</p></li>
-<li><p>unconditional 샘플에서는 text를 CM3 objective의 마스크 토큰으로 대체한다.</p></li>
-<li><p>이는 CM3 목표를 사용한 학습의 핵심 이점 중 하나이며, finetuning 없이, classifier 없는 guidance를 수행할 수 있다.</p></li>
-<li><p>추론에서는 두 개의 토큰 stream을 생성한다.</p>
-<ul>
-<li><p>입력 텍스트에 따라 달라지는 토큰 stream과</p></li>
-<li><p>mask 토큰에 따라 condition된 unconditional 토큰 stream</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="contrastive-decoding-topk-cd-k">
-<h3>Contrastive Decoding TopK (CD-K)<a class="headerlink" href="#contrastive-decoding-topk-cd-k" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>CFG에서 logit의 뺄셈 연산이 텍스트에서 contrastive decoding 방법의 log probability를 뺄셈하는 연산과 비슷하다.</p></li>
-</ul>
-</section>
-</section>
-<section id="quantitative-evaluation">
-<h2>3.2 Quantitative Evaluation<a class="headerlink" href="#quantitative-evaluation" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="evaluation">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/evalution.png"><img alt="evalution" class="bg-primary mb-1" src="../../_images/evalution.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 308 </span><span class="caption-text">Evaluation</span><a class="headerlink" href="#evaluation" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>MS-COCO (30K) zero shot 예측, FID 측정했다.</p>
-<ul>
-<li><p>CM3Leon-7B 모델이 FID 4.88 점으로 가장 좋다.</p></li>
-</ul>
-</li>
-<li><p>retrieval-augmented decoder-only 모델의 효율성이 좋다.</p>
-<ul>
-<li><p>CM3Leon-7B 모델이 추론에서 1개/2개로 검색된 예제로 동작할 때 우수한 FID 점수를 기록했다.</p>
-<ul>
-<li><p>이는 고품질 이미지를 생성하는 능력을 확장시키는 검색의 중요성을 보여준다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="supervised-fine-tuning">
-<h2>4. Supervised Fine-Tuning<a class="headerlink" href="#supervised-fine-tuning" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="figure5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_51.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_51.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 309 </span><span class="caption-text">Figure5</span><a class="headerlink" href="#figure5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Supervised fine-tuning (SFT)는 LLM에서 중요한 학습 단계이다.</p>
-<ul>
-<li><p>명령어 또는 프롬프트를 잘 이해하는 것을 도와주며, zero shot task에서도 향상되는 결과를 얻었다.</p></li>
-</ul>
-</li>
-<li><p>명령어 튜닝이 다양한 task에 멀티모달 모델 성능을 눈에 띄게 증폭시키는 것을 발견했다.</p></li>
-<li><p>CM3Leon을 이미지와 텍스트 task를 섞어 넓은 범위에서 fine tuning 했다.</p></li>
-<li><p>finetuning 과정은 pretraining 단계를 따르며, task instruction과 출력을 결합해 동일한 CM3 objective를 사용한다.</p></li>
-</ul>
-<section id="instructable-image-generation">
-<h3>4.1 Instructable Image Generation<a class="headerlink" href="#instructable-image-generation" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="figure6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_61.png"><img alt="figure_6" class="bg-primary mb-1" src="../../_images/figure_61.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 310 </span><span class="caption-text">Figure6</span><a class="headerlink" href="#figure6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="text-guided-image-editing">
-<h3>Text-Guided Image Editing<a class="headerlink" href="#text-guided-image-editing" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>text instruction 에 기반한 initial image를 수정하는 task이다.</p></li>
-<li><p>InstructPix2Pix 방법 사용했다.</p></li>
-<li><p>예시: “하늘의 색을 파란색으로 변경해줘”와 같은 프롬프트로 이미지 편집이 가능하다.</p>
-<ul>
-<li><p>이것은 CM3leon이 텍스트와 이미지를 동시에 이해하고 있어서 가능하다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="image-to-image-grounded-generation">
-<h3>Image-to-Image Grounded Generation<a class="headerlink" href="#image-to-image-grounded-generation" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>다양한 feature와 텍스트 프롬프트로 grounding image를 생산하는 task이다.</p></li>
-<li><p>ControlNet 적용했다.</p></li>
-</ul>
-</section>
-<section id="spatially-grounded-image-generation">
-<h3>Spatially Grounded Image Generation<a class="headerlink" href="#spatially-grounded-image-generation" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="figure6-1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_6_1.png"><img alt="figure_6_1" class="bg-primary mb-1" src="../../_images/figure_6_1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 311 </span><span class="caption-text">Figure6-1</span><a class="headerlink" href="#figure6-1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>이미지 생성에 있어서 공간적 정보(위치)를 텍스트 프롬프트에 통합시킬 수 있도록 하는 task이다.</p></li>
-</ul>
-</section>
-<section id="image-captioning-visual-question-answering-task">
-<h3>Image captioning &amp; visual question answering task<a class="headerlink" href="#image-captioning-visual-question-answering-task" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="figure16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_16.png"><img alt="figure_16" class="bg-primary mb-1" src="../../_images/figure_16.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 312 </span><span class="caption-text">Figure16</span><a class="headerlink" href="#figure16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Flamingo(1000억 토큰), OpenFlamingo(400억 토큰)에 비해 CM3leon(30억 토큰)은 적은 토큰임에도 불구하고, 동등한 성능을 달성했다.</p></li>
-</ul>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="HyperDreamBooth.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">HyperDreamBooth</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Synthetic Data from Diffusion Models Improves ImageNet Classification</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">CM3leon</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract-1-introduction">Abstract &amp; 1. Introduction</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#pretraining">2. Pretraining</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data">2.1 Data</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-tokenization">Image Tokenization</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#retrieval-augmentation">Retrieval Augmentation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#objective-function">2.2 Objective Function</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model">2.3 Model</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">2.4 Training</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-results">3. Text-To-Image Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#importance-of-decoding-strategies">3.1 Importance of Decoding Strategies</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temperatured-sampling">Temperatured Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#topp-sampling">TopP Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance-cfg">Classifier Free Guidance (CFG)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contrastive-decoding-topk-cd-k">Contrastive Decoding TopK (CD-K)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-evaluation">3.2 Quantitative Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#supervised-fine-tuning">4. Supervised Fine-Tuning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instructable-image-generation">4.1 Instructable Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-image-editing">Text-Guided Image Editing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-grounded-generation">Image-to-Image Grounded Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatially-grounded-image-generation">Spatially Grounded Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-captioning-visual-question-answering-task">Image captioning &amp; visual question answering task</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>CM3leon &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/CM3leon';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Synthetic Data from Diffusion Models Improves ImageNet Classification" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html" />
+    <link rel="prev" title="HyperDreamBooth" href="HyperDreamBooth.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/CM3leon.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/CM3leon.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>CM3leon</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">CM3leon</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract-1-introduction">Abstract &amp; 1. Introduction</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#pretraining">2. Pretraining</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data">2.1 Data</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-tokenization">Image Tokenization</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#retrieval-augmentation">Retrieval Augmentation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#objective-function">2.2 Objective Function</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model">2.3 Model</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">2.4 Training</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-results">3. Text-To-Image Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#importance-of-decoding-strategies">3.1 Importance of Decoding Strategies</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temperatured-sampling">Temperatured Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#topp-sampling">TopP Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance-cfg">Classifier Free Guidance (CFG)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contrastive-decoding-topk-cd-k">Contrastive Decoding TopK (CD-K)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-evaluation">3.2 Quantitative Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#supervised-fine-tuning">4. Supervised Fine-Tuning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instructable-image-generation">4.1 Instructable Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-image-editing">Text-Guided Image Editing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-grounded-generation">Image-to-Image Grounded Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatially-grounded-image-generation">Spatially Grounded Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-captioning-visual-question-answering-task">Image captioning &amp; visual question answering task</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Scaling Autoregressive Multi-Modal Models: Pretraining and Instruction Tuning</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://scontent-gmp1-1.xx.fbcdn.net/v/t39.2365-6/358725877_789390529544546_1176484804732743296_n.pdf?_nc_cat=108&amp;ccb=1-7&amp;_nc_sid=3c67a6&amp;_nc_ohc=PLfU_UR_vYAAX_NagU8&amp;_nc_ht=scontent-gmp1-1.xx&amp;oh=00_AfDrHAHXv1PcF0LqicjIYnmOrpVCGEQ0eMv5_Ve2_Tncvg&amp;oe=652FF632">https://scontent-gmp1-1.xx.fbcdn.net/v/t39.2365-6/358725877_789390529544546_1176484804732743296_n.pdf?_nc_cat=108&amp;ccb=1-7&amp;_nc_sid=3c67a6&amp;_nc_ohc=PLfU_UR_vYAAX_NagU8&amp;_nc_ht=scontent-gmp1-1.xx&amp;oh=00_AfDrHAHXv1PcF0LqicjIYnmOrpVCGEQ0eMv5_Ve2_Tncvg&amp;oe=652FF632</a></p></li>
+<li><p>Code: X</p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
+<li><p><strong>Last updated on Oct. 15. 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="cm3leon">
+<h1>CM3leon<a class="headerlink" href="#cm3leon" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="cm3leon-result">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cm3leon_result.png"><img alt="cm3leon_result" class="bg-primary mb-1" src="../../_images/cm3leon_result.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 305 </span><span class="caption-text">CM3leon result</span><a class="headerlink" href="#cm3leon-result" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>복잡하게 구성된 객체(손, 텍스트)도 잘 생성한다.</p></li>
+</ul>
+<section id="abstract-1-introduction">
+<h2>Abstract &amp; 1. Introduction<a class="headerlink" href="#abstract-1-introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>CM3Leon</p>
+<ul>
+<li><p>텍스트와 이미지 둘 다 잘 생성하는 능력을 가진 검색-증강, 토큰 기반, 디코더 전용 멀티 모달 모델이다.</p></li>
+<li><p>CM3 멀티 모달 아키텍처를 사용하며 scaling up 및 다양한 구조적-스타일 데이터에 tuning 할 수 있는 능력을 가졌다.</p></li>
+</ul>
+</li>
+<li><p>Training</p>
+<ul>
+<li><p>처음에는 멀티 모달 모델을 “텍스트 기반” language 모델에 맞도록 학습했다. (large scale의 검색 증강 pretraining 단계를 포함한다.)</p>
+<ul>
+<li><p>데이터는 라이센스가 있는 Shutterstock의 large-scale로 학습한다.</p></li>
+</ul>
+</li>
+<li><p>그 후 supervised fine tuning (SFT) 단계로 진행했다.</p>
+<ul>
+<li><p>입력과 출력 모두 이미지와 텍스트 토큰을 섞을 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>기존 이미지 생성 모델은 텍스트 프롬프트에 맞는 이미지만 잘 생성하는데,</p>
+<ul>
+<li><p>CM3leon은 텍스트와 이미지 모두 잘 생성한다.</p>
+<ul>
+<li><p>이미지 생성</p>
+<ul>
+<li><p>고해상도 output을 생성할 수 있는 self-contained contrastive decoding 방법을 소개한다.</p></li>
+<li><p>text guided image editing 부터 image controlled generation, segmentation까지 가능하다.</p></li>
+</ul>
+</li>
+<li><p>텍스트 생성</p>
+<ul>
+<li><p>Shutterstock의 30억 개의 텍스트 토큰으로 학습했는데, image-to-text generation도 잘 수행한다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>학습 연산을 5배로 줄였다.</p></li>
+<li><p>zero shot COCO로 FID를 측정한 결과 4.88 점으로, Google의 Parti 모델의 성능과 비슷한 수준을 달성했다.</p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="pretraining">
+<h1>2. Pretraining<a class="headerlink" href="#pretraining" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>RA-CM3를 기반으로 T2I 도메인에서 토큰 기반 디코더 모델의 잠재력을 연구했다.</p></li>
+</ul>
+<section id="data">
+<h2>2.1 Data<a class="headerlink" href="#data" title="Permalink to this heading">#</a></h2>
+<section id="image-tokenization">
+<h3>Image Tokenization<a class="headerlink" href="#image-tokenization" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Gafni의 image tokenizer를 사용했다.</p>
+<ul>
+<li><p>이 tokenizer는 256x256 이미지를 8192개의 vocabulary에서 1024개의 토큰으로 인코딩을 진행한다.</p></li>
+</ul>
+</li>
+<li><p>텍스트에서는, Zhang의 커스텀 tokenizer(56320 vocabulary size)를 학습했다.</p></li>
+<li><p>추가로, 새로운 스페셜한 토큰인 <strong><code class="docutils literal notranslate"><span class="pre">&lt;break&gt;</span></code></strong>을 소개한다.</p></li>
+</ul>
+<figure class="align-default" id="figure-8-9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_8_9.png"><img alt="figure_8_9" class="bg-primary mb-1" src="../../_images/figure_8_9.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 306 </span><span class="caption-text">Figure_8_9</span><a class="headerlink" href="#figure-8-9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이 토큰은 modality 간 transition을 나타낸다.</p></li>
+</ul>
+</section>
+<section id="retrieval-augmentation">
+<h3>Retrieval Augmentation<a class="headerlink" href="#retrieval-augmentation" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>목적: 입력 sequence에 맞춰 관련성이 높고 다양한 멀티 모달 문서(from memory bank)를 검색하는 것이다.</p>
+<ul>
+<li><p>dense retriever 와 retrieval strategy을 포함하고 있다.</p></li>
+</ul>
+</li>
+<li><p>dense retriever</p>
+<ul>
+<li><p>쿼리 <span class="math notranslate nohighlight">\(q\)</span> (예: input sequence)와 memory bank <span class="math notranslate nohighlight">\(\mathcal M\)</span> 로부터 후보 문서 <span class="math notranslate nohighlight">\(m\)</span> 를 가지고 관련성 점수<span class="math notranslate nohighlight">\(r(q, m)\)</span> 를 return 해준다.</p></li>
+<li><p>dense retriever 방법은 CLIP 기반인 bi-encoder 구조를 따랐다. (Karpukhin)</p>
+<ul>
+<li><p>멀티 모달 문서를 text / image 파트로 분리하고, 각각 CLIP 인코더(ViT-B-32)를 통해 인코딩을 한다.</p></li>
+<li><p>그 후 문서의 vector representation로써 두 개를 평균을 낸다.</p></li>
+</ul>
+</li>
+<li><p>최종 검색은 관련성 점수에 따라 정렬된 후보 문서 목록을 얻기 위해 Maximum Inner Product Search로 수행한다.</p></li>
+</ul>
+</li>
+<li><p>학습 때 generator를 위한 유용한 검색 문서를 추출하기 위해 세 가지 요소를 고려했다.</p>
+<ul>
+<li><p>relevance</p>
+<ul>
+<li><p>검색된 문서는 입력 sequence에 관련있어야 한다.</p></li>
+<li><p>CLIP 기반 dense retriever 점수를 사용한다.</p></li>
+</ul>
+</li>
+<li><p>modality</p>
+<ul>
+<li><p>이미지와 텍스트로 구성된 멀티 모달 문서로 검색 &gt; 이미지 또는 텍스트로 검색하는 것이다.</p></li>
+</ul>
+</li>
+<li><p>diversity</p>
+<ul>
+<li><p>다양성은 검색된 문서에서 중복성을 피하기 위한 필수적인 절차다.</p></li>
+<li><p>단순하게 관련성 점수에 기반해 top K 문서만 가져온다면 중복이 발생할 수 있다.</p>
+<ul>
+<li><p>또한 downstream pretraining 에 안좋은 영향을 끼칠 수 있다.</p></li>
+</ul>
+</li>
+<li><p>실제로, 관련성 점수가 0.9 이하로 검색된 문서로 사용했고,</p>
+<ul>
+<li><p>query dropout(검색에 사용된 쿼리의 일부 20% 토큰을 삭제)를 적용했다.</p></li>
+<li><p>따라서 다양성과 학습에 정규화를 시켰다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>이미지와 텍스트를 기반으로 각각 두 개의 문서를 검색한다.</p></li>
+<li><p>학습에서는 데이터셋의 모든 캡션-이미지 쌍에 대해 검색된 샘플 3개를 무작위로 선택한다.</p>
+<ul>
+<li><p>이는 사실상 사전 학습에서 사용할 수 있는 토큰 수의 4배이다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="objective-function">
+<h2>2.2 Objective Function<a class="headerlink" href="#objective-function" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>CM3 objective</p>
+<ul>
+<li><p>input</p>
+<ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">&quot;Image</span> <span class="pre">of</span> <span class="pre">a</span> <span class="pre">chameleon:</span> <span class="pre">[image]&quot;</span></code> 을 변형시켜 <code class="docutils literal notranslate"><span class="pre">&quot;Image</span> <span class="pre">of</span> <span class="pre">&lt;mask&gt;:</span> <span class="pre">[image]</span> <span class="pre">&lt;infill&gt;</span> <span class="pre">a</span> <span class="pre">chameleon&quot;</span></code> 로 표현한다.
+: <code class="docutils literal notranslate"><span class="pre">&lt;mask&gt;,</span> <span class="pre">&lt;infill&gt;</span></code> 이 추가되었고, 단어의 재배치가 진행됐다.</p></li>
+</ul>
+</li>
+<li><p>학습에는 일반적인 다음 토큰을 예측하는 loss를 사용했다.</p>
+<ul>
+<li><p>그 결과 이미지, 텍스트 둘 다 생성하는 다용도 모델의 결과를 가져왔다.</p></li>
+</ul>
+</li>
+<li><p>caption-to-image generation에서는 CM3가 “Image of a chameleon:” 프롬프트로 부터 이미지를 생성하고,</p>
+<ul>
+<li><p>image-to-caption generation에서는 CM3는 <code class="docutils literal notranslate"><span class="pre">“Image</span> <span class="pre">of</span> <span class="pre">&lt;mask&gt;:</span> <span class="pre">[image]</span> <span class="pre">&lt;infill&gt;”</span></code> 프롬프트를 활용한다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="model">
+<h2>2.3 Model<a class="headerlink" href="#model" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>CM3Leon 모델은 디코더만 사용하는 transformer 아키텍쳐를 사용한다.</p></li>
+<li><p>Zhang에 비해 bias term, dropout, layer norm의 학습 가능한 파라미터를 제거했다.</p></li>
+<li><p>sequence length를 2048 → 4096까지 확장했다.</p></li>
+<li><p>weight 초기화: 평균 0, 표준 편차 0.006 인 truncated(표준 편차 3으로 잘린) normal distribution 사용했다.</p></li>
+<li><p>output layer: 0으로 초기화, 0에 가까운 표준 편차 0.0002로 positional embedding 초기화한다.</p></li>
+<li><p><a class="reference external" href="https://github.com/facebookresearch/metaseq">Metaseq</a>로 학습됐다.</p></li>
+</ul>
+</section>
+<section id="training">
+<h2>2.4 Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="training-result">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/training_result.png"><img alt="training_result" class="bg-primary mb-1" src="../../_images/training_result.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 307 </span><span class="caption-text">Training result</span><a class="headerlink" href="#training-result" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>세 가지 모델 사이즈(350M, 760M, 7B)로 학습 진행했다. (→ 1.4T(Trillion), 1.9T, 2.4T tokens)</p>
+<ul>
+<li><p>주요한 하이퍼 파라미터는 learning rate, batch size로 멀티모달 scaling 에 맞게 설정했다.</p></li>
+</ul>
+</li>
+<li><p>참고</p>
+<ul>
+<li><p>Perplexity, PPL: 언어 모델의 평가 방법 중 하나이다. (헷갈리는 정도, 값이 낮을 수록 좋다.)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="text-to-image-results">
+<h2>3. Text-To-Image Results<a class="headerlink" href="#text-to-image-results" title="Permalink to this heading">#</a></h2>
+<section id="importance-of-decoding-strategies">
+<h3>3.1 Importance of Decoding Strategies<a class="headerlink" href="#importance-of-decoding-strategies" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>autoregressive T2I 모델에서 decoding 알고리즘에 대해 상당한 연구가 진행되어 왔다.</p>
+<ul>
+<li><p>그 중 DALL-E는 최종 아웃풋의 퀄리티가 향상되는 결과를 가져왔다.</p>
+<ul>
+<li><p>DALL-E 는 temperature 샘플링과 512개 후보 프롬프트에 CLIP re-ranking 전략을 채택했다.</p></li>
+</ul>
+</li>
+<li><p>PARTI 와 Make-A-Scene 과 같은 모델은 토큰 기반의 classifier-free guidance로, re-ranking에 대해 오직 16 개의 샘플만 필요하게 됨으로써 후보의 수를 줄였다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="temperatured-sampling">
+<h3>Temperatured Sampling<a class="headerlink" href="#temperatured-sampling" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>autoregressive 모델에서 확률적 기술로 사용된다.</p>
+<ul>
+<li><p>이 방법은 샘플링에서 softmax의 temperature를 수정해 예측 무작위성을 제어한다.</p></li>
+<li><ul>
+<li><p>Classifier Free Guidance 적용했다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="topp-sampling">
+<h3>TopP Sampling<a class="headerlink" href="#topp-sampling" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>nucleus 샘플링으로도 불리고, 미리 정의한 임계값을 초과하는 누적 확률을 가진 가장 작은 상위 토큰 세트에서 샘플링을 포함한다.</p>
+<ul>
+<li><ul>
+<li><p>Classifier Free Guidance 적용했다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="classifier-free-guidance-cfg">
+<h3>Classifier Free Guidance (CFG)<a class="headerlink" href="#classifier-free-guidance-cfg" title="Permalink to this heading">#</a></h3>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+\begin{aligned}
+&amp; \operatorname{logits}_{\text {cond }}=T\left(t_y \mid t_x\right), \text { logits }_{\text {uncond }}=T\left(t_y \mid&lt;\bf { mask }&gt;\right) \\
+&amp; \operatorname{logits}_{\mathrm{cf}}=\operatorname{logits}_{\text {uncond }}+\alpha_c \cdot\left(\text { logits }_{\text {cond }}-\text { logits }_{\text {uncond }}\right)
+\end{aligned}
+\end{split}\]</div>
+<ul class="simple">
+<li><p>CFG는 unconditional 샘플을 conditional 샘플에 맞도록 하는 것을 의미한다.</p></li>
+<li><p>unconditional 샘플링을 위해 text를 CM3 목표의 마스크 토큰으로 대체한다.</p></li>
+<li><p>이는 CM3 목표를 사용한 학습의 핵심 이점 중 하나이며, finetuning 없이, classifier 없는 guidance를 수행할 수 있다.</p></li>
+<li><p>추론에서는 두 개의 토큰 stream을 생성한다.</p>
+<ul>
+<li><p>입력 텍스트에 따라 달라지는 토큰 stream과</p></li>
+<li><p>mask 토큰에 따라 condition된 unconditional 토큰 stream</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="contrastive-decoding-topk-cd-k">
+<h3>Contrastive Decoding TopK (CD-K)<a class="headerlink" href="#contrastive-decoding-topk-cd-k" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>CFG에서 logit의 뺄셈 연산이 텍스트에서 contrastive decoding 방법의 log probability를 뺄셈하는 연산과 비슷하다.</p></li>
+</ul>
+</section>
+</section>
+<section id="quantitative-evaluation">
+<h2>3.2 Quantitative Evaluation<a class="headerlink" href="#quantitative-evaluation" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="evaluation">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/evalution.png"><img alt="evalution" class="bg-primary mb-1" src="../../_images/evalution.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 308 </span><span class="caption-text">Evaluation</span><a class="headerlink" href="#evaluation" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>MS-COCO (30K) zero shot 예측, FID 측정했다.</p>
+<ul>
+<li><p>CM3Leon-7B 모델이 FID 4.88 점으로 가장 좋다.</p></li>
+</ul>
+</li>
+<li><p>retrieval-augmented decoder-only 모델의 효율성이 좋다.</p>
+<ul>
+<li><p>CM3Leon-7B 모델이 추론에서 1개/2개로 검색된 예제로 동작할 때 우수한 FID 점수를 기록했다.</p>
+<ul>
+<li><p>이는 고품질 이미지를 생성하는 능력을 확장시키는 검색의 중요성을 보여준다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="supervised-fine-tuning">
+<h2>4. Supervised Fine-Tuning<a class="headerlink" href="#supervised-fine-tuning" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="figure5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_51.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_51.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 309 </span><span class="caption-text">Figure5</span><a class="headerlink" href="#figure5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Supervised fine-tuning (SFT)는 LLM에서 중요한 학습 단계이다.</p>
+<ul>
+<li><p>명령어 또는 프롬프트를 잘 이해하는 것을 도와주며, zero shot task에서도 향상되는 결과를 얻었다.</p></li>
+</ul>
+</li>
+<li><p>명령어 튜닝이 다양한 task에 멀티모달 모델 성능을 눈에 띄게 증폭시키는 것을 발견했다.</p></li>
+<li><p>CM3Leon을 이미지와 텍스트 task를 섞어 넓은 범위에서 fine tuning 했다.</p></li>
+<li><p>finetuning 과정은 pretraining 단계를 따르며, task instruction과 출력을 결합해 동일한 CM3 objective를 사용한다.</p></li>
+</ul>
+<section id="instructable-image-generation">
+<h3>4.1 Instructable Image Generation<a class="headerlink" href="#instructable-image-generation" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="figure6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_61.png"><img alt="figure_6" class="bg-primary mb-1" src="../../_images/figure_61.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 310 </span><span class="caption-text">Figure6</span><a class="headerlink" href="#figure6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="text-guided-image-editing">
+<h3>Text-Guided Image Editing<a class="headerlink" href="#text-guided-image-editing" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>text instruction 에 기반한 initial image를 수정하는 task이다.</p></li>
+<li><p>InstructPix2Pix 방법 사용했다.</p></li>
+<li><p>예시: “하늘의 색을 파란색으로 변경해줘”와 같은 프롬프트로 이미지 편집이 가능하다.</p>
+<ul>
+<li><p>이것은 CM3leon이 텍스트와 이미지를 동시에 이해하고 있어서 가능하다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="image-to-image-grounded-generation">
+<h3>Image-to-Image Grounded Generation<a class="headerlink" href="#image-to-image-grounded-generation" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>다양한 feature와 텍스트 프롬프트로 grounding image를 생산하는 task이다.</p></li>
+<li><p>ControlNet 적용했다.</p></li>
+</ul>
+</section>
+<section id="spatially-grounded-image-generation">
+<h3>Spatially Grounded Image Generation<a class="headerlink" href="#spatially-grounded-image-generation" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="figure6-1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_6_1.png"><img alt="figure_6_1" class="bg-primary mb-1" src="../../_images/figure_6_1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 311 </span><span class="caption-text">Figure6-1</span><a class="headerlink" href="#figure6-1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이미지 생성에 있어서 공간적 정보(위치)를 텍스트 프롬프트에 통합시킬 수 있도록 하는 task이다.</p></li>
+</ul>
+</section>
+<section id="image-captioning-visual-question-answering-task">
+<h3>Image captioning &amp; visual question answering task<a class="headerlink" href="#image-captioning-visual-question-answering-task" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="figure16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_16.png"><img alt="figure_16" class="bg-primary mb-1" src="../../_images/figure_16.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 312 </span><span class="caption-text">Figure16</span><a class="headerlink" href="#figure16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Flamingo(1000억 토큰), OpenFlamingo(400억 토큰)에 비해 CM3leon(30억 토큰)은 적은 토큰임에도 불구하고, 동등한 성능을 달성했다.</p></li>
+</ul>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="HyperDreamBooth.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">HyperDreamBooth</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Synthetic Data from Diffusion Models Improves ImageNet Classification</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">CM3leon</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract-1-introduction">Abstract &amp; 1. Introduction</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#pretraining">2. Pretraining</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data">2.1 Data</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-tokenization">Image Tokenization</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#retrieval-augmentation">Retrieval Augmentation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#objective-function">2.2 Objective Function</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model">2.3 Model</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">2.4 Training</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-results">3. Text-To-Image Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#importance-of-decoding-strategies">3.1 Importance of Decoding Strategies</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temperatured-sampling">Temperatured Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#topp-sampling">TopP Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance-cfg">Classifier Free Guidance (CFG)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contrastive-decoding-topk-cd-k">Contrastive Decoding TopK (CD-K)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-evaluation">3.2 Quantitative Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#supervised-fine-tuning">4. Supervised Fine-Tuning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#instructable-image-generation">4.1 Instructable Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-image-editing">Text-Guided Image Editing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-grounded-generation">Image-to-Image Grounded Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatially-grounded-image-generation">Spatially Grounded Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-captioning-visual-question-answering-task">Image captioning &amp; visual question answering task</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Coin3D.html b/docs/review/Coin3D.html
new file mode 100755
index 00000000..82b60eb2
--- /dev/null
+++ b/docs/review/Coin3D.html
@@ -0,0 +1,1078 @@
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Coin3D &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Coin3D';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Synthetic Data with Stable Diffusion for Foliar Disease Classification" href="../experiments/js_exp.html" />
+    <link rel="prev" title="DreamGaussian" href="DreamGaussian.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Coin3D.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Coin3D.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Coin3D</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overall">Overall</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-guided-3d-conditiong-for-diffusion">3.1: Proxy-Guided 3D Conditioning for Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-aware-conditioned-generation">3.2 3D Aware Conditioned Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-pipeline-of-3d-aware-conditioned-generation">3.2.1 Training Pipeline of 3D Aware Conditioned Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-equation">Loss Equation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#interactive-generation-workflow">3.3. Interactive Generation Workflow</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-bounded-part-editing">3.3.1. Proxy-bounded part editing</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#interactive-preview-with-progressive-volume-caching">3.3.2. Interactive Preview with Progressive Volume Caching</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-conditioned-reconstruction">3.4 Volume-Conditioned Reconstruction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preview-reconstruction">3.4.1. Preview &amp; Reconstruction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">프록시 기반 생성 방법 비교</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">주요 관찰점</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">요약:</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5. Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-sds">Volume SDS</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-condition-3d-mask-dilation">Proxy Condition &amp; 3D Mask Dilation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Controllable and Interactive 3D Assets Generation with Proxy-Guided Conditioning (SIGGRAPH 2024)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2405.08054">https://arxiv.org/pdf/2405.08054</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/zju3dv/Coin3D">zju3dv/Coin3D</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://zju3dv.github.io/coin3d/">https://zju3dv.github.io/coin3d/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donggeun Sean Ko</p></li>
+<li><p><strong>Last updated on January 07, 2025</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="coin3d">
+<h1>Coin3D<a class="headerlink" href="#coin3d" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_01.png"><img alt="main_fig" class="bg-primary mb-1" src="../../_images/coin3d_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 809 </span><span class="caption-text">Overview of Coin3D</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>“사용자 친화적인” &amp; “제어 가능”한 3D assets 생성 프레임워크는 3가지 특성을 가져야 한다고 주장함</p>
+<ul class="simple">
+<li><p><strong>3D Controllable</strong>: Basic shape를 이용해서 간단하고 쉽게 원하는 형태를 만들 수 있어야 됨</p></li>
+<li><p><strong>Flexible</strong>: Interactive하게 (UI 등)을 활용하여 다양한 결과물을 만들 수 있어야 됨 (간단)</p></li>
+<li><p><strong>Responsive</strong>: 중간 결과물 및 빠른 결과물을 만들 수 있게 해야 됨 (fast inference time)</p></li>
+</ul>
+</section>
+<section id="related-works">
+<h2>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h2>
+<p><strong>3D Object Generation</strong></p>
+<ul class="simple">
+<li><p>Polygon-mesh based</p></li>
+<li><p>Point Cloud</p></li>
+<li><p>Voxels, Implicit Fields</p></li>
+<li><p>CAD-based</p></li>
+<li><p>Multiview Image generation (Zero123++, Wonder3D, etc.)</p></li>
+</ul>
+<p><strong>Controllable and Interactive Generation</strong></p>
+<ul class="simple">
+<li><p>Latent-NeRF</p></li>
+<li><p>Fantasia3D</p></li>
+</ul>
+<p><strong>이전 방법들의 문제점</strong></p>
+<ul class="simple">
+<li><p>“다중 얼굴 야누스 문제” (하나의 객체가 다양한 각도에서 일관성 있는 모습을 유지하지 못하는 문제)</p></li>
+<li><p>텍스트 프롬프트(텍스트-3D)나 이미지(이미지-3D)에만 집중함</p></li>
+<li><p>3D 형태를 정확하게 제어할 수 없음</p></li>
+</ul>
+</section>
+<section id="overall">
+<h2>Overall<a class="headerlink" href="#overall" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Coin3D Input Condition (전처리)</p></li>
+<li><p>3D-Aware Conditioned Generation</p></li>
+<li><p>Preview and Reconstruction</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_02.png"><img alt="overall_coin3d_arch" class="bg-primary mb-1" src="../../_images/coin3d_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 810 </span><span class="caption-text">Overview of Coin3D Main Architecture</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="method">
+<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+</section>
+<section id="proxy-guided-3d-conditiong-for-diffusion">
+<h2>3.1: Proxy-Guided 3D Conditioning for Diffusion<a class="headerlink" href="#proxy-guided-3d-conditiong-for-diffusion" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_03.png"><img alt="3d_proxy" class="bg-primary mb-1" src="../../_images/coin3d_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 811 </span><span class="caption-text">Proxy-based Initial Condition Generation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>3D Proxy as an initial condition (Preprocessing)</strong></p>
+<ul class="simple">
+<li><p>Coarse shape <span class="math notranslate nohighlight">\(P\)</span>와 prompt <span class="math notranslate nohighlight">\(y\)</span>로 다양한 camera pose에 대하여 <span class="math notranslate nohighlight">\(N_v\)</span>개의 consistent image를 예측</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\mathbf{X}_{(1:N_v)} = f(P, y, \mathbf{c}_{(1:N_v)})
+\]</div>
+<p>Where:</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(N_v\)</span>: number of consistent images (views)</p></li>
+<li><p><span class="math notranslate nohighlight">\(P\)</span>: coarse shape</p></li>
+<li><p><span class="math notranslate nohighlight">\(f\)</span>: Multiview diffusion-based generator</p></li>
+<li><p><span class="math notranslate nohighlight">\(y\)</span>: prompt</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mathbf{c}\)</span>: camera poses</p></li>
+</ul>
+</section>
+<section id="d-aware-conditioned-generation">
+<h2>3.2 3D Aware Conditioned Generation<a class="headerlink" href="#d-aware-conditioned-generation" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_04.png"><img alt="3d_aware" class="bg-primary mb-1" src="../../_images/coin3d_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 812 </span><span class="caption-text">3D-Aware Conditioned Generation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>3D Proxy Sample을 Voxelize를 통해 Voxel Grid <span class="math notranslate nohighlight">\(F_v\)</span> 생성</p></li>
+<li><p><strong>Multiview Image 생성</strong></p>
+<ul class="simple">
+<li><p>2-1. Image Candidates를 <strong>Clip</strong> + (Rotation, Translation)로 <strong>Denoising U-Net</strong> condition input으로 입력</p></li>
+<li><p>2-2. Volume Projected Condition도 입력 (학습)</p></li>
+</ul>
+</li>
+<li><p>MV Images → Project Fusion을 통해<br />
+multiview feature volume, <span class="math notranslate nohighlight">\(F_l^t\)</span> 생성</p></li>
+<li><p>3D Convolution (<span class="math notranslate nohighlight">\(f_{VP}\)</span>)을 통해 intermediate feature를 MVConv<br />
+(3DConv intermediate layer)에 계층적으로 추가</p></li>
+<li><p><span class="math notranslate nohighlight">\(F_c^t = f_{VM(1 \dots N)}(F_l^t) + f_{VM(1 \dots N)}(f_{VP}(F_v)_{1 \dots N})\)</span></p></li>
+<li><p>3D control volume 완성!</p></li>
+<li><p>3D Control Volume을 다시 <span class="math notranslate nohighlight">\(f_u\)</span>에 넣어 MV image 생성</p></li>
+</ol>
+</section>
+<section id="training-pipeline-of-3d-aware-conditioned-generation">
+<h2>3.2.1 Training Pipeline of 3D Aware Conditioned Generation<a class="headerlink" href="#training-pipeline-of-3d-aware-conditioned-generation" title="Permalink to this heading">#</a></h2>
+<ol class="arabic simple">
+<li><p><strong>(Preprocess)</strong> 각 학습 데이터를 MV image와 균일하게 샘플링된 coarse proxies로 변환</p></li>
+<li><p><strong>(Training)</strong> <span class="math notranslate nohighlight">\(B\)</span>개의 condition 및 target image를 무작위로 sampling하고, 대응하는 coarse proxy points를 샘플링함</p></li>
+<li><p><strong>(Training)</strong> <span class="math notranslate nohighlight">\(B\)</span>개의 timestep과 Gaussian noise도 샘플링<br />
+<span class="math notranslate nohighlight">\(\epsilon_{(1:B)} \sim \mathcal{N}(0, 1)\)</span></p></li>
+<li><p>아래의 loss를 이용하여 추가된 noise를 network <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>를 통해 예측</p></li>
+</ol>
+<div class="math notranslate nohighlight">
+\[
+\begin{gathered}
+\epsilon_\theta : \text{model’s predicted noise} \\
+c(I, F_c^t, c_i) : \text{conditioned embedding} \\
+c_i : \text{camera view} \\
+F_c^t : \text{3D Control volume} \\
+I : \text{Candidate Image}
+\end{gathered}
+\]</div>
+<section id="loss-equation">
+<h3>Loss Equation<a class="headerlink" href="#loss-equation" title="Permalink to this heading">#</a></h3>
+<div class="math notranslate nohighlight">
+\[
+\min_{\theta} \mathbb{E}_{t, \mathbf{x}_{(1:N_v)}, \epsilon_{(1:N_v)}} 
+\| \epsilon_i - \epsilon_\theta (\mathbf{x}_i^t, t, c(I, F_c^t, \mathbf{c}_i)) \|, \tag{2}
+\]</div>
+</section>
+</section>
+<section id="interactive-generation-workflow">
+<h2>3.3. Interactive Generation Workflow<a class="headerlink" href="#interactive-generation-workflow" title="Permalink to this heading">#</a></h2>
+</section>
+<section id="proxy-bounded-part-editing">
+<h2>3.3.1. Proxy-bounded part editing<a class="headerlink" href="#proxy-bounded-part-editing" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_05.png"><img alt="proxy-bounded-editing" class="bg-primary mb-1" src="../../_images/coin3d_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 813 </span><span class="caption-text">Proxy-bounded Part Editing</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>MV diffusion은 3D volume &amp; 2D image에 conditioning이 되어 있기 때문에,<br />
+이런 condition을 고려해서 편집을 해야하는 게 간단하지 않음.</p></li>
+<li><p>따라서, two-pathway condition editing scheme을 구성함:</p>
+<ul>
+<li><p>Projected 2D Mask → 2D Latent Diffusion Model</p></li>
+<li><p>3D Volume Mask → Partial Update Volume</p></li>
+<li><p>2D Image condition + 3D masked volume condition으로 “3D image editing”을 진행</p></li>
+</ul>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\hat{F}_C^t = (1 - M) F_C^t + M \tilde{F}_C^t, \tag{3}
+\]</div>
+<p>Where:</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\hat{F}_C^t\)</span>: updated volume</p></li>
+<li><p><span class="math notranslate nohighlight">\(\tilde{F}_C^t\)</span>: predicted volume at <span class="math notranslate nohighlight">\(t\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(F_C^t\)</span>: cached original volume</p></li>
+</ul>
+</section>
+<section id="interactive-preview-with-progressive-volume-caching">
+<h2>3.3.2. Interactive Preview with Progressive Volume Caching<a class="headerlink" href="#interactive-preview-with-progressive-volume-caching" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_06.png"><img alt="progressive_volume" class="bg-primary mb-1" src="../../_images/coin3d_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 814 </span><span class="caption-text">Interactive Preview with Progressive Volume Caching</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>목표: Interactive preview를 통해 수정된 결과를 몇 초 내에 확인하고<br />
+임의의 시점에서 효과를 검사 및 수정이 가능</p></li>
+<li><p>Progressive Volume Caching</p>
+<ul>
+<li><p>각 timestep <span class="math notranslate nohighlight">\(t\)</span>에서 최신 3D Control Volume, <span class="math notranslate nohighlight">\(F_C^t\)</span>를 캐싱함</p></li>
+<li><p>이를 통해 <span class="math notranslate nohighlight">\(F_C^t\)</span>를 반복적으로 계산할 필요가 없음</p></li>
+</ul>
+</li>
+<li><p>Viewpoint Panning</p>
+<ul>
+<li><p>Preview 단계에서 user’s viewpoint poses <span class="math notranslate nohighlight">\(c'\)</span>를 MV diffusion viewpoint condition에 전달</p></li>
+<li><p>이를 통해 원하는 시점 (arbitrary viewpoints)에서 프리뷰 이미지를 렌더링할 수 있음</p></li>
+</ul>
+</li>
+<li><p>핵심</p>
+<ul>
+<li><p>Cache를 이용하여 불필요한 연산을 제거하고 Cache에서 저장된 3D adapter rendering output을 이용하여 여러 preview를 생성</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="volume-conditioned-reconstruction">
+<h2>3.4 Volume-Conditioned Reconstruction<a class="headerlink" href="#volume-conditioned-reconstruction" title="Permalink to this heading">#</a></h2>
+</section>
+<section id="preview-reconstruction">
+<h2>3.4.1. Preview &amp; Reconstruction<a class="headerlink" href="#preview-reconstruction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_07.png"><img alt="preview_recon" class="bg-primary mb-1" src="../../_images/coin3d_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 815 </span><span class="caption-text">Preview and Reconstruction</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기존 Multiview images를 활용한 3D Reconstruction은<br />
+viewpoint가 적어 unexpected geometry가 만들어져 결과물이 뭉개지거나 한계점이 보임</p></li>
+<li><p>3D-aware context from 3D control volume을 활용해 3D Reconstruction quality를 올림</p>
+<ul>
+<li><p>개인적인 의견: 더 정교한 3D 물체 + Multiview가 있으니 더 정교한 결과물이 만들어진다? 라고 보여짐…</p></li>
+</ul>
+</li>
+<li><p><strong>Propose Volume SDS</strong></p>
+<ul>
+<li><p>integrating 3D control prior from voxelized feature <span class="math notranslate nohighlight">\(F_C^t\)</span> to the field’s backpropagation</p></li>
+</ul>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\nabla_x L_{V-SDS} = w(t) \left( \epsilon_\theta \left( \mathbf{x}_t, t, c(I, F_C^t, \mathbf{c}) \right) - \epsilon \right),
+\]</div>
+<p>where <span class="math notranslate nohighlight">\(w(t)\)</span> is the weighting function from <strong>DreamFusion</strong>.</p>
+</section>
+<section id="results">
+<h2>4. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_08.png"><img alt="results" class="bg-primary mb-1" src="../../_images/coin3d_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 816 </span><span class="caption-text">Qualitative Results</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="id1">
+<h3>프록시 기반 생성 방법 비교<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Wonder3D와 SyncDreamer 디테일 비교</strong>:</p>
+<ol class="arabic simple">
+<li><p><strong>더 높은 품질의 멀티뷰 이미지</strong>:</p>
+<ul>
+<li><p>Coin3D 결과물은 <strong>다양한 시점에서 일관성 있는 이미지</strong>를 생성하며, 왜곡이나 아티팩트가 최소화됨.</p></li>
+<li><p>반면, Wonder3D와 SyncDreamer는 복잡한 객체(예: 거북이와 오리)에서 <strong>기하학적 불일치</strong> 또는 텍스처 불일치를 보임.</p></li>
+</ul>
+</li>
+<li><p><strong>더 나은 텍스처 메쉬</strong>:</p>
+<ul>
+<li><p>Ours는 <strong>더욱 현실적이고 세밀한 텍스처</strong>를 재구성하며, 부드러운 전환과 정밀한 정렬을 유지.</p></li>
+<li><p>Wonder3D는 텍스처 불일치가 나타나고, SyncDreamer는 단순화된 텍스처를 생성하는 경향이 있음.</p></li>
+</ul>
+</li>
+<li><p><strong>객체 형태의 보존</strong>:</p>
+<ul>
+<li><p>Ours는 입력된 <strong>coarse shape</strong>를 정확히 보존하면서 세부 정보를 강화함.</p></li>
+<li><p>다른 방법론은 재구성 중 형태 왜곡(예: 의자가 휘거나 일그러짐)이 나타남.</p></li>
+</ul>
+</li>
+<li><p><strong>더 자연스러운 출력</strong>:</p>
+<ul>
+<li><p>Ours의 출력은 <strong>미적 품질이 높고 자연스러운 결과물</strong>을 제공하며, 복잡한 텍스처(예: 도넛)에서도 특히 돋보임.</p></li>
+<li><p>Wonder3D와 SyncDreamer는 인공적이거나 세부 사항이 부족한 경우가 많음.</p></li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="id2">
+<h3>주요 관찰점<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Coarse Shapes</strong>: 모든 방법이 비슷한 코스 쉐이프에서 시작하지만, Ours는 이를 가장 잘 개선함.</p></li>
+<li><p><strong>멀티뷰 이미지</strong>: Ours는 명확하고 일관된 멀티뷰 이미지를 생성하여 정확한 3D 재구성을 가능하게 함.</p></li>
+<li><p><strong>텍스처 메쉬</strong>: Ours는 현실적인 텍스처를 생성하며, 경쟁 방법론보다 높은 수준의 사실성을 보여줌.</p></li>
+</ul>
+</section>
+<section id="id3">
+<h3>요약:<a class="headerlink" href="#id3" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>(a) Ours</strong>: 멀티뷰 이미지의 일관성과 텍스처 메쉬 품질에서 우수함.</p></li>
+<li><p><strong>(b) Wonder3D</strong>: 기하학적 일관성과 텍스처 세부 정보에서 어려움.</p></li>
+<li><p><strong>(c) SyncDreamer</strong>: 텍스처가 단순화되고 형태가 불일치함.</p></li>
+</ul>
+</section>
+</section>
+<section id="quantitative-results">
+<h2>4.2 Quantitative Results<a class="headerlink" href="#quantitative-results" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_09.png"><img alt="quant_results" class="bg-primary mb-1" src="../../_images/coin3d_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 817 </span><span class="caption-text">Quantitative Results</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>TEXTure</strong> (Richardson et al., 2023) user study guideline 채택</p></li>
+<li><p>30명 사용자에게 35개의 test case를 무작위 순서로 제시한 후<br />
+perceptual quality &amp; content matching degree (w.r.t the given image or text prompts)를 기준으로 정렬 및 점수 배정</p></li>
+<li><p>3점 Best 기준으로 함.</p></li>
+</ul>
+</section>
+<section id="ablation-study">
+<h2>5. Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<section id="volume-sds">
+<h3>Volume SDS<a class="headerlink" href="#volume-sds" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_10.png"><img alt="vol_sds" class="bg-primary mb-1" src="../../_images/coin3d_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 818 </span><span class="caption-text">Ablation Study on Volume SDS</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Volume SDS Loss를 추가하면 렌더링에 artifacts가 없으며, 더 스무스하고 자연스러운 텍스쳐를 바탕으로 결과물을 생성함</p></li>
+</ul>
+</section>
+<section id="proxy-condition-3d-mask-dilation">
+<h3>Proxy Condition &amp; 3D Mask Dilation<a class="headerlink" href="#proxy-condition-3d-mask-dilation" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/coin3d_11.png"><img alt="proxy_cond" class="bg-primary mb-1" src="../../_images/coin3d_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 819 </span><span class="caption-text">Ablation Study on Proxy Condition and 3D Mask Dilation</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Proxy와 Dilation이 없을 시 rendering이 고르게 되지 않는 현상이 생김.</p></li>
+<li><p>Full method는 proxy와 dilation을 둘 다 사용함.</p></li>
+</ul>
+</section>
+</section>
+<section id="conclusion">
+<h2>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Basic block만 있으면 원하는 3D 생성을 할 수 있음</p></li>
+<li><p>Flexible 하고 UI-friendly 함 (ComfyUI 등)</p></li>
+<li><p>타 모델들에 비해 3D 결과물이 더 좋음</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DreamGaussian.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DreamGaussian</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="../experiments/js_exp.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Synthetic Data with Stable Diffusion for Foliar Disease Classification</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overall">Overall</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-guided-3d-conditiong-for-diffusion">3.1: Proxy-Guided 3D Conditioning for Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-aware-conditioned-generation">3.2 3D Aware Conditioned Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-pipeline-of-3d-aware-conditioned-generation">3.2.1 Training Pipeline of 3D Aware Conditioned Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-equation">Loss Equation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#interactive-generation-workflow">3.3. Interactive Generation Workflow</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-bounded-part-editing">3.3.1. Proxy-bounded part editing</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#interactive-preview-with-progressive-volume-caching">3.3.2. Interactive Preview with Progressive Volume Caching</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-conditioned-reconstruction">3.4 Volume-Conditioned Reconstruction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preview-reconstruction">3.4.1. Preview &amp; Reconstruction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">프록시 기반 생성 방법 비교</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">주요 관찰점</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">요약:</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5. Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-sds">Volume SDS</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#proxy-condition-3d-mask-dilation">Proxy Condition &amp; 3D Mask Dilation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
+</html>
\ No newline at end of file
diff --git a/docs/review/ConceptLab.html b/docs/review/ConceptLab.html
old mode 100644
new mode 100755
index 1e741800..c5c705ee
--- a/docs/review/ConceptLab.html
+++ b/docs/review/ConceptLab.html
@@ -1,905 +1,925 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>ConceptLab &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/ConceptLab';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Diffusion Models already have a Semantic Latent Space" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html" />
-    <link rel="prev" title="Progressive Distillation for Fast Sampling of Diffusion Models" href="progressive_distillation.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/ConceptLab.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/ConceptLab.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>ConceptLab</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-constraints">The Constraints</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-objective">The Objective</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regularization">Regularization</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-negatives">Adaptive Negatives</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evolutionary-generation">Evolutionary Generation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#result">Result</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#creative-generation">Creative Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#concept-mixing">Concept Mixing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-setup">Evaluation Setup</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparisons">Qualitative Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparisons">Quantitative Comparisons</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> ConceptLab: Creative Generation using Diffusion Prior Constraints</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2307.06949.pdf">https://arxiv.org/pdf/2307.06949.pdf</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/kfirgoldberg/ConceptLab">Official</a></p></li>
-<li><p>Site: <a class="reference external" href="https://kfirgoldberg.github.io/ConceptLab/">Official</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Hyoungseo Cho</p></li>
-<li><p><strong>Last updated on Nov. 20, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="conceptlab">
-<h1>ConceptLab<a class="headerlink" href="#conceptlab" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서는 Creative Generation의 일환으로, 새롭고 창의적인 개념을 생성하는 내용을 다룹니다. 최근 text-to-image 생성 기술과 Personalization 기술이 크게 발전함에 따라 이미지 생성 뿐만 아니라 개인화된 개념을 생성할 수 있게 되었습니다. 이러한 강력한 모델을 사용하여 모델에 명시적으로 설명되지 않은 새로운 창의적 개념을 생성할 수 있을까요?</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab01.png"><img alt="ConceptLab01" class="bg-primary mb-1" src="../../_images/ConceptLab01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 419 </span><span class="caption-text">ConceptLab</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="related-work">
-<h2>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<p><strong>Text-Guided Synthesis</strong> <br/>
-대부분의 text-guided 생성 기술은 pretrain 된 텍스트 인코더에서 추출한 임베딩을 diffusion 모델에 직접 conditioning합니다. 즉, 텍스트 데이터를 처리하여 이미지 생성 과정에 통합하는 방식입니다. 본 논문에서는 Latent Diffusion Model과 Diffusion prior model을 활용해서 creative generation에서의 이점을 보입니다.</p>
-<p><strong>Diffusion Prior</strong> <br/>
-Diffusion Prior 모델은 입력된 텍스트 임베딩을 CLIP의 latent space에서 해당하는 이미지 임베딩으로 매핑합니다. 이후 디코더는 CLIP의 이미지 임베딩에 condition이 부여된 이미지를 생성하기 위해 훈련됩니다.</p>
-<p><strong>Personalization</strong> <br/>
-Personalization은 text-guided synthesis 맥락에서 사용자가 입력한 텍스트 프롬프트에 맞는 주제나 스타일을 표현하는 새로운 이미지를 생성하는 것을 목표로 합니다. 일반적으로 새로운 개념을 학습시키기 위해 임베딩을 최적화하거나 denoising 네트워크를 finetuning 하는 방법을 활용합니다. 하지만 본 연구에서는 Creative Generation에 초점을 맞추고 새로운 개념을 생성하고 기발한 장면을 생성하는 것을 목표로 합니다.</p>
-<p><strong>Creative Generation</strong> <br/>
-창의적 내용을 생성하는 것은 다양한 접근 방법이 있습니다. Xu et al 에서는 set-evolution 방법을 활용해 3D 형태의 모델링을 제안했습니다. Elgammal et al 에서는 GAN의 맥락에서 창의적 생성을 탐구하며, 기존 스타일에서의 편차를 극대화하는 방식으로 새로운 스타일을 학습했습니다. Sbai et al 에서는 새로운 손실 함수를 도입했습니다. 본 연구에서는 주어진 카테고리와 일치하도록 최적화하면서도 그 카테고리의 기존 개념들과 다른 새로운 개념을 찾는 방식으로 창의적 생성에 접근했습니다. 본 방법을 통해 새로운 개념들은 서로 혼합될 수 있으며 더 유연한 생성 과정을 갖게됩니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab02.png"><img alt="ConceptLab02" class="bg-primary mb-1" src="../../_images/ConceptLab02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 420 </span><span class="caption-text">Text-guided generation (top left), personalization methods (bottom left), creative generation method (right)</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="prelimiaries">
-<h2>Preliminaries<a class="headerlink" href="#prelimiaries" title="Permalink to this heading">#</a></h2>
-<p><strong>Latent Diffusion Models</strong> <br/>
-Latent Diffusion Model에서는 오토인코더의 latent space 내에서 diffusion 과정이 진행됩니다. 먼저, 인코더 <span class="math notranslate nohighlight">\(E\)</span>는 주어진 이미지 <span class="math notranslate nohighlight">\(x\)</span>를 latent code <span class="math notranslate nohighlight">\(z\)</span>로 매핑하는 것을 목표로 합니다. 이때, z=E(x)가 됩니다. 동시에 디코더 D는 원본 입력 이미지를 재구성하도록 합니다. DDPM의 경우 아래 주어진 손실을 최소화하도록 학습합니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L = E_{z,y,\epsilon,t} [||\epsilon - \epsilon_{\theta}(z_{t}, t, c)||_{2}^{2}]
-\]</div>
-<p>denoising network <span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> 는 잠재 코드 <span class="math notranslate nohighlight">\(z_{t}\)</span>에 추가된 잡음 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 제거합니다. 이 과정에서 현재 시간 단계 t와 조건 벡터 c도 고려됩니다.</p>
-<p><strong>Diffusion Prior</strong> <br/>
-일반적으로 Diffusion model은 CLIP 텍스트 인코딩에서 직접 파생된 조건 벡터 <span class="math notranslate nohighlight">\(c\)</span>를 활용하여 주어진 텍스트 프롬프트 <span class="math notranslate nohighlight">\(y\)</span>에 대해 훈련됩니다. <span class="math notranslate nohighlight">\(Ramesh et al\)</span>에서 text-to-image 생성 문제를 2가지 단계로 decompose 합니다. 먼저, Diffusion Prior 모델을 활용하여 주어진 텍스트 프롬프트로부터 이미지 임베딩을 예측합니다. 다음으로, 이 이미지 임베딩에 조건을 부여하여 이미지를 생성하는 diffusion decoder로 보내집니다. 훈련 또한 일반적으로 두 독립적인 단계로 이루어집니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{prior} = E_{e,y,t} [||e - P_{\theta}(e_{t},t,y)||_{2}^{2}]
-\]</div>
-<p>Diffusion 디코더는 이미지 임베딩을 조건 <span class="math notranslate nohighlight">\(c\)</span>와 위 Latent Diffusion Model에 정의된 손실을 활용하여 훈련됩니다. 그 다음 diffusion prior model <span class="math notranslate nohighlight">\(P_{\theta}\)</span>는 임베딩 <span class="math notranslate nohighlight">\(e_{t}\)</span>로부터 denoise 된 이미지 임베딩 <span class="math notranslate nohighlight">\(e\)</span>를 직접 예측합니다. 이 두 단계 접근법은 이미지 다양성을 향상시키며 중간 CLIP 이미지 임베딩에 직접 접근하고 해당 공간에서 직접 제약을 할 수 있게 합니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab03.png"><img alt="ConceptLab03" class="bg-primary mb-1" src="../../_images/ConceptLab03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 421 </span><span class="caption-text">ConceptLab</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="method">
-<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<p>ConceptLab은 생성하고자 하는 새로운 개념을 대표하는 단일 임베딩 <span class="math notranslate nohighlight">\(v_{*}\)</span>를 최적화합니다. 이후 주어진 카테고리에 유사하면서도 기존 멤버들과 다른 특성을 가지도록 손실 집합을 계산합니다. 훈련하는 동안, 현재 생성된 새로운 개념을 바탕으로 negative constraints를 더하기 위해 pretrained BLIP-2 VQA 모델을 활용합니다.</p>
-<section id="the-constraints">
-<h3>The Constraints<a class="headerlink" href="#the-constraints" title="Permalink to this heading">#</a></h3>
-<p>본 연구에서는 긍정적 제약 <span class="math notranslate nohighlight">\(C_{pos}\)</span>와 부정적 제약 <span class="math notranslate nohighlight">\(C_{neg}\)</span> 두 가지를 활용합니다. 각 제약 조건은 텍스트 토큰을 활용하여 정의됩니다.</p>
-</section>
-<section id="the-objective">
-<h3>The Objective<a class="headerlink" href="#the-objective" title="Permalink to this heading">#</a></h3>
-<p>본 연구에서는 두가지 제약 조건을 바탕으로 하여 새로운 개념을 대표하는 임베딩 <span class="math notranslate nohighlight">\(v_{*}\)</span>와 각 제약 조건 간의 유사도를 측정합니다.
-우선, <span class="math notranslate nohighlight">\(v_{*}\)</span>와 각 제약 단어 <span class="math notranslate nohighlight">\(c\)</span>를 동일한 무작위 샘플링된 프롬프트 y에 통합합니다. 각 문장은 CLIP 텍스트 임베딩으로 인코딩되며, 이것이 텍스트 제약 조건을 정의합니다. 텍스트 프롬프트를 diffusion prior 모델에 통과시키면, 프롬프트의 특정 인스턴스가 생성됩니다. 이러한 방식으로 <span class="math notranslate nohighlight">\(E_{y}(v_{*})\)</span>가 diffusion prior를 통과하면 모든 <span class="math notranslate nohighlight">\(v_{*}\)</span>가 텍스트 제약 조건과 일치하도록 일관된 생성을 얻을 수 있습니다. 반면, 긍정 및 부정 제약 조건은 가능한 광범위하게 유지하고자 diffusion prior를 통과하지 않습니다. 이에 따라 본 연구에서의 손실 함수는 다음과 같이 정의됩니다:</p>
-<div class="math notranslate nohighlight">
-\[
-S(C,v_{*}) = E_{c \sim C}[\langle E_{y}(c), P(E_{y}(v_{*}))\rangle]
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-L = S(C_{neg}, v_{*}) + \lambda(1-S(C_{pos}, v_{*}))
-\]</div>
-<p>즉, 학습된 임베딩 v에서 생성된 샘플링된 이미지 임베딩 <span class="math notranslate nohighlight">\(P(E_{y}(v_{*}))\)</span>이 <span class="math notranslate nohighlight">\(C_{neg}\)</span>에 의해 정의된 텍스트 제약 조건에서 멀어지고 <span class="math notranslate nohighlight">\(C_{pos}\)</span>의 제약조건에 가까워지도록 합니다.</p>
-</section>
-</section>
-<section id="regularization">
-<h2>Regularization<a class="headerlink" href="#regularization" title="Permalink to this heading">#</a></h2>
-<p>정규화는 제약 조건 집합이 클 때 특정 멤버로의 collapsing을 방지하는 데 사용됩니다. 부정적 제약에 대한 최대 유사도를 측정하는 추가 손실 함수를 사용하는데 아래와 같이 정의됩니다:</p>
-<div class="math notranslate nohighlight">
-\[
-S_{max}(C,v_{*}) = \max_{c \sim C}(\langle E_{y}(c), P(E_{y}(v_{*}))\rangle)
-\]</div>
-<p>이 유사도 측정 방식은 전체 손실 함수에 통합되며, <span class="math notranslate nohighlight">\(S(C,v_{*})\)</span>와 평균 냄으로써 <span class="math notranslate nohighlight">\(v_{*}\)</span>에 가장 가까운 제약 조건에 더 큰 패널티를 부여합니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab04.png"><img alt="ConceptLab04" class="bg-primary mb-1" src="../../_images/ConceptLab04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 422 </span><span class="caption-text">훈련 과정 중 BLIP-2 모델을 사용하여 현재 개념에 가장 가까운 단어를 추론하고, 이를 제약 조건에 추가하는 과정을 거칩니다.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="adaptive-negatives">
-<h3>Adaptive Negatives<a class="headerlink" href="#adaptive-negatives" title="Permalink to this heading">#</a></h3>
-<p>많은 부정적 제약 조건을 수동으로 적용하는 것은 힘들고, 광범위한 카테고리의 가장 관련성 높은 멤버들을 정확하게 대표하지 못할 수도 있습니다. 이를 해결하기 위해, 훈련 중 부정적 제약 조건 집합을 점진적으로 확장하는 adaptive scheme을 제안합니다. 생성된 이미지를 사전 훈련된 BLIP-2 VQA 모델에 질의하여 이미지에 현재 존재하는 카테고리의 멤버가 무엇인지 식별하도록 합니다. 이후 결과로 나온 인스턴스를 훈련의 나머지 부분에 대한 부정적 제약 조건에 추가합니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab05.png"><img alt="ConceptLab05" class="bg-primary mb-1" src="../../_images/ConceptLab05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 423 </span><span class="caption-text">여러 단계에 걸쳐 생성된 이미지 결과를 보여줍니다. 훈련 과정에서 부정적 제약 조건이 지속적으로 조정되고 확장되었음을 보여줍니다.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="evolutionary-generation">
-<h3>Evolutionary Generation<a class="headerlink" href="#evolutionary-generation" title="Permalink to this heading">#</a></h3>
-<p>주어진 개념 셋에 대해 <em>개념을 혼합</em>하기 위해 먼저 각 개념에서 이미지를 생성하여 이미지 제약 조건 <span class="math notranslate nohighlight">\(C_{im}\)</span> 을 만듭니다. 각 이미지는 CLIP 이미지 인코더 <span class="math notranslate nohighlight">\(E_{im}(c)\)</span>를 통과하여 임베딩 세트를 생성합니다. 학습 가능한 개념 <span class="math notranslate nohighlight">\(v_{mix}\)</span>를 주어진 임베딩에 더 가깝게 만드는 수정된 손실 함수를 적용합니다.:</p>
-<div class="math notranslate nohighlight">
-\[
-L_{mix} = 1 - E_{c \sim C}[\langle E_{im}(c), P(E_{y}(v_{mix}))\rangle]
-\]</div>
-<p>이 손실 함수는 생성된 개념이나 실제 이미지에 적용될 수 있으며, 창의적인 생성물의 계층적 생성을 위해 반복적으로 적용될 수 있습니다. 또, 생성된 결과물에 대한 각 개념의 영향을 더 잘 제어하기 위해 가중치 항목이 추가적으로 적용될 수 있습니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab06.png"><img alt="ConceptLab06" class="bg-primary mb-1" src="../../_images/ConceptLab06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 424 </span><span class="caption-text">그림에는 훈련에 사용된 긍정적 개념이 왼쪽에 표시되어 있습니다. 이는 모델이 어떤 개념을 기반으로 창의적 이미지를 생성했는지를 알 수 있습니다. 모든 결과는 Adaptive Negative 기법을 활용했습니다.</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab07.png"><img alt="ConceptLab07" class="bg-primary mb-1" src="../../_images/ConceptLab07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 425 </span><span class="caption-text">ConceptLab이 제안한 다양한 이미지로 프롬프트와 Adaptive Negative 기법을 적용했습니다.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab08.png"><img alt="ConceptLab08" class="bg-primary mb-1" src="../../_images/ConceptLab08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 426 </span><span class="caption-text">ConceptLab은 생성된 개념들을 혼합하여 새롭고 독특한 창조물을 반복적으로 학습할 수 있습니다. 그림의 가장 윗줄에서는 Adaptive Negative 기법을 적용하여 학습된 개념들을 보여줍니다. 이어지는 줄에서는 Evolutionary Generation 과정을 통해 얻어진 개념들을 보여줍니다.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>ConceptLab의 효과를 입증하기 위해 정성적 및 정량적 평가를 진행했습니다.</p>
-<section id="result">
-<h3>Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h3>
-</section>
-<section id="creative-generation">
-<h3>Creative Generation<a class="headerlink" href="#creative-generation" title="Permalink to this heading">#</a></h3>
-<p>위 그림들에서 볼 수 있듯이 모든 결과는 Adaptive Negative를 적용하였고 훈련 시드를 달리하며 다양한 개념을 생성할 수 있는 능력이 있음을 볼 수 있습니다. 또, ConceptLab은 학습된 창의적 개념을 새로운 장면에 배치할 수 있습니다. 이 생성물들은 배경 변경, 스타일 변경, 새로운 창조등 다양하게 활용 가능합니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab09.png"><img alt="ConceptLab09" class="bg-primary mb-1" src="../../_images/ConceptLab09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 427 </span><span class="caption-text">ConceptLab을 활용한 Concept Mixing의 결과를 보여줍니다.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="concept-mixing">
-<h3>Concept Mixing<a class="headerlink" href="#concept-mixing" title="Permalink to this heading">#</a></h3>
-<p>Concept Mixing은 다양한 실제 개념들의 독특한 특성을 합쳐 하이브리드 개념을 형성하는 방법을 보여줍니다. 이 방법은 오직 긍정적 제약 조건만을 활용합니다. 예를 들어, 첫 번째 줄에는 랍스터의 주요 특징(색상과 집게발)을 거북이의 특징(등껍질)과 융합하는 것을 볼 수 있습니다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab10.png"><img alt="ConceptLab10" class="bg-primary mb-1" src="../../_images/ConceptLab10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 428 </span><span class="caption-text">위 그림은 ConceptLab에 의해 학습된 개념들이 여러 <em>세대</em>에 걸쳐 어떻게 발전하는지 보여줍니다.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="comparisons">
-<h3>Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
-</section>
-<section id="evaluation-setup">
-<h3>Evaluation Setup<a class="headerlink" href="#evaluation-setup" title="Permalink to this heading">#</a></h3>
-<p>ConceptLab은 Stable Diffusion2와 Kandinsky 2.1 두 모델과 함께 평가했습니다. Kandinsky의 경우, 더 유리한 결과를 위해 부정적 프롬프트는 Latent Diffusion Model이 아닌 Diffusion Prior Model에 적용했습니다.</p>
-</section>
-<section id="qualitative-comparisons">
-<h3>Qualitative Comparisons<a class="headerlink" href="#qualitative-comparisons" title="Permalink to this heading">#</a></h3>
-<p>ConceptLab은 긍정적 토큰과 부정적 제약 조건 모두에 일관되게 맞춰질 수 있습니다. 즉, ConceptLab은 다중 제약 조건을 효과적으로 처리하고, 특정 개념에 대한 일관된 표현을 학습할 수 있는 능력을 갖추고 있습니다.</p>
-</section>
-<section id="quantitative-comparisons">
-<h3>Quantitative Comparisons<a class="headerlink" href="#quantitative-comparisons" title="Permalink to this heading">#</a></h3>
-<p>정량적 평가를 위해 각 방법이 긍정적 개념을 포함하며, 주어진 부정적 개념과 닮지 않은 이미지를 생성하는 능력을 측정했습니다. 평가에는 애완동물, 식물, 과일, 가구, 악기의 5가지 카테고리를 활용했습니다. 각 도메인에 세 가지 다른 부정적 개념 쌍을 고려하고, 각 조합에 대해 ConceptLab을 5개의 랜덤 시드로 훈련하여 총 75개의 학습된 개념을 얻었습니다. 각 학습된 개념에 대해 “A photo of a <span class="math notranslate nohighlight">\(S_{*}\)</span>” 프롬프트를 활용하여 32개의 이미지를 생성했습니다. Stable Diffusion과 Kandinsky 모델에서는 부정적 프롬프트를 사용하고, 같은 긍정적 및 부정적 개념 쌍에 대해 160개의 이미지를 생성합니다. 측정 기준으로는 먼저 각 개념의 긍정적 유사성을 타겟 카테고리와의 CLIP 공간 유사성 계산을 통해 측정합니다. 다음으로는 긍정적 제약과 부정적 제약 사이의 거리를 측정합니다. 이는 생성된 이미지와 모든 부정적 개념 사이의 최대 유사성 계산을 통해 이루어집니다. 결과적으로 ConceptLab은 5가지 모든 도메인에서 긍정적 CLIP 유사성에서 일관되게 우월한 성능을 보였고 타겟 카테고리에 속하는 이미지를 신뢰성 있게 생성했습니다. 또한, 부정적 거리 측정에서 ConceptLab은 모든 카테고리에서 Stable Diffusion을, 4가지 카테고리에서 Kandinsky를 능가했습니다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab11.png"><img alt="ConceptLab11" class="bg-primary mb-1" src="../../_images/ConceptLab11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 429 </span><span class="caption-text">User Study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="limitations">
-<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<p>Personalization과 유사하게, 학습된 개념을 포함하는 프롬프트를 사용하여 새로운 이미지를 생성하는 것이 항상 개념의 특성을 다양한 프롬프트에 걸쳐 유지하지는 못합니다. 또, 최적화 과정 자체가 항상 원하는 결과를 가져오지는 않습니다. “비행기”나 “물고기”와 같은 일부 클래스의 경우 ConceptLab은 창의적 개념을 생성하는데 여전히 어려움이 있습니다. 이는 BLIP-2에 의해 생성되는 부정적 제약과 관련이 있습니다.</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab12.png"><img alt="ConceptLab12" class="bg-primary mb-1" src="../../_images/ConceptLab12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 430 </span><span class="caption-text">Limitations</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="conclusion">
-<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서는 text-to-image diffusion model을 활용하여 창의적 생성을 위한 새로운 접근 방법을 소개했습니다. 주어진 광범위한 카테고리에 속하는 새로운 개념을 학습하기 위해 Diffusion Prior 모델 사용을 제안했습니다. 또, Prior Constraints라는 긍정적 및 부정적 제약 조건들을 diffusion prior 출력에 적용했습니다. 최적화 과정에서는 VQA 모델을 활용하여 독특하면서도 기존 멤버들과의 명확한 구별을 보장했습니다. 이후 실험을 통해 본 방법의 효과성을 입증했으며 시각적으로 다양하고 매력적인 개념을 생성할 수 있었습니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="progressive_distillation.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Progressive Distillation for Fast Sampling of Diffusion Models</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Diffusion_models_already_have_a_Semantic_Latent_Space.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Diffusion Models already have a Semantic Latent Space</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-constraints">The Constraints</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-objective">The Objective</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regularization">Regularization</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-negatives">Adaptive Negatives</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evolutionary-generation">Evolutionary Generation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#result">Result</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#creative-generation">Creative Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#concept-mixing">Concept Mixing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-setup">Evaluation Setup</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparisons">Qualitative Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparisons">Quantitative Comparisons</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>ConceptLab &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/ConceptLab';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Diffusion Models already have a Semantic Latent Space" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html" />
+    <link rel="prev" title="Progressive Distillation for Fast Sampling of Diffusion Models" href="progressive_distillation.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/ConceptLab.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/ConceptLab.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>ConceptLab</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-constraints">The Constraints</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-objective">The Objective</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regularization">Regularization</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-negatives">Adaptive Negatives</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evolutionary-generation">Evolutionary Generation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#result">Result</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#creative-generation">Creative Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#concept-mixing">Concept Mixing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-setup">Evaluation Setup</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparisons">Qualitative Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparisons">Quantitative Comparisons</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> ConceptLab: Creative Generation using Diffusion Prior Constraints</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2307.06949.pdf">https://arxiv.org/pdf/2307.06949.pdf</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/kfirgoldberg/ConceptLab">Official</a></p></li>
+<li><p>Site: <a class="reference external" href="https://kfirgoldberg.github.io/ConceptLab/">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Hyoungseo Cho</p></li>
+<li><p><strong>Last updated on Nov. 20, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="conceptlab">
+<h1>ConceptLab<a class="headerlink" href="#conceptlab" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서는 Creative Generation의 일환으로, 새롭고 창의적인 개념을 생성하는 내용을 다룹니다. 최근 text-to-image 생성 기술과 Personalization 기술이 크게 발전함에 따라 이미지 생성 뿐만 아니라 개인화된 개념을 생성할 수 있게 되었습니다. 이러한 강력한 모델을 사용하여 모델에 명시적으로 설명되지 않은 새로운 창의적 개념을 생성할 수 있을까요?</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab01.png"><img alt="ConceptLab01" class="bg-primary mb-1" src="../../_images/ConceptLab01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 419 </span><span class="caption-text">ConceptLab</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="related-work">
+<h2>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<p><strong>Text-Guided Synthesis</strong> <br/>
+대부분의 text-guided 생성 기술은 pretrain 된 텍스트 인코더에서 추출한 임베딩을 diffusion 모델에 직접 conditioning합니다. 즉, 텍스트 데이터를 처리하여 이미지 생성 과정에 통합하는 방식입니다. 본 논문에서는 Latent Diffusion Model과 Diffusion prior model을 활용해서 creative generation에서의 이점을 보입니다.</p>
+<p><strong>Diffusion Prior</strong> <br/>
+Diffusion Prior 모델은 입력된 텍스트 임베딩을 CLIP의 latent space에서 해당하는 이미지 임베딩으로 매핑합니다. 이후 디코더는 CLIP의 이미지 임베딩에 condition이 부여된 이미지를 생성하기 위해 훈련됩니다.</p>
+<p><strong>Personalization</strong> <br/>
+Personalization은 text-guided synthesis 맥락에서 사용자가 입력한 텍스트 프롬프트에 맞는 주제나 스타일을 표현하는 새로운 이미지를 생성하는 것을 목표로 합니다. 일반적으로 새로운 개념을 학습시키기 위해 임베딩을 최적화하거나 denoising 네트워크를 finetuning 하는 방법을 활용합니다. 하지만 본 연구에서는 Creative Generation에 초점을 맞추고 새로운 개념을 생성하고 기발한 장면을 생성하는 것을 목표로 합니다.</p>
+<p><strong>Creative Generation</strong> <br/>
+창의적 내용을 생성하는 것은 다양한 접근 방법이 있습니다. Xu et al 에서는 set-evolution 방법을 활용해 3D 형태의 모델링을 제안했습니다. Elgammal et al 에서는 GAN의 맥락에서 창의적 생성을 탐구하며, 기존 스타일에서의 편차를 극대화하는 방식으로 새로운 스타일을 학습했습니다. Sbai et al 에서는 새로운 손실 함수를 도입했습니다. 본 연구에서는 주어진 카테고리와 일치하도록 최적화하면서도 그 카테고리의 기존 개념들과 다른 새로운 개념을 찾는 방식으로 창의적 생성에 접근했습니다. 본 방법을 통해 새로운 개념들은 서로 혼합될 수 있으며 더 유연한 생성 과정을 갖게 됩니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab02.png"><img alt="ConceptLab02" class="bg-primary mb-1" src="../../_images/ConceptLab02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 420 </span><span class="caption-text">Text-guided generation (top left), personalization methods (bottom left), creative generation method (right)</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="prelimiaries">
+<h2>Preliminaries<a class="headerlink" href="#prelimiaries" title="Permalink to this heading">#</a></h2>
+<p><strong>Latent Diffusion Models</strong> <br/>
+Latent Diffusion Model에서는 오토인코더의 latent space 내에서 diffusion 과정이 진행됩니다. 먼저, 인코더 <span class="math notranslate nohighlight">\(E\)</span>는 주어진 이미지 <span class="math notranslate nohighlight">\(x\)</span>를 latent code <span class="math notranslate nohighlight">\(z\)</span>로 매핑하는 것을 목표로 합니다. 이때, z=E(x)가 됩니다. 동시에 디코더 D는 원본 입력 이미지를 재구성하도록 합니다. DDPM의 경우 아래 주어진 손실을 최소화하도록 학습합니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L = E_{z,y,\epsilon,t} [||\epsilon - \epsilon_{\theta}(z_{t}, t, c)||_{2}^{2}]
+\]</div>
+<p>denoising network <span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> 는 잠재 코드 <span class="math notranslate nohighlight">\(z_{t}\)</span>에 추가된 잡음 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 제거합니다. 이 과정에서 현재 시간 단계 t와 조건 벡터 c도 고려됩니다.</p>
+<p><strong>Diffusion Prior</strong> <br/>
+일반적으로 Diffusion model은 CLIP 텍스트 인코딩에서 직접 파생된 조건 벡터 <span class="math notranslate nohighlight">\(c\)</span>를 활용하여 주어진 텍스트 프롬프트 <span class="math notranslate nohighlight">\(y\)</span>에 대해 훈련됩니다. Ramesh et al.에서 text-to-image 생성 문제를 2가지 단계로 decompose 합니다. 먼저, Diffusion Prior 모델을 활용하여 주어진 텍스트 프롬프트로부터 이미지 임베딩을 예측합니다. 다음으로, 이 이미지 임베딩에 조건을 부여하여 이미지를 생성하는 diffusion decoder로 보내집니다. 훈련 또한 일반적으로 두 독립적인 단계로 이루어집니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{prior} = E_{e,y,t} [||e - P_{\theta}(e_{t},t,y)||_{2}^{2}]
+\]</div>
+<p>Diffusion 디코더는 이미지 임베딩을 조건 <span class="math notranslate nohighlight">\(c\)</span>와 위 Latent Diffusion Model에 정의된 손실을 활용하여 훈련됩니다. 그 다음 diffusion prior model <span class="math notranslate nohighlight">\(P_{\theta}\)</span>는 임베딩 <span class="math notranslate nohighlight">\(e_{t}\)</span>로부터 denoise 된 이미지 임베딩 <span class="math notranslate nohighlight">\(e\)</span>를 직접 예측합니다. 이 두 단계 접근법은 이미지 다양성을 향상시키며 중간 CLIP 이미지 임베딩에 직접 접근하고 해당 공간에서 직접 제약을 할 수 있게 합니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab03.png"><img alt="ConceptLab03" class="bg-primary mb-1" src="../../_images/ConceptLab03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 421 </span><span class="caption-text">ConceptLab</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="method">
+<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<p>ConceptLab은 생성하고자 하는 새로운 개념을 대표하는 단일 임베딩 <span class="math notranslate nohighlight">\(v_{*}\)</span>를 최적화합니다. 이후 주어진 카테고리에 유사하면서도 기존 멤버들과 다른 특성을 가지도록 손실 집합을 계산합니다. 훈련하는 동안, 현재 생성된 새로운 개념을 바탕으로 negative constraints를 더하기 위해 pretrained BLIP-2 VQA 모델을 활용합니다.</p>
+<section id="the-constraints">
+<h3>The Constraints<a class="headerlink" href="#the-constraints" title="Permalink to this heading">#</a></h3>
+<p>본 연구에서는 긍정적 제약 <span class="math notranslate nohighlight">\(C_{pos}\)</span>와 부정적 제약 <span class="math notranslate nohighlight">\(C_{neg}\)</span> 두 가지를 활용합니다. 각 제약 조건은 텍스트 토큰을 활용하여 정의됩니다.</p>
+</section>
+<section id="the-objective">
+<h3>The Objective<a class="headerlink" href="#the-objective" title="Permalink to this heading">#</a></h3>
+<p>본 연구에서는 두가지 제약 조건을 바탕으로 하여 새로운 개념을 대표하는 임베딩 <span class="math notranslate nohighlight">\(v_{*}\)</span>와 각 제약 조건 간의 유사도를 측정합니다.
+우선, <span class="math notranslate nohighlight">\(v_{*}\)</span>와 각 제약 단어 <span class="math notranslate nohighlight">\(c\)</span>를 동일한 무작위 샘플링된 프롬프트 y에 통합합니다. 각 문장은 CLIP 텍스트 임베딩으로 인코딩되며, 이것이 텍스트 제약 조건을 정의합니다. 텍스트 프롬프트를 diffusion prior 모델에 통과시키면, 프롬프트의 특정 인스턴스가 생성됩니다. 이러한 방식으로 <span class="math notranslate nohighlight">\(E_{y}(v_{*})\)</span>가 diffusion prior를 통과하면 모든 <span class="math notranslate nohighlight">\(v_{*}\)</span>가 텍스트 제약 조건과 일치하도록 일관된 생성을 얻을 수 있습니다. 반면, 긍정 및 부정 제약 조건은 가능한 광범위하게 유지하고자 diffusion prior를 통과하지 않습니다. 이에 따라 본 연구에서의 손실 함수는 다음과 같이 정의됩니다:</p>
+<div class="math notranslate nohighlight">
+\[
+S(C,v_{*}) = E_{c \sim C}[\langle E_{y}(c), P(E_{y}(v_{*}))\rangle]
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+L = S(C_{neg}, v_{*}) + \lambda(1-S(C_{pos}, v_{*}))
+\]</div>
+<p>즉, 학습된 임베딩 v에서 생성된 샘플링된 이미지 임베딩 <span class="math notranslate nohighlight">\(P(E_{y}(v_{*}))\)</span>이 <span class="math notranslate nohighlight">\(C_{neg}\)</span>에 의해 정의된 텍스트 제약 조건에서 멀어지고 <span class="math notranslate nohighlight">\(C_{pos}\)</span>의 제약조건에 가까워지도록 합니다.</p>
+</section>
+</section>
+<section id="regularization">
+<h2>Regularization<a class="headerlink" href="#regularization" title="Permalink to this heading">#</a></h2>
+<p>정규화는 제약 조건 집합이 클 때 특정 멤버로의 collapsing을 방지하는 데 사용됩니다. 부정적 제약에 대한 최대 유사도를 측정하는 추가 손실 함수를 사용하는데 아래와 같이 정의됩니다:</p>
+<div class="math notranslate nohighlight">
+\[
+S_{max}(C,v_{*}) = \max_{c \sim C}(\langle E_{y}(c), P(E_{y}(v_{*}))\rangle)
+\]</div>
+<p>이 유사도 측정 방식은 전체 손실 함수에 통합되며, <span class="math notranslate nohighlight">\(S(C,v_{*})\)</span>와 평균 냄으로써 <span class="math notranslate nohighlight">\(v_{*}\)</span>에 가장 가까운 제약 조건에 더 큰 패널티를 부여합니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab04.png"><img alt="ConceptLab04" class="bg-primary mb-1" src="../../_images/ConceptLab04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 422 </span><span class="caption-text">훈련 과정 중 BLIP-2 모델을 사용하여 현재 개념에 가장 가까운 단어를 추론하고, 이를 제약 조건에 추가하는 과정을 거칩니다.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="adaptive-negatives">
+<h3>Adaptive Negatives<a class="headerlink" href="#adaptive-negatives" title="Permalink to this heading">#</a></h3>
+<p>많은 부정적 제약 조건을 수동으로 적용하는 것은 힘들고, 광범위한 카테고리의 가장 관련성 높은 멤버들을 정확하게 대표하지 못할 수도 있습니다. 이를 해결하기 위해, 훈련 중 부정적 제약 조건 집합을 점진적으로 확장하는 adaptive scheme을 제안합니다. 생성된 이미지를 사전 훈련된 BLIP-2 VQA 모델에 질의하여 이미지에 현재 존재하는 카테고리의 멤버가 무엇인지 식별하도록 합니다. 이후 결과로 나온 인스턴스를 훈련의 나머지 부분에 대한 부정적 제약 조건에 추가합니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab05.png"><img alt="ConceptLab05" class="bg-primary mb-1" src="../../_images/ConceptLab05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 423 </span><span class="caption-text">여러 단계에 걸쳐 생성된 이미지 결과를 보여줍니다. 훈련 과정에서 부정적 제약 조건이 지속적으로 조정되고 확장되었음을 보여줍니다.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="evolutionary-generation">
+<h3>Evolutionary Generation<a class="headerlink" href="#evolutionary-generation" title="Permalink to this heading">#</a></h3>
+<p>주어진 개념 셋에 대해 <em>개념을 혼합</em>하기 위해 먼저 각 개념에서 이미지를 생성하여 이미지 제약 조건 <span class="math notranslate nohighlight">\(C_{im}\)</span> 을 만듭니다. 각 이미지는 CLIP 이미지 인코더 <span class="math notranslate nohighlight">\(E_{im}(c)\)</span>를 통과하여 임베딩 세트를 생성합니다. 학습 가능한 개념 <span class="math notranslate nohighlight">\(v_{mix}\)</span>를 주어진 임베딩에 더 가깝게 만드는 수정된 손실 함수를 적용합니다:</p>
+<div class="math notranslate nohighlight">
+\[
+L_{mix} = 1 - E_{c \sim C}[\langle E_{im}(c), P(E_{y}(v_{mix}))\rangle]
+\]</div>
+<p>이 손실 함수는 생성된 개념이나 실제 이미지에 적용될 수 있으며, 창의적인 생성물의 계층적 생성을 위해 반복적으로 적용될 수 있습니다. 또, 생성된 결과물에 대한 각 개념의 영향을 더 잘 제어하기 위해 가중치 항목이 추가적으로 적용될 수 있습니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab06.png"><img alt="ConceptLab06" class="bg-primary mb-1" src="../../_images/ConceptLab06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 424 </span><span class="caption-text">그림에는 훈련에 사용된 긍정적 개념이 왼쪽에 표시되어 있습니다. 이를 통해 모델이 어떤 개념을 기반으로 창의적 이미지를 생성했는지를 알 수 있습니다. 모든 결과는 Adaptive Negative 기법을 활용했습니다.</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab07.png"><img alt="ConceptLab07" class="bg-primary mb-1" src="../../_images/ConceptLab07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 425 </span><span class="caption-text">ConceptLab이 제안한 다양한 이미지로 프롬프트와 Adaptive Negative 기법을 적용했습니다.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab08.png"><img alt="ConceptLab08" class="bg-primary mb-1" src="../../_images/ConceptLab08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 426 </span><span class="caption-text">ConceptLab은 생성된 개념들을 혼합하여 새롭고 독특한 창조물을 반복적으로 학습할 수 있습니다. 그림의 가장 윗줄에서는 Adaptive Negative 기법을 적용하여 학습된 개념들을 보여줍니다. 이어지는 줄에서는 Evolutionary Generation 과정을 통해 얻어진 개념들을 보여줍니다.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>ConceptLab의 효과를 입증하기 위해 정성적 및 정량적 평가를 진행했습니다.</p>
+<section id="result">
+<h3>Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h3>
+</section>
+<section id="creative-generation">
+<h3>Creative Generation<a class="headerlink" href="#creative-generation" title="Permalink to this heading">#</a></h3>
+<p>위 그림들에서 볼 수 있듯이 모든 결과는 Adaptive Negative를 적용하였고 훈련 시드를 달리하며 다양한 개념을 생성할 수 있는 능력이 있음을 볼 수 있습니다. 또, ConceptLab은 학습된 창의적 개념을 새로운 장면에 배치할 수 있습니다. 이 생성물들은 배경 변경, 스타일 변경, 새로운 창조 등 다양하게 활용 가능합니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab09.png"><img alt="ConceptLab09" class="bg-primary mb-1" src="../../_images/ConceptLab09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 427 </span><span class="caption-text">ConceptLab을 활용한 Concept Mixing의 결과를 보여줍니다.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="concept-mixing">
+<h3>Concept Mixing<a class="headerlink" href="#concept-mixing" title="Permalink to this heading">#</a></h3>
+<p>Concept Mixing은 다양한 실제 개념들의 독특한 특성을 합쳐 하이브리드 개념을 형성하는 방법을 보여줍니다. 이 방법은 오직 긍정적 제약 조건만을 활용합니다. 예를 들어, 첫 번째 줄에는 랍스터의 주요 특징(색상과 집게발)을 거북이의 특징(등껍질)과 융합하는 것을 볼 수 있습니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab10.png"><img alt="ConceptLab10" class="bg-primary mb-1" src="../../_images/ConceptLab10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 428 </span><span class="caption-text">위 그림은 ConceptLab에 의해 학습된 개념들이 여러 <em>세대</em>에 걸쳐 어떻게 발전하는지 보여줍니다.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="comparisons">
+<h3>Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
+</section>
+<section id="evaluation-setup">
+<h3>Evaluation Setup<a class="headerlink" href="#evaluation-setup" title="Permalink to this heading">#</a></h3>
+<p>ConceptLab은 Stable Diffusion2와 Kandinsky 2.1 두 모델과 함께 평가했습니다. Kandinsky의 경우, 더 유리한 결과를 위해 부정적 프롬프트는 Latent Diffusion Model이 아닌 Diffusion Prior Model에 적용했습니다.</p>
+</section>
+<section id="qualitative-comparisons">
+<h3>Qualitative Comparisons<a class="headerlink" href="#qualitative-comparisons" title="Permalink to this heading">#</a></h3>
+<p>ConceptLab은 긍정적 토큰과 부정적 제약 조건 모두에 일관되게 맞춰질 수 있습니다. 즉, ConceptLab은 다중 제약 조건을 효과적으로 처리하고, 특정 개념에 대한 일관된 표현을 학습할 수 있는 능력을 갖추고 있습니다.</p>
+</section>
+<section id="quantitative-comparisons">
+<h3>Quantitative Comparisons<a class="headerlink" href="#quantitative-comparisons" title="Permalink to this heading">#</a></h3>
+<p>정량적 평가를 위해 각 방법이 긍정적 개념을 포함하며, 주어진 부정적 개념과 닮지 않은 이미지를 생성하는 능력을 측정했습니다. 평가에는 애완동물, 식물, 과일, 가구, 악기의 5가지 카테고리를 활용했습니다. 각 도메인에 세 가지 다른 부정적 개념 쌍을 고려하고, 각 조합에 대해 ConceptLab을 5개의 랜덤 시드로 훈련하여 총 75개의 학습된 개념을 얻었습니다. 각 학습된 개념에 대해 “A photo of a <span class="math notranslate nohighlight">\(S_{*}\)</span>” 프롬프트를 활용하여 32개의 이미지를 생성했습니다. Stable Diffusion과 Kandinsky 모델에서는 부정적 프롬프트를 사용하고, 같은 긍정적 및 부정적 개념 쌍에 대해 160개의 이미지를 생성합니다. 측정 기준으로는 먼저 각 개념의 긍정적 유사성을 타겟 카테고리와의 CLIP 공간 유사성 계산을 통해 측정합니다. 다음으로는 긍정적 제약과 부정적 제약 사이의 거리를 측정합니다. 이는 생성된 이미지와 모든 부정적 개념 사이의 최대 유사성 계산을 통해 이루어집니다. 결과적으로 ConceptLab은 5가지 모든 도메인에서 긍정적 CLIP 유사성에서 일관되게 우월한 성능을 보였고 타겟 카테고리에 속하는 이미지를 신뢰성 있게 생성했습니다. 또한, 부정적 거리 측정에서 ConceptLab은 모든 카테고리에서 Stable Diffusion을, 4가지 카테고리에서 Kandinsky를 능가했습니다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab11.png"><img alt="ConceptLab11" class="bg-primary mb-1" src="../../_images/ConceptLab11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 429 </span><span class="caption-text">User Study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="limitations">
+<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<p>Personalization과 유사하게, 학습된 개념을 포함하는 프롬프트를 사용하여 새로운 이미지를 생성하는 것이 항상 개념의 특성을 다양한 프롬프트에 걸쳐 유지하지는 못합니다. 또, 최적화 과정 자체가 항상 원하는 결과를 가져오지는 않습니다. “비행기”나 “물고기”와 같은 일부 클래스의 경우 ConceptLab은 창의적 개념을 생성하는데 여전히 어려움이 있습니다. 이는 BLIP-2에 의해 생성되는 부정적 제약과 관련이 있습니다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ConceptLab12.png"><img alt="ConceptLab12" class="bg-primary mb-1" src="../../_images/ConceptLab12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 430 </span><span class="caption-text">Limitations</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="conclusion">
+<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서는 text-to-image diffusion model을 활용하여 창의적 생성을 위한 새로운 접근 방법을 소개했습니다. 주어진 광범위한 카테고리에 속하는 새로운 개념을 학습하기 위해 Diffusion Prior 모델 사용을 제안했습니다. 또, Prior Constraints라는 긍정적 및 부정적 제약 조건들을 diffusion prior 출력에 적용했습니다. 최적화 과정에서는 VQA 모델을 활용하여 독특하면서도 기존 멤버들과의 명확한 구별을 보장했습니다. 이후 실험을 통해 본 방법의 효과성을 입증했으며 시각적으로 다양하고 매력적인 개념을 생성할 수 있었습니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="progressive_distillation.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Progressive Distillation for Fast Sampling of Diffusion Models</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Diffusion_models_already_have_a_Semantic_Latent_Space.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Diffusion Models already have a Semantic Latent Space</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-constraints">The Constraints</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#the-objective">The Objective</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regularization">Regularization</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-negatives">Adaptive Negatives</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evolutionary-generation">Evolutionary Generation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#result">Result</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#creative-generation">Creative Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#concept-mixing">Concept Mixing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-setup">Evaluation Setup</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparisons">Qualitative Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparisons">Quantitative Comparisons</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/ControlNet.html b/docs/review/ControlNet.html
old mode 100644
new mode 100755
index f38caeb3..73f8cd5d
--- a/docs/review/ControlNet.html
+++ b/docs/review/ControlNet.html
@@ -1,846 +1,866 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>ControlNet &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/ControlNet';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Introduction" href="Latent_Diffusion_Model.html" />
-    <link rel="prev" title="DreamBooth" href="dreambooth.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/ControlNet.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/ControlNet.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>ControlNet</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-control-with-image-based-condition">Additional Control with Image-based condition</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controlnet-block">ControlNet Block</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-convolution">Zero Convolution</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-results">Training &amp; Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Adding Conditional Control to Text-to-Image Diffusion Models (arxiv 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2302.05543">https://arxiv.org/abs/2302.05543</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/lllyasviel/ControlNet">lllyasviel/ControlNet</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jisu Kim</p></li>
-<li><p><strong>Last updated on May. 28, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="controlnet">
-<h1>ControlNet<a class="headerlink" href="#controlnet" title="Permalink to this heading">#</a></h1>
-<section id="additional-control-with-image-based-condition">
-<h2>Additional Control with Image-based condition<a class="headerlink" href="#additional-control-with-image-based-condition" title="Permalink to this heading">#</a></h2>
-<p>기존의 Text-to-Image 모델들은 text prompt로 생성할 이미지의 특징을 조절할 수 있었습니다. 하지만 이런 prompt-based control만으로 이미지의 특징을 조절하는데 한계가 있었습니다. 이 논문에서는 image-based condition을 추가적으로 줘서 생성되는 이미지의 특징을 더 잘 조절하는 ControlNet이라는 신경망 구조를 제안합니다.</p>
-<p>아래 그림은 “a high quality, detailed, and professional image”라는 prompt와 왼쪽 아래의 Canny edge를 input으로 받아서 오른쪽의 이미지들을 생성한 것입니다. 이런 식으로 추가적인 image-based condition (아래 그림에서는 Canny edge)를 input으로 받아 이미지를 생성하는 것이 ControlNet이 하는 역할입니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/deer.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/deer.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 140 </span><span class="caption-text">Images generated by ControlNet</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그러면 어떤 구조를 사용해서 이를 가능하게 했을까요? 이제부터 이에 대해 알아보도록 하겠습니다.</p>
-</section>
-<section id="controlnet-block">
-<h2>ControlNet Block<a class="headerlink" href="#controlnet-block" title="Permalink to this heading">#</a></h2>
-<p>ControlNet의 block 구조는 다음과 같은 두 가지 특징을 가집니다.</p>
-<ol class="arabic simple">
-<li><p>pretrained model의 locked copy와 trainable copy를 사용</p></li>
-<li><p>zero convolution</p></li>
-</ol>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/block.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/block.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 141 </span><span class="caption-text">ControlNet block</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>왜 이렇게 설계했는지 알아봅시다.</p>
-<p>우선, copy를 사용하는 이유는 기존에 방대한 양의 데이터로 학습시킨 pretrained model의 성능을 유지하기 위해서입니다. 또한, ControlNet의 학습 데이터가 양이 적은 경우에 오버피팅을 피할 수 있는 효과도 있을 것입니다.</p>
-<p>zero convolution이란 weight랑 bias를 0으로 초기화한 1x1 convolution을 말합니다. zero convolution을 사용할 경우 훈련이 시작되기 전에는 input에 대해 pretrained model과 ControlNet의 output이 똑같아집니다. 따라서 기존 모델이랑 똑같은 input, output을 가지게되므로 기존 모델의 성능을 유지할 수 있으며, 추가적인 훈련이 fine tuning을 하는 것과 비슷하므로 scratch부터 학습하는 것에 비해 빠르게 훈련시킬 수 있게됩니다.</p>
-<p>그러면 zero convolution은 어떻게 이를 가능하게 하는지 좀 더 자세히 알아봅시다.</p>
-</section>
-<section id="zero-convolution">
-<h2>Zero Convolution<a class="headerlink" href="#zero-convolution" title="Permalink to this heading">#</a></h2>
-<p>먼저 위의 그림에서 (a)에 해당하는 부분을 아래와 같이 수식으로 표현하겠습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf{y}=\mathcal{F}(\mathbf{x};\Theta)
-\]</div>
-<p><span class="math notranslate nohighlight">\(\mathbf{x}\)</span>는 input feature map, <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>는 neural network block, <span class="math notranslate nohighlight">\(\Theta\)</span>는 <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>의 parameter, <span class="math notranslate nohighlight">\(\mathbf{y}\)</span>는 output을 의미합니다. 위 그림의 (b)를 수식으로 표현하기위해 <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>의 trainable copy를 만들어서 parameter를 <span class="math notranslate nohighlight">\(\Theta_{c}\)</span>라고하고 <span class="math notranslate nohighlight">\(\Theta\)</span>는 고정시켜두겠습니다. 또한, zero convolution은 <span class="math notranslate nohighlight">\(\mathcal{Z}\)</span>로 표현하고 두 zero convolution의 parameter를 각각 <span class="math notranslate nohighlight">\(\Theta_{z1}, \Theta_{z2}\)</span>로 두겠습니다. 그러면 (b)에서 condition <span class="math notranslate nohighlight">\(\mathbf{c}\)</span>에 대한 output <span class="math notranslate nohighlight">\(\mathbf{y}_{c}\)</span>는 아래와 같이 표현할 수 있습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf{y}_{c}=\mathcal{F}(\mathbf{x};\Theta)+\mathcal{Z}(\mathcal{F}(\mathbf{x}+\mathcal{Z}(\mathbf{c};\Theta_{z1});\Theta_{c});\Theta_{z2})
-\]</div>
-<p>그런데 <span class="math notranslate nohighlight">\(\mathcal{Z}\)</span>의 weight와 bias의 초깃값이 0이므로 훈련이 진행되지 않았을 경우 <span class="math notranslate nohighlight">\(\mathbf{y}_{c}=\mathbf{y}\)</span>입니다. 따라서 훈련 시작 전에는 ControlNet과 기존 모델이 같은 결과를 내므로 기존 모델의 성능을 보존할 수 있습니다.</p>
-<p>그런데 weight랑 bias가 전부 0으로 초기화되어있으면 gradient가 0이라서 훈련이 안 되는거 아닐까요? 이를 확인하기 위해 다음과 같이 간단한 경우를 생각해보죠.</p>
-<div class="math notranslate nohighlight">
-\[
-y=wx+b
-\]</div>
-<p>gradient는 다음과 같습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{\partial y}{\partial w}=x,\; \frac{\partial y}{\partial x}=w,\; \frac{\partial y}{\partial b}=1 
-\]</div>
-<p>weight랑 bias가 0이고, <span class="math notranslate nohighlight">\(x\neq0\)</span>이라고 하면</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{\partial y}{\partial w}\neq0,\; \frac{\partial y}{\partial x}=0,\; \frac{\partial y}{\partial b}\neq0 
-\]</div>
-<p>입니다. 따라서 첫 번째 gradient step에서 weight는 0이 아닌 값으로 가게되고, <span class="math notranslate nohighlight">\(\frac{\partial y}{\partial x}\neq0\)</span>이 되므로 훈련이 됩니다. 여기서 핵심적인 가정이 <span class="math notranslate nohighlight">\(x\neq0\)</span>인데 이 부분은 잘 훈련된 pretrained model을 사용하고 있기 때문에 위배될 가능성이 낮을 것입니다.</p>
-<p>지금까지 얘기한 ControlNet block 구조를 pretrained Stable diffusion에 적용한 전체 구조는 아래 그림과 같습니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/structure.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/structure.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 142 </span><span class="caption-text">Overall structure</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="training-results">
-<h2>Training &amp; Results<a class="headerlink" href="#training-results" title="Permalink to this heading">#</a></h2>
-<p>training loss는 기존 stable diffusion에서 image-based condition <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>가 추가된 형태입니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/loss.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/loss.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 143 </span><span class="caption-text">Loss</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>training을 할 때 50%의 확률로 prompt <span class="math notranslate nohighlight">\(\mathbf{c}_{t}\)</span>를 empty string으로 바꿔주었다고 합니다. 이는 prompt가 주어지지않을 경우 모델이 <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>로부터 semantics를 더 배우는 경향이 있기 때문에 이미지 생성을 <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>로 조절하는 능력을 향상시켜줄 수 있다고 합니다.</p>
-<p>아래 결과는 training이 기존 방법보다 효율적이라는 것을 보여줍니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/efficiency.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/efficiency.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 144 </span><span class="caption-text">Efficiency</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래 결과들은 task에 따른 결과들입니다. 더 많은 이미지들이 논문에 있으니 참고하시기 바랍니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/pose.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/pose.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 145 </span><span class="caption-text">Pose</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/seg.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/seg.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 146 </span><span class="caption-text">Images generated by ControlNet</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래는 논문에서 limitation이라고 언급한 이미지입니다. 텍스트로 추가적인 정보를 주었음에도 원하는 이미지가 생성되지 않는 경우가 발생했습니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/limit.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/limit.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 147 </span><span class="caption-text">Limitations</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="implementation">
-<h2>Implementation<a class="headerlink" href="#implementation" title="Permalink to this heading">#</a></h2>
-<p>코드는 공식 구현(<a class="reference external" href="https://github.com/lllyasviel/ControlNet">링크</a>)에서 가져왔습니다. 아래 코드는 parameter를 0으로 초기화하는 코드로 zero convolution을 만들 때 사용됩니다.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">zero_module</span><span class="p">(</span><span class="n">module</span><span class="p">):</span>
-<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
-<span class="sd">    Zero out the parameters of a module and return it.</span>
-<span class="sd">    &quot;&quot;&quot;</span>
-    <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
-        <span class="n">p</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">zero_</span><span class="p">()</span>
-    <span class="k">return</span> <span class="n">module</span>
-</pre></div>
-</div>
-<p>아래 코드는 기본적으로 nn.Sequential과 같은데 time step같은 추가적인 input을 받아줄 수 있게 만든 것입니다.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">TimestepEmbedSequential</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">,</span> <span class="n">TimestepBlock</span><span class="p">):</span>
-<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
-<span class="sd">    A sequential module that passes timestep embeddings to the children that</span>
-<span class="sd">    support it as an extra input.</span>
-<span class="sd">    &quot;&quot;&quot;</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
-        <span class="k">for</span> <span class="n">layer</span> <span class="ow">in</span> <span class="bp">self</span><span class="p">:</span>
-            <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">TimestepBlock</span><span class="p">):</span>
-                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">emb</span><span class="p">)</span>
-            <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">SpatialTransformer</span><span class="p">):</span>
-                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
-            <span class="k">else</span><span class="p">:</span>
-                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">x</span>
-</pre></div>
-</div>
-<p>아래 코드는 공식 github의 cldm/cldm.py에 있는 ControlNet class입니다. init 부분은 길어서 생략했습니다.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">ControlNet</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="o">...</span><span class="p">):</span>
-        <span class="o">...</span>
-
-    <span class="k">def</span> <span class="nf">make_zero_conv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">):</span>
-        <span class="k">return</span> <span class="n">TimestepEmbedSequential</span><span class="p">(</span><span class="n">zero_module</span><span class="p">(</span><span class="n">conv_nd</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dims</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">0</span><span class="p">)))</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">hint</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
-        <span class="n">t_emb</span> <span class="o">=</span> <span class="n">timestep_embedding</span><span class="p">(</span><span class="n">timesteps</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model_channels</span><span class="p">,</span> <span class="n">repeat_only</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-        <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_embed</span><span class="p">(</span><span class="n">t_emb</span><span class="p">)</span>
-
-        <span class="n">guided_hint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">input_hint_block</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
-
-        <span class="n">outs</span> <span class="o">=</span> <span class="p">[]</span>
-
-        <span class="n">h</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
-        <span class="k">for</span> <span class="n">module</span><span class="p">,</span> <span class="n">zero_conv</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">input_blocks</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">zero_convs</span><span class="p">):</span>
-            <span class="k">if</span> <span class="n">guided_hint</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-                <span class="n">h</span> <span class="o">=</span> <span class="n">module</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
-                <span class="n">h</span> <span class="o">+=</span> <span class="n">guided_hint</span>
-                <span class="n">guided_hint</span> <span class="o">=</span> <span class="kc">None</span>
-            <span class="k">else</span><span class="p">:</span>
-                <span class="n">h</span> <span class="o">=</span> <span class="n">module</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
-            <span class="n">outs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">zero_conv</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">))</span>
-
-        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">middle_block</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
-        <span class="n">outs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">middle_block_out</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">))</span>
-
-        <span class="k">return</span> <span class="n">outs</span>
-</pre></div>
-</div>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="dreambooth.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DreamBooth</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Latent_Diffusion_Model.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Introduction</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-control-with-image-based-condition">Additional Control with Image-based condition</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controlnet-block">ControlNet Block</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-convolution">Zero Convolution</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-results">Training &amp; Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>ControlNet &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/ControlNet';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Introduction" href="Latent_Diffusion_Model.html" />
+    <link rel="prev" title="DreamBooth" href="dreambooth.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/ControlNet.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/ControlNet.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>ControlNet</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-control-with-image-based-condition">Additional Control with Image-based condition</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controlnet-block">ControlNet Block</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-convolution">Zero Convolution</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-results">Training &amp; Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Adding Conditional Control to Text-to-Image Diffusion Models (arxiv 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2302.05543">https://arxiv.org/abs/2302.05543</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/lllyasviel/ControlNet">lllyasviel/ControlNet</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jisu Kim</p></li>
+<li><p><strong>Last updated on May. 28, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="controlnet">
+<h1>ControlNet<a class="headerlink" href="#controlnet" title="Permalink to this heading">#</a></h1>
+<section id="additional-control-with-image-based-condition">
+<h2>Additional Control with Image-based condition<a class="headerlink" href="#additional-control-with-image-based-condition" title="Permalink to this heading">#</a></h2>
+<p>기존의 Text-to-Image 모델들은 text prompt로 생성할 이미지의 특징을 조절할 수 있었습니다. 하지만 이런 prompt-based control만으로 이미지의 특징을 조절하는데 한계가 있었습니다. 이 논문에서는 image-based condition을 추가적으로 줘서 생성되는 이미지의 특징을 더 잘 조절하는 ControlNet이라는 신경망 구조를 제안합니다.</p>
+<p>아래 그림은 “a high quality, detailed, and professional image”라는 prompt와 왼쪽 아래의 Canny edge를 input으로 받아서 오른쪽의 이미지들을 생성한 것입니다. 이런 식으로 추가적인 image-based condition (아래 그림에서는 Canny edge)를 input으로 받아 이미지를 생성하는 것이 ControlNet이 하는 역할입니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/deer.png"><img alt="Images generated by ControlNet" class="bg-primary mb-1" src="../../_images/deer.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 140 </span><span class="caption-text">Images generated by ControlNet</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그러면 어떤 구조를 사용해서 이를 가능하게 했을까요? 이제부터 이에 대해 알아보도록 하겠습니다.</p>
+</section>
+<section id="controlnet-block">
+<h2>ControlNet Block<a class="headerlink" href="#controlnet-block" title="Permalink to this heading">#</a></h2>
+<p>ControlNet의 block 구조는 다음과 같은 두 가지 특징을 가집니다.</p>
+<ol class="arabic simple">
+<li><p>pretrained model의 locked copy와 trainable copy를 사용</p></li>
+<li><p>zero convolution</p></li>
+</ol>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/block.png"><img alt="ControlNet block" class="bg-primary mb-1" src="../../_images/block.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 141 </span><span class="caption-text">ControlNet block</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>왜 이렇게 설계했는지 알아봅시다.</p>
+<p>우선, copy를 사용하는 이유는 기존에 방대한 양의 데이터로 학습시킨 pretrained model의 성능을 유지하기 위해서입니다. 또한, ControlNet의 학습 데이터가 양이 적은 경우에 오버피팅을 피할 수 있는 효과도 있을 것입니다.</p>
+<p>zero convolution이란 weight랑 bias를 0으로 초기화한 1x1 convolution을 말합니다. zero convolution을 사용할 경우 훈련이 시작되기 전에는 input에 대해 pretrained model과 ControlNet의 output이 똑같아집니다. 따라서 기존 모델이랑 똑같은 input, output을 가지게 되므로 기존 모델의 성능을 유지할 수 있으며, 추가적인 훈련이 fine tuning을 하는 것과 비슷하므로 scratch부터 학습하는 것에 비해 빠르게 훈련시킬 수 있게 됩니다.</p>
+<p>그러면 zero convolution은 어떻게 이를 가능하게 하는지 좀 더 자세히 알아봅시다.</p>
+</section>
+<section id="zero-convolution">
+<h2>Zero Convolution<a class="headerlink" href="#zero-convolution" title="Permalink to this heading">#</a></h2>
+<p>먼저 위의 그림에서 (a)에 해당하는 부분을 아래와 같이 수식으로 표현하겠습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf{y}=\mathcal{F}(\mathbf{x};\Theta)
+\]</div>
+<p><span class="math notranslate nohighlight">\(\mathbf{x}\)</span>는 input feature map, <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>는 neural network block, <span class="math notranslate nohighlight">\(\Theta\)</span>는 <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>의 parameter, <span class="math notranslate nohighlight">\(\mathbf{y}\)</span>는 output을 의미합니다. 위 그림의 (b)를 수식으로 표현하기위해 <span class="math notranslate nohighlight">\(\mathcal{F}\)</span>의 trainable copy를 만들어서 parameter를 <span class="math notranslate nohighlight">\(\Theta_{c}\)</span>라고하고 <span class="math notranslate nohighlight">\(\Theta\)</span>는 고정시켜두겠습니다. 또한, zero convolution은 <span class="math notranslate nohighlight">\(\mathcal{Z}\)</span>로 표현하고 두 zero convolution의 parameter를 각각 <span class="math notranslate nohighlight">\(\Theta_{z1}, \Theta_{z2}\)</span>로 두겠습니다. 그러면 (b)에서 condition <span class="math notranslate nohighlight">\(\mathbf{c}\)</span>에 대한 output <span class="math notranslate nohighlight">\(\mathbf{y}_{c}\)</span>는 아래와 같이 표현할 수 있습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf{y}_{c}=\mathcal{F}(\mathbf{x};\Theta)+\mathcal{Z}(\mathcal{F}(\mathbf{x}+\mathcal{Z}(\mathbf{c};\Theta_{z1});\Theta_{c});\Theta_{z2})
+\]</div>
+<p>그런데 <span class="math notranslate nohighlight">\(\mathcal{Z}\)</span>의 weight와 bias의 초깃값이 0이므로 훈련이 진행되지 않았을 경우 <span class="math notranslate nohighlight">\(\mathbf{y}_{c}=\mathbf{y}\)</span>입니다. 따라서 훈련 시작 전에는 ControlNet과 기존 모델이 같은 결과를 내므로 기존 모델의 성능을 보존할 수 있습니다.</p>
+<p>그런데 weight랑 bias가 전부 0으로 초기화되어있으면 gradient가 0이라서 훈련이 안 되는거 아닐까요? 이를 확인하기 위해 다음과 같이 간단한 경우를 생각해보죠.</p>
+<div class="math notranslate nohighlight">
+\[
+y=wx+b
+\]</div>
+<p>gradient는 다음과 같습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{\partial y}{\partial w}=x,\; \frac{\partial y}{\partial x}=w,\; \frac{\partial y}{\partial b}=1 
+\]</div>
+<p>weight랑 bias가 0이고, <span class="math notranslate nohighlight">\(x\neq0\)</span>이라고 하면</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{\partial y}{\partial w}\neq0,\; \frac{\partial y}{\partial x}=0,\; \frac{\partial y}{\partial b}\neq0 
+\]</div>
+<p>입니다. 따라서 첫 번째 gradient step에서 weight는 0이 아닌 값으로 가게되고, <span class="math notranslate nohighlight">\(\frac{\partial y}{\partial x}\neq0\)</span>이 되므로 훈련이 됩니다. 여기서 핵심적인 가정이 <span class="math notranslate nohighlight">\(x\neq0\)</span>인데 이 부분은 잘 훈련된 pretrained model을 사용하고 있기 때문에 위배될 가능성이 낮을 것입니다.</p>
+<p>지금까지 얘기한 ControlNet block 구조를 pretrained Stable diffusion에 적용한 전체 구조는 아래 그림과 같습니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/structure.png"><img alt="Overall structure" class="bg-primary mb-1" src="../../_images/structure.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 142 </span><span class="caption-text">Overall structure</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="training-results">
+<h2>Training &amp; Results<a class="headerlink" href="#training-results" title="Permalink to this heading">#</a></h2>
+<p>training loss는 기존 stable diffusion에서 image-based condition <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>가 추가된 형태입니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/loss.png"><img alt="Loss" class="bg-primary mb-1" src="../../_images/loss.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 143 </span><span class="caption-text">Loss</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>training을 할 때 50%의 확률로 prompt <span class="math notranslate nohighlight">\(\mathbf{c}_{t}\)</span>를 empty string으로 바꿔주었다고 합니다. 이는 prompt가 주어지지않을 경우 모델이 <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>로부터 semantics를 더 배우는 경향이 있기 때문에 이미지 생성을 <span class="math notranslate nohighlight">\(\mathbf{c}_{f}\)</span>로 조절하는 능력을 향상시켜줄 수 있다고 합니다.</p>
+<p>아래 결과는 training이 기존 방법보다 효율적이라는 것을 보여줍니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/efficiency.png"><img alt="Efficiency" class="bg-primary mb-1" src="../../_images/efficiency.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 144 </span><span class="caption-text">Efficiency</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래 결과들은 task에 따른 결과들입니다. 더 많은 이미지들이 논문에 있으니 참고하시기 바랍니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/pose.png"><img alt="Pose" class="bg-primary mb-1" src="../../_images/pose.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 145 </span><span class="caption-text">Pose</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/seg.png"><img alt="Images generated by ControlNet" class="bg-primary mb-1" src="../../_images/seg.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 146 </span><span class="caption-text">Images generated by ControlNet</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래는 논문에서 limitation이라고 언급한 이미지입니다. 텍스트로 추가적인 정보를 주었음에도 원하는 이미지가 생성되지 않는 경우가 발생했습니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/limit.png"><img alt="Limitations" class="bg-primary mb-1" src="../../_images/limit.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 147 </span><span class="caption-text">Limitations</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="implementation">
+<h2>Implementation<a class="headerlink" href="#implementation" title="Permalink to this heading">#</a></h2>
+<p>코드는 공식 구현(<a class="reference external" href="https://github.com/lllyasviel/ControlNet">링크</a>)에서 가져왔습니다. 아래 코드는 parameter를 0으로 초기화하는 코드로 zero convolution을 만들 때 사용됩니다.</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">zero_module</span><span class="p">(</span><span class="n">module</span><span class="p">):</span>
+<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Zero out the parameters of a module and return it.</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">module</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
+        <span class="n">p</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">zero_</span><span class="p">()</span>
+    <span class="k">return</span> <span class="n">module</span>
+</pre></div>
+</div>
+<p>아래 코드는 기본적으로 nn.Sequential과 같은데 time step같은 추가적인 input을 받아줄 수 있게 만든 것입니다.</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">TimestepEmbedSequential</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">,</span> <span class="n">TimestepBlock</span><span class="p">):</span>
+<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    A sequential module that passes timestep embeddings to the children that</span>
+<span class="sd">    support it as an extra input.</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+        <span class="k">for</span> <span class="n">layer</span> <span class="ow">in</span> <span class="bp">self</span><span class="p">:</span>
+            <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">TimestepBlock</span><span class="p">):</span>
+                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">emb</span><span class="p">)</span>
+            <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">SpatialTransformer</span><span class="p">):</span>
+                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+            <span class="k">else</span><span class="p">:</span>
+                <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">x</span>
+</pre></div>
+</div>
+<p>아래 코드는 공식 github의 cldm/cldm.py에 있는 ControlNet class입니다. init 부분은 길어서 생략했습니다.</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">ControlNet</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="o">...</span><span class="p">):</span>
+			<span class="o">...</span>
+
+		<span class="k">def</span><span class="w"> </span><span class="nf">make_zero_conv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">):</span>
+        <span class="k">return</span> <span class="n">TimestepEmbedSequential</span><span class="p">(</span><span class="n">zero_module</span><span class="p">(</span><span class="n">conv_nd</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dims</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">0</span><span class="p">)))</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">hint</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+        <span class="n">t_emb</span> <span class="o">=</span> <span class="n">timestep_embedding</span><span class="p">(</span><span class="n">timesteps</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model_channels</span><span class="p">,</span> <span class="n">repeat_only</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+        <span class="n">emb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_embed</span><span class="p">(</span><span class="n">t_emb</span><span class="p">)</span>
+
+        <span class="n">guided_hint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">input_hint_block</span><span class="p">(</span><span class="n">hint</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+
+        <span class="n">outs</span> <span class="o">=</span> <span class="p">[]</span>
+
+        <span class="n">h</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
+        <span class="k">for</span> <span class="n">module</span><span class="p">,</span> <span class="n">zero_conv</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">input_blocks</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">zero_convs</span><span class="p">):</span>
+            <span class="k">if</span> <span class="n">guided_hint</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+                <span class="n">h</span> <span class="o">=</span> <span class="n">module</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+                <span class="n">h</span> <span class="o">+=</span> <span class="n">guided_hint</span>
+                <span class="n">guided_hint</span> <span class="o">=</span> <span class="kc">None</span>
+            <span class="k">else</span><span class="p">:</span>
+                <span class="n">h</span> <span class="o">=</span> <span class="n">module</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+            <span class="n">outs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">zero_conv</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">))</span>
+
+        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">middle_block</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+        <span class="n">outs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">middle_block_out</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">emb</span><span class="p">,</span> <span class="n">context</span><span class="p">))</span>
+
+        <span class="k">return</span> <span class="n">outs</span>
+</pre></div>
+</div>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="dreambooth.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DreamBooth</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Latent_Diffusion_Model.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Introduction</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-control-with-image-based-condition">Additional Control with Image-based condition</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controlnet-block">ControlNet Block</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-convolution">Zero Convolution</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-results">Training &amp; Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/CustomDiffusion.html b/docs/review/CustomDiffusion.html
old mode 100644
new mode 100755
index 5c410eeb..95414aad
--- a/docs/review/CustomDiffusion.html
+++ b/docs/review/CustomDiffusion.html
@@ -1,998 +1,1018 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Custom Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/CustomDiffusion';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="LoRA" href="LoRA.html" />
-    <link rel="prev" title="Textual Inversion" href="Textual_Inversion.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/CustomDiffusion.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/CustomDiffusion.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Custom Diffusion</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#deep-generative-models-image-and-model-editing">Deep Generative Models &amp; Image and model editing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transfer-learning">Transfer learning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapting-text-to-image-models">Adapting text-to-image models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#single-concept-fine-tuning">Single Concept Fine-tuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-concept-compositional-fine-tuning">Multiple-Concept Compositional Fine-tuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training Details</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion-limitation">5. Discussion &amp; Limitation</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> A Multi-Concept Customiziation of Text-To-Image Diffusion (CVPR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2212.04488">https://arxiv.org/abs/2212.04488</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/adobe-research/custom-diffusion">Official:</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="custom-diffusion">
-<h1>Custom Diffusion<a class="headerlink" href="#custom-diffusion" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Large Scale Data를 학습한 Generate 모델이 뛰어난 성능을 보이는 추세</p></li>
-<li><p>User의 Private한 Concept을 생성하고자하는 욕구는 여전히 풀지 못함</p></li>
-<li><p>Custom Diffusion은?</p>
-<ol class="arabic simple">
-<li><p>기존 Diffusion 모델의 partial한 부분만을 학습시킴으로써 기존보다 더 빠른 finetuning 방식을 제안</p></li>
-<li><p>Single Concept 뿐 아니라, Multiple Concept에 대한 학습이 가능</p></li>
-<li><p>다양한 Fine tuned 모델을 하나의 모델로 Compress하는 방식을 제안</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>최근 Text-To-Image 모델들이 활발하게 연구 되어짐</p></li>
-<li><p>단순한 text prompt 입력만으로 원하는 이미지를 생성해내는 수준까지 이름</p></li>
-<li><p>하지만 이러한 모델들은 General한 이미지는 잘 생성하지만, User가 원하는 Private한 (=specific) Concept의 이미지는 생성해내지 못함</p>
-<ul>
-<li><p>e.g. 행복한 우리 가족 사진, 우리집 강아지 뽀삐가 파리로 여행을 떠나는 사진 등</p></li>
-</ul>
-</li>
-<li><p>학습 과정중에 User의 Private한 데이터를 보지 못했기때문에 Model에게는 당연한 결과</p></li>
-<li><p><strong>Customization</strong></p>
-<ul>
-<li><p>몇장의 Concept을 포함하는 이미지만으로 Pretrained 모델을 finetuning하는 방식</p>
-<ul>
-<li><p>In Dreambooth, Personalization</p></li>
-</ul>
-</li>
-<li><p>목표</p>
-<ol class="arabic simple">
-<li><p>학습하고자하는 Private한 Concept의 이미지를 잘 생성해내야함</p></li>
-<li><p>기존에 학습되었던 General한 이미지를 Finetuning한 후에도 잘 생성해내야함</p></li>
-</ol>
-</li>
-</ul>
-</li>
-<li><p>Customization이 어려운 이유</p>
-<ol class="arabic simple">
-<li><p>학습을 진행하다보면 기존에 학습했던 Concept을 잊어버리거나 왜곡해버림 → Language Draft</p></li>
-<li><p>새로운 Concept에 대해 모델이 Overfit 되어서 결과물의 Variation이 낮아짐</p></li>
-<li><p>좀더 나아가 Single Concept 뿐 아니라 Multiple Concept에 대한 Finetuning 또한 어려움</p></li>
-</ol>
-</li>
-<li><p>Custom Diffusion은?</p>
-<ol class="arabic simple">
-<li><p>Text로 Condition을 생성해내는 과정 중 특정 부분만을 학습</p></li>
-<li><p>General Concept의 성능 유지를 위해 real image와 해당 이미지의 caption을 regularization Data로 사용</p></li>
-<li><p>fine tuning동안 새로운 augmentation 기법을 소개</p></li>
-<li><p>Multiple concept의 학습 방식을 제안</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<section id="deep-generative-models-image-and-model-editing">
-<h3>Deep Generative Models &amp; Image and model editing<a class="headerlink" href="#deep-generative-models-image-and-model-editing" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>GAN, VAE, Diffusion 등 다양한 방식의 Generative Model들이 각각 좋은 성능을 보여주고있음</p></li>
-<li><p>게다가 추가적인 input(=hint)를 통해 Generated 이미지의 control도 가능함</p></li>
-<li><p>하지만 General하지 않은 새로운 Concept에 대한 생성은 불가능함</p></li>
-<li><p><strong>Custom Diffusion은 이러한 New Concept에 대한 Finetuning 기법을 제안</strong></p></li>
-</ul>
-</section>
-<section id="transfer-learning">
-<h3>Transfer learning<a class="headerlink" href="#transfer-learning" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>Global한 이미지의 Distribution을 이미 학습한 모델에 특정 concept을 포함한 소량의 이미지를 finetuning하는 기법</p></li>
-<li><p>Transfer Learning은 생각보다 효과적이고 유용함</p></li>
-<li><p>대부분 transfer learning 시에는 모델의 전체를 학습하거나 혹은 Parameter를 더 추가해 재학습</p>
-<p>→ 위에서 제시한 Customization의 문제를 일으키기 쉬움 (Language Drift, Overfitting etc.)</p>
-</li>
-<li><p><strong>Custom Diffusion은 모델의 아주 일부만을 대상으로 finetuning</strong></p></li>
-</ul>
-</section>
-<section id="adapting-text-to-image-models">
-<h3>Adapting text-to-image models<a class="headerlink" href="#adapting-text-to-image-models" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>비슷한 컨셉으로 Finetuning을 통한 Personalization 연구들이 있음</p>
-<ul>
-<li><p>Dreambooth, Textual Inversion</p></li>
-</ul>
-</li>
-<li><p>vs Custom Diffusion</p>
-<ol class="arabic simple">
-<li><p>Multiple Concept의 Finetuning 모델들을 하나의 모델로 Compress할 수 있음</p></li>
-<li><p>모델의 특정 부분만을 Finetuning함으로써 다른 모델에 비해 Training Resource를 절약할 수 있음</p></li>
-</ol>
-</li>
-</ul>
-</section>
-</section>
-<section id="method">
-<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<section id="single-concept-fine-tuning">
-<h3>Single Concept Fine-tuning<a class="headerlink" href="#single-concept-fine-tuning" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>Backbone으로 Latent Diffusion Model을 채택</p></li>
-<li><p>(L)DM의 학습 Concept</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img01.png"><img alt="CD_00" class="bg-primary mb-1" src="../../_images/img01.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 165 </span><span class="caption-text">Equation 0</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(x_{t}\)</span> : time t 시점에 Noise가 섞인 이미지</p></li>
-<li><p><span class="math notranslate nohighlight">\(t\)</span> → timestep</p></li>
-<li><p><span class="math notranslate nohighlight">\(c\)</span> → conditioning feature (text, image 등)</p>
-<ul>
-<li><p>text나 image를 바로 사용하지않고 latent space로 embedding된 값을 사용 <em>(using CLIP)</em></p></li>
-</ul>
-</li>
-<li><p>ε → noise</p></li>
-<li><p><span class="math notranslate nohighlight">\(ε_{θ}\)</span> → <span class="math notranslate nohighlight">\(x_{t}\)</span>에 낀 noise ε를 예측해내는 모델</p></li>
-<li><p>즉, <span class="math notranslate nohighlight">\(x_{t}\)</span>에 낀 noise ε를  예측해내는 모델을 학습</p></li>
-</ul>
-</li>
-<li><p>이러한 LDM 모델을 fine tuning할때는 Model의 모든 Layer에대해 update하는게 기본</p></li>
-<li><p>하지만 이러한 finetuning 방식은 Resource가 비효율적으로 많이들고, 새로운 Concept 이미지에 overfitting되기 쉬움</p></li>
-<li><p>Finetuning 과정 중 모델의 Weight 변화량을 체크</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img11.png"><img alt="CD_01" class="bg-primary mb-1" src="../../_images/img11.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 166 </span><span class="caption-text">Delta of Weight while Training</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>다른 부분에 비해 Cross Attention 연산의 Weight 변화량이 가장 큼</p></li>
-<li><p>Cross Attention</p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img21.png"><img alt="CD_02" class="bg-primary mb-1" src="../../_images/img21.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 167 </span><span class="caption-text">Fig.4 Cross Attention</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>Cross Attention → Image latent에 text condition을 주입하는 Attention Mechanism</p>
-<ul class="simple">
-<li><p><em>Query</em> → image latent / <em>Key, Value</em> → text condition latent</p></li>
-<li><p>모델 전체 Parameter에 단 5%부분만을 차지</p></li>
-<li><p>이 중 new concept을 의미하는 Text <span class="math notranslate nohighlight">\(V^{*}\)</span>이 포함되는 <span class="math notranslate nohighlight">\(W^{k}\)</span>와 <span class="math notranslate nohighlight">\(W^{v}\)</span>만 학습. 나머지는 Freeze</p></li>
-</ul>
-</li>
-<li><p>Fine Tuning할 때 <span class="math notranslate nohighlight">\(V^{*}\)</span>은 실제로는 잘 쓰지않는 단어로 사용하고 “<em>A [<span class="math notranslate nohighlight">\(V^{*}\)</span>] [Class]”</em> 형식으로 이미지를 Captioning한 후에 학습</p></li>
-<li><p>또 Finetuning 중에 일반적인 concept을 잊어버리는 Language Drift 현상이 있을 수 있음</p>
-<ul class="simple">
-<li><p>Language Drift</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img31.png"><img alt="CD_03" class="bg-primary mb-1" src="../../_images/img31.png" style="width: 650px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 168 </span><span class="caption-text">Fine tuning 후에 Photo of a moon 이미지를 생성하면 Finetuning했던 Moongate 이미지를 생성해버림</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p>Fine tuning 후에 Photo of a moon 이미지를 생성하면 Finetuning했던 Moongate 이미지를 생성해버림</p>
-<ul class="simple">
-<li><p>이러한 현상을 방지하기위해 Real world의 Image에서 target text class prompt와 유사한 200장의 이미지를 Regularization 이미지로 같이 학습</p>
-<ul>
-<li><p>text prompt가 유사하다 = CLIP에서 추출한 text feature space상의 Vector가 Similar하다</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="multiple-concept-compositional-fine-tuning">
-<h3>Multiple-Concept Compositional Fine-tuning<a class="headerlink" href="#multiple-concept-compositional-fine-tuning" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>Joint Training on multiple concepts</p>
-<ul class="simple">
-<li><p>각각의 Concept을 갖는 이미지에 대해 각각 rare한 key를 부여해 동시에 학습</p>
-<ul>
-<li><p>(<span class="math notranslate nohighlight">\(V^{i}\)</span><em>, for <span class="math notranslate nohighlight">\(i\)</span> is # of concepts</em>)</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Constrained optimization to merge concepts</p>
-<ul class="simple">
-<li><p>각각 Single Concept으로 학습된 weight를 merge</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img41.png"><img alt="CD_04" class="bg-primary mb-1" src="../../_images/img41.png" style="width: 450px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 169 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(W_0\)</span> → pretrained model의 Key, Value embedding Weight</p>
-<ul>
-<li><p>~~<em>(Appendix A에는 <span class="math notranslate nohighlight">\(W\)</span>라고 나와있는데 오탈자일 가능성 있음)</em>~~</p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(C_{reg}\)</span> → regularization 이미지의 Caption의 Embedding 값을 모두 뽑아 Concat</p></li>
-<li><p>⇒ <span class="math notranslate nohighlight">\(C_{reg}\)</span>에 Pretrained Weight를 곱한 값과의 norm을 계산했을때 값이 가장 작은 Weight를 return</p>
-<ul>
-<li><p>“N개의 Concept에 대해 Cross Attention이 모두 잘 동작하는 W 값을 찾아 하나만 사용하자”</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="training-details">
-<h3>Training Details<a class="headerlink" href="#training-details" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>single concept의 경우 250 steps, two-concept의 경우 500 steps</p></li>
-<li><p>batch : 8, learning rate : <span class="math notranslate nohighlight">\(8*10^{-5}\)</span></p></li>
-<li><p>random resize + prompt 추가 (very small, far away, zoom in …) (new augmentation technique)</p></li>
-</ul>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>Single Concept Finetuning</p>
-<ul class="simple">
-<li><p>Qualitative Evaluation</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img51.png"><img alt="CD_05" class="bg-primary mb-1" src="../../_images/img51.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 170 </span><span class="caption-text">Qualitative Evaluation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Quantitative Evaluation (Text Alignment, Image Alignment, KID)</p>
-<ul>
-<li><p>text alignment : prompt에 얼마나 대응되는 이미지를 생성해냈는가</p></li>
-<li><p>image alignment : training image의 concept을 얼마나 잘 표현해냈는가</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img61.png"><img alt="CD_06" class="bg-primary mb-1" src="../../_images/img61.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 171 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>⇒ 정성적, 정량적 평가 모두 Custom Diffusion &gt; Dreambooth, Textual Inversion</p>
-<p>Multiple Concept Finetuning</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img71.png"><img alt="CD_07" class="bg-primary mb-1" src="../../_images/img71.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 172 </span><span class="caption-text">Multiple Concept Finetuning</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Joint Training &gt; Optimization by custom diffusion &gt; Dreambooth</p></li>
-</ul>
-<p>Human Preference Study</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img81.png"><img alt="CD_08" class="bg-primary mb-1" src="../../_images/img81.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 173 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Custom Diffusion (partial) vs Baseline(Textual Inversion, Dreambooth, CustomDiffusion(all))</p></li>
-<li><p>Text-Alignment, Image-Alignment 모두 Custom Diffusion (partial)을 선호</p></li>
-<li><p>Textual Inversion은 Image Alignment 선호도는 Custom Diffusion과 비슷하지만, Text Alignment에서는 Custom Diffusion 선호도가 매우 높아 Textual Inversion이 학습 이미지에 Overfitting된 경향이 있음</p></li>
-</ul>
-<p>Ablation Study</p>
-<ol class="arabic">
-<li><p>Regularization Image</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img9.png"><img alt="CD_09" class="bg-primary mb-1" src="../../_images/img9.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 174 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ol>
-<ul class="simple">
-<li><p>Gen : real image 대신 generate된 이미지를 regularization 이미지로 사용</p></li>
-<li><p>Overfitting 없이 가장 좋은 수치는 Augmentation + Regularization image as Real world Image</p></li>
-</ul>
-</section>
-<section id="discussion-limitation">
-<h2>5. Discussion &amp; Limitation<a class="headerlink" href="#discussion-limitation" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>customizing이 가능하고 training resource가 매우 적은 finetuning 기법 소개</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img10.png"><img alt="CD_10" class="bg-primary mb-1" src="../../_images/img10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 175 </span><span class="caption-text">Limitation Of Custom Diffusion</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>비슷한 category의 object에 대해서는 joint training, merge 모두 잘 동작하지 않음</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Textual_Inversion.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Textual Inversion</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="LoRA.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">LoRA</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#deep-generative-models-image-and-model-editing">Deep Generative Models &amp; Image and model editing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transfer-learning">Transfer learning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapting-text-to-image-models">Adapting text-to-image models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#single-concept-fine-tuning">Single Concept Fine-tuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-concept-compositional-fine-tuning">Multiple-Concept Compositional Fine-tuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training Details</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion-limitation">5. Discussion &amp; Limitation</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Custom Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/CustomDiffusion';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="LoRA" href="LoRA.html" />
+    <link rel="prev" title="Textual Inversion" href="Textual_Inversion.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/CustomDiffusion.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/CustomDiffusion.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Custom Diffusion</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#deep-generative-models-image-and-model-editing">Deep Generative Models &amp; Image and model editing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transfer-learning">Transfer learning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapting-text-to-image-models">Adapting text-to-image models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#single-concept-fine-tuning">Single Concept Fine-tuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-concept-compositional-fine-tuning">Multiple-Concept Compositional Fine-tuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training Details</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion-limitation">5. Discussion &amp; Limitation</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> A Multi-Concept Customization of Text-To-Image Diffusion (CVPR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2212.04488">https://arxiv.org/abs/2212.04488</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/adobe-research/custom-diffusion">Official:</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="custom-diffusion">
+<h1>Custom Diffusion<a class="headerlink" href="#custom-diffusion" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Large Scale Data를 학습한 Generate 모델이 뛰어난 성능을 보이는 추세</p></li>
+<li><p>User의 Private한 Concept을 생성하고자하는 욕구는 여전히 풀지 못함</p></li>
+<li><p>Custom Diffusion은?</p>
+<ol class="arabic simple">
+<li><p>기존 Diffusion 모델의 partial한 부분만을 학습시킴으로써 기존보다 더 빠른 finetuning 방식을 제안</p></li>
+<li><p>Single Concept 뿐 아니라, Multiple Concept에 대한 학습이 가능</p></li>
+<li><p>다양한 Fine tuned 모델을 하나의 모델로 Compress하는 방식을 제안</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>최근 Text-To-Image 모델들이 활발하게 연구 되어짐</p></li>
+<li><p>단순한 text prompt 입력만으로 원하는 이미지를 생성해내는 수준까지 이름</p></li>
+<li><p>하지만 이러한 모델들은 General한 이미지는 잘 생성하지만, User가 원하는 Private한 (=specific) Concept의 이미지는 생성해내지 못함</p>
+<ul>
+<li><p>e.g. 행복한 우리 가족 사진, 우리집 강아지 뽀삐가 파리로 여행을 떠나는 사진 등</p></li>
+</ul>
+</li>
+<li><p>학습 과정중에 User의 Private한 데이터를 보지 못했기때문에 Model에게는 당연한 결과</p></li>
+<li><p><strong>Customization</strong></p>
+<ul>
+<li><p>몇장의 Concept을 포함하는 이미지만으로 Pretrained 모델을 finetuning하는 방식</p>
+<ul>
+<li><p>In Dreambooth, Personalization</p></li>
+</ul>
+</li>
+<li><p>목표</p>
+<ol class="arabic simple">
+<li><p>학습하고자하는 Private한 Concept의 이미지를 잘 생성해내야함</p></li>
+<li><p>기존에 학습되었던 General한 이미지를 Finetuning한 후에도 잘 생성해내야함</p></li>
+</ol>
+</li>
+</ul>
+</li>
+<li><p>Customization이 어려운 이유</p>
+<ol class="arabic simple">
+<li><p>학습을 진행하다보면 기존에 학습했던 Concept을 잊어버리거나 왜곡해버림 → Language Drift</p></li>
+<li><p>새로운 Concept에 대해 모델이 Overfit 되어서 결과물의 Variation이 낮아짐</p></li>
+<li><p>좀더 나아가 Single Concept 뿐 아니라 Multiple Concept에 대한 Finetuning 또한 어려움</p></li>
+</ol>
+</li>
+<li><p>Custom Diffusion은?</p>
+<ol class="arabic simple">
+<li><p>Text로 Condition을 생성해내는 과정 중 특정 부분만을 학습</p></li>
+<li><p>General Concept의 성능 유지를 위해 real image와 해당 이미지의 caption을 regularization Data로 사용</p></li>
+<li><p>fine tuning동안 새로운 augmentation 기법을 소개</p></li>
+<li><p>Multiple concept의 학습 방식을 제안</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<section id="deep-generative-models-image-and-model-editing">
+<h3>Deep Generative Models &amp; Image and model editing<a class="headerlink" href="#deep-generative-models-image-and-model-editing" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>GAN, VAE, Diffusion 등 다양한 방식의 Generative Model들이 각각 좋은 성능을 보여주고있음</p></li>
+<li><p>게다가 추가적인 input(=hint)를 통해 Generated 이미지의 control도 가능함</p></li>
+<li><p>하지만 General하지 않은 새로운 Concept에 대한 생성은 불가능함</p></li>
+<li><p><strong>Custom Diffusion은 이러한 New Concept에 대한 Finetuning 기법을 제안</strong></p></li>
+</ul>
+</section>
+<section id="transfer-learning">
+<h3>Transfer learning<a class="headerlink" href="#transfer-learning" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>Global한 이미지의 Distribution을 이미 학습한 모델에 특정 concept을 포함한 소량의 이미지를 finetuning하는 기법</p></li>
+<li><p>Transfer Learning은 생각보다 효과적이고 유용함</p></li>
+<li><p>대부분 transfer learning 시에는 모델의 전체를 학습하거나 혹은 Parameter를 더 추가해 재학습</p>
+<p>→ 위에서 제시한 Customization의 문제를 일으키기 쉬움 (Language Drift, Overfitting etc.)</p>
+</li>
+<li><p><strong>Custom Diffusion은 모델의 아주 일부만을 대상으로 finetuning</strong></p></li>
+</ul>
+</section>
+<section id="adapting-text-to-image-models">
+<h3>Adapting text-to-image models<a class="headerlink" href="#adapting-text-to-image-models" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>비슷한 컨셉으로 Finetuning을 통한 Personalization 연구들이 있음</p>
+<ul>
+<li><p>Dreambooth, Textual Inversion</p></li>
+</ul>
+</li>
+<li><p>vs Custom Diffusion</p>
+<ol class="arabic simple">
+<li><p>Multiple Concept의 Finetuning 모델들을 하나의 모델로 Compress할 수 있음</p></li>
+<li><p>모델의 특정 부분만을 Finetuning함으로써 다른 모델에 비해 Training Resource를 절약할 수 있음</p></li>
+</ol>
+</li>
+</ul>
+</section>
+</section>
+<section id="method">
+<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<section id="single-concept-fine-tuning">
+<h3>Single Concept Fine-tuning<a class="headerlink" href="#single-concept-fine-tuning" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>Backbone으로 Latent Diffusion Model을 채택</p></li>
+<li><p>(L)DM의 학습 Concept</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img01.png"><img alt="CD_00" class="bg-primary mb-1" src="../../_images/img01.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 165 </span><span class="caption-text">Equation 0</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(x_{t}\)</span> : time t 시점에 Noise가 섞인 이미지</p></li>
+<li><p><span class="math notranslate nohighlight">\(t\)</span> → timestep</p></li>
+<li><p><span class="math notranslate nohighlight">\(c\)</span> → conditioning feature (text, image 등)</p>
+<ul>
+<li><p>text나 image를 바로 사용하지않고 latent space로 embedding된 값을 사용 <em>(using CLIP)</em></p></li>
+</ul>
+</li>
+<li><p>ε → noise</p></li>
+<li><p><span class="math notranslate nohighlight">\(ε_{θ}\)</span> → <span class="math notranslate nohighlight">\(x_{t}\)</span>에 낀 noise ε를 예측해내는 모델</p></li>
+<li><p>즉, <span class="math notranslate nohighlight">\(x_{t}\)</span>에 낀 noise ε를  예측해내는 모델을 학습</p></li>
+</ul>
+</li>
+<li><p>이러한 LDM 모델을 fine tuning할때는 Model의 모든 Layer에대해 update하는게 기본</p></li>
+<li><p>하지만 이러한 finetuning 방식은 Resource가 비효율적으로 많이들고, 새로운 Concept 이미지에 overfitting되기 쉬움</p></li>
+<li><p>Finetuning 과정 중 모델의 Weight 변화량을 체크</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img11.png"><img alt="CD_01" class="bg-primary mb-1" src="../../_images/img11.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 166 </span><span class="caption-text">Delta of Weight while Training</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>다른 부분에 비해 Cross Attention 연산의 Weight 변화량이 가장 큼</p></li>
+<li><p>Cross Attention</p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img21.png"><img alt="CD_02" class="bg-primary mb-1" src="../../_images/img21.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 167 </span><span class="caption-text">Fig.4 Cross Attention</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>Cross Attention → Image latent에 text condition을 주입하는 Attention Mechanism</p>
+<ul class="simple">
+<li><p><em>Query</em> → image latent / <em>Key, Value</em> → text condition latent</p></li>
+<li><p>모델 전체 Parameter에 단 5%부분만을 차지</p></li>
+<li><p>이 중 new concept을 의미하는 Text <span class="math notranslate nohighlight">\(V^{*}\)</span>이 포함되는 <span class="math notranslate nohighlight">\(W^{k}\)</span>와 <span class="math notranslate nohighlight">\(W^{v}\)</span>만 학습. 나머지는 Freeze</p></li>
+</ul>
+</li>
+<li><p>Fine Tuning할 때 <span class="math notranslate nohighlight">\(V^{*}\)</span>은 실제로는 잘 쓰지않는 단어로 사용하고 “<em>A [<span class="math notranslate nohighlight">\(V^{*}\)</span>] [Class]”</em> 형식으로 이미지를 Captioning한 후에 학습</p></li>
+<li><p>또 Finetuning중에 일반적인 concept을 잊어버리는 Language Drift 현상이 있을수있음</p>
+<ul class="simple">
+<li><p>Language Drift</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img31.png"><img alt="CD_03" class="bg-primary mb-1" src="../../_images/img31.png" style="width: 650px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 168 </span><span class="caption-text">Fine tuning 후에 Photo of a moon 이미지를 생성하면 Finetuning했던 Moongate 이미지를 생성해버림</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>Fine tuning 후에 Photo of a moon 이미지를 생성하면 Finetuning했던 Moongate 이미지를 생성해버림</p>
+<ul class="simple">
+<li><p>이러한 현상을 방지하기위해 Real world의 Image에서 target text class prompt와 유사한 200장의 이미지를 Regularization 이미지로 같이 학습</p>
+<ul>
+<li><p>text prompt가 유사하다 = CLIP에서 추출한 text feature space상의 Vector가 Similar하다</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="multiple-concept-compositional-fine-tuning">
+<h3>Multiple-Concept Compositional Fine-tuning<a class="headerlink" href="#multiple-concept-compositional-fine-tuning" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>Joint Training on multiple concept</p>
+<ul class="simple">
+<li><p>각각의 Concept을 갖는 이미지에 대해 각각 rare한 key를 부여해 동시에 학습</p>
+<ul>
+<li><p>(<span class="math notranslate nohighlight">\(V^{i}\)</span><em>, for <span class="math notranslate nohighlight">\(i\)</span> is # of concepts</em>)</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Constrained optimization to merge concepts</p>
+<ul class="simple">
+<li><p>각각 Single Concept으로 학습된 weight를 merge</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img41.png"><img alt="CD_04" class="bg-primary mb-1" src="../../_images/img41.png" style="width: 450px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 169 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(W_0\)</span> → pretrained model의 Key, Value embedding Weight</p>
+<ul>
+<li><p><del><em>(Appendix A에는 <span class="math notranslate nohighlight">\(W\)</span>라고 나와있는데 오탈자일 가능성 있음)</em></del></p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(C_{reg}\)</span> → regularization 이미지의 Caption의 Embedding 값을 모두 뽑아 Concat</p></li>
+<li><p>⇒ <span class="math notranslate nohighlight">\(C_{reg}\)</span>에 Pretrained Weight를 곱한 값과의 norm을 계산했을때 값이 가장 작은 Weight를 return</p>
+<ul>
+<li><p>“N개의 Concept에 대해 Cross Attention이 모두 잘 동작하는 W 값을 찾아 하나만 사용하자”</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="training-details">
+<h3>Training Details<a class="headerlink" href="#training-details" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>single concept의 경우 250 steps, two-concept의 경우 500 steps</p></li>
+<li><p>batch : 8, learning rate : <span class="math notranslate nohighlight">\(8*10^{-5}\)</span></p></li>
+<li><p>random resize + prompt 추가 (very small, far away, zoom in …) (new augmentation technique)</p></li>
+</ul>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>Single Concept Finetuning</p>
+<ul class="simple">
+<li><p>Qualitative Evaluation</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img51.png"><img alt="CD_05" class="bg-primary mb-1" src="../../_images/img51.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 170 </span><span class="caption-text">Qualitative Evaluation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Quantitative Evaluation (Text Alignment, Image Alignment, KID)</p>
+<ul>
+<li><p>text alignment : prompt에 얼마나 대응되는 이미지를 생성해냈는가</p></li>
+<li><p>image alignment : training image의 concept을 얼마나 잘 표현해냈는가</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img61.png"><img alt="CD_06" class="bg-primary mb-1" src="../../_images/img61.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 171 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>⇒ 정성적, 정량적 평가 모두 Custom Diffusion &gt; Dreambooth, Textual Inversion</p>
+<p>Multiple Concept Finetuning</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img71.png"><img alt="CD_07" class="bg-primary mb-1" src="../../_images/img71.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 172 </span><span class="caption-text">Multiple Concept Finetuning</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Joint Training &gt; Optimization by custom diffusion &gt; Dreambooth</p></li>
+</ul>
+<p>Human Preference Study</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img81.png"><img alt="CD_08" class="bg-primary mb-1" src="../../_images/img81.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 173 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Custom Diffusion (partial) vs Baseline(Textual Inversion, Dreambooth, CustomDiffusion(all))</p></li>
+<li><p>Text-Alignment, Image-Alignment 모두 Custom Diffusion (partial)을 선호</p></li>
+<li><p>Textual Inversion은 Image Alignment는 Custom Diffusion 선호도와 비슷하지만 Text Alignment수치를 보면 Custom Diffusion이 매우 높아 Overfitting된 경향이 있음</p></li>
+</ul>
+<p>Ablation Study</p>
+<ol class="arabic">
+<li><p>Regularization Image</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img9.png"><img alt="CD_09" class="bg-primary mb-1" src="../../_images/img9.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 174 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ol>
+<ul class="simple">
+<li><p>Gen : real image 대신 generate된 이미지를 regularization 이미지로 사용</p></li>
+<li><p>Overfitting 없이 가장 좋은 수치는 Augmentation + Regularization image as Real world Image</p></li>
+</ul>
+</section>
+<section id="discussion-limitation">
+<h2>5. Discussion &amp; Limitation<a class="headerlink" href="#discussion-limitation" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>customizing이 가능하고 training resource가 매우 적은 finetuning 기법 소개</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img10.png"><img alt="CD_10" class="bg-primary mb-1" src="../../_images/img10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 175 </span><span class="caption-text">Limitation Of Custom Diffusion</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>비슷한 category의 object에 대해서는 joint training, merge 모두 잘 동작하지 않음</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Textual_Inversion.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Textual Inversion</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="LoRA.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">LoRA</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#deep-generative-models-image-and-model-editing">Deep Generative Models &amp; Image and model editing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transfer-learning">Transfer learning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapting-text-to-image-models">Adapting text-to-image models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#single-concept-fine-tuning">Single Concept Fine-tuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-concept-compositional-fine-tuning">Multiple-Concept Compositional Fine-tuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training Details</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion-limitation">5. Discussion &amp; Limitation</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DALLE2.html b/docs/review/DALLE2.html
old mode 100644
new mode 100755
index a177a566..dda2f769
--- a/docs/review/DALLE2.html
+++ b/docs/review/DALLE2.html
@@ -1,1287 +1,1307 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DALL-E 2 &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DALLE2';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DreamBooth" href="dreambooth.html" />
-    <link rel="prev" title="DALL-E" href="dalle.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DALLE2.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DALLE2.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DALL-E 2</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Hierarchical Text-Conditional Image Generation with CLIP Latents (arXiv 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2204.06125v1.pdf">https://arxiv.org/pdf/2204.06125v1.pdf</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> SeonHoon Kim</p></li>
-<li><p><strong>Last updated on Sep. 18, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dall-e-2">
-<h1>DALL-E 2<a class="headerlink" href="#dall-e-2" title="Permalink to this heading">#</a></h1>
-<p>DALLE2 는 2022년에 공개되어 세상을 놀라게 했습니다.<br>
-이미지 생성 능력도 뛰어났고, 이미지를 사용자 입맛에 맞게 조작할 수 있게 되었죠.</p>
-<p>DALLE2 의 이름은 왜 DALL-E 일까요?<br>
-DALLE2 의 DALLE 는 초현실주의 화가 Salvador Dali 와 WALL-E 의 합성어입니다.<br>
-DALLE2 로 생성해낸 결과물이 과연 어떻길래 세상을 놀라게 했을까요?</p>
-<br>
-<ul>
-<li><p><strong>DALL-E 2 결과물</strong></p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_011.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_011.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 102 </span><span class="caption-text">Salvador Dali 의 생전 모습</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_001.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_001.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 103 </span><span class="caption-text">vibrant portrait of Salvador Dali with a robotic half face from DALLE2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 그림은 DALLE2 가 생성해낸 “vibrant portrait of Salvador Dali with a robotic half face” 이미지입니다.<br>
-실제 Salvador dali 의 모습이 보이네요.<br>
-게다가 Salvador dali 의 초현실주의적 그림체가 반영된 것 같기도 합니다.<br>
-놀라운 이미지입니다.</p>
-<p>아래의 corgi 그림은 어떤가요 ?</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_021.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_021.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 104 </span><span class="caption-text">a corgi’s head depicted as an explosion of a nebula from DALLE2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>corgi 의 모습을 성운의 폭발로 묘사해달라고 했을 때 생성된 그림입니다.<br>
-아래의 그림은, 실제 NASA 에서 촬영한 초신성 폭발의 잔해입니다.</p>
-<p>정말 그럴듯하지 않나요?</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_031.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_031.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 105 </span><span class="caption-text">This mosaic image, one of the largest ever taken by NASA’s Hubble Space Telescope of the Crab Nebula, is a six-light-year-wide expanding remnant of a star’s supernova explosion.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<br>
-<ul class="simple">
-<li><p><strong>학습 목표 및 주의사항</strong></p>
-<ul>
-<li><p>본 포스팅에서는 DALLE2 paper 의 내용을 비선형적으로 살펴봅니다.<br>
-마치 오픈월드 게임처럼 말이죠.<br>
-핵심이 되는 질문들을 던지며, DALLE2 의 아키텍쳐를 파헤쳐 볼 겁니다.</p></li>
-<li><p>본 포스팅은 <a class="reference external" href="https://cdn.openai.com/papers/dall-e-2.pdf">DALL-E 2 paper</a>, <a class="reference external" href="https://openai.com/dall-e-2">OpenAI blog</a>, <a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">AssemblyAI Youtube</a>, <a class="reference external" href="https://www.youtube.com/watch?v=gmfI3B6pQTo&amp;t=83s&amp;ab_channel=EdanMeyer">Edan Meyer Youtube</a> 를 참고했습니다.</p></li>
-<li><p>본격적으로 학습하기 전에 알아야할 것은, CLIP 모델입니다.</p>
-<ul>
-<li><p>CLIP 은, 이미지와 text 를 학습한 multi-modal 모델입니다.</p>
-<ul>
-<li><p>The fundamental principles of training CLIP are quite simple:</p>
-<ol class="arabic simple">
-<li><p>First, all images and their associated captions are passed through their respective encoders, mapping all objects into an m-dimensional space.</p></li>
-<li><p>Then, the cosine similarity of each <em>(image, text)</em> pair is computed.</p></li>
-<li><p>The training objective is to simultaneously <strong>maximize the cosine similarity</strong> between N <strong>correct</strong> encoded image/caption pairs and <strong>minimize the cosine similarity</strong> between N<sup>2</sup> - N <strong>incorrect</strong> encoded image/caption pairs.</p></li>
-</ol>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>DALL-E 2 는 CLIP 과 Diffusion Model 을 통합시켰습니다. (최초는 x)</p></li>
-<li><p>하지만 CLIP 을 사용하는 것이 정답은 아닙니다.<br>
-DALL-E 2 는 22년 5월, CLIP 을 사용하지 않은 IMAGEN 에게 SOTA 를 내주었습니다.</p></li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>아키텍쳐 찍먹하기</strong></p>
-<p>특정 이미지 내의 Semantics 와 style 을 모두 포착해낼 수 있는 CLIP 의 이미지 표현 능력을 끌어올리기 위해서,<br>
-저자들은 CLIP 과 Diffusion 모델을 통합한 Two-stage model 을 제안합니다.<br>
-이것이 바로 DALLE2 인데요.<br>
-저자들은 이 모델을 unCLIP 이라고 부릅니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_061.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_061.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 106 </span><span class="caption-text">A high level overview of the architecture.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DALLE2 paper 의 그림은 좀 복잡해보이니,<br>
-Assembly AI 의 Youtube 에서 제공하는 좀 더 단순화된 그림을 살펴볼게요.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_07.png"><img alt="img_07" class="bg-primary mb-1" src="../../_images/img_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 107 </span><span class="caption-text">A high level overview of the architecture from AssemblyAI youtube.</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI</a></p>
-<p>Prior 와 Decoder 가 DALLE2 의 핵심이 되는 모델인 것 같네요.</p>
-<ul class="simple">
-<li><p><strong>Prior</strong> : 텍스트 캡션을 받아서, 상응하는 CLIP image embedding 을 생성합니다.</p>
-<ul>
-<li><p>본 논문에서는 Autoregressive prior 와 Diffusion prior 를 비교하는 실험을 수행했습니다.</p></li>
-<li><p>Diffusion prior 가 computationally efficient 하고, 고품질 이미지를 생성합니다.<br>
-따라서 후반부에는 Diffusion prior 만 사용해서 실험합니다.</p></li>
-</ul>
-</li>
-<li><p><strong>Decoder</strong> : CLIP image embedding 을 받아서, 이미지를 생성합니다.</p>
-<ul>
-<li><p>Diffusion 모델만 사용했습니다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>왜 CLIP 이랑 Diffusion 을 사용했을까요?</strong></p>
-<ul>
-<li><p><strong>CLIP</strong></p>
-<ul class="simple">
-<li><p>CLIP 이 image representation 을 학습하는 데에 큰 성공을 거두고 있었습니다.</p></li>
-<li><p>CLIP embeddings 는 image distribution shift 에 robust 했습니다.</p></li>
-<li><p>CLIP embeddings 는 zero-shot capabilities 가 뛰어났습니다.</p></li>
-<li><p>다양한 vision &amp; language tasks 에 fine-tuned 되어 SOTA 를 달성해냈습니다.</p></li>
-</ul>
-</li>
-<li><p><strong>Diffusion</strong></p>
-<ul class="simple">
-<li><p>Diffusion 은 image 와 video generation tasks 에서 SOTA 를 갱신하는 중이었죠.</p></li>
-<li><p>non-deterministic 하게 만들 수 있습니다.<br>
-이러한 Decoder 덕분에, CLIP image embedding 과 같은 <br>
-<strong>image representation 에 존재하지 않는 non-essential 한 details</strong> 는 <strong>변주하면서,</strong> <br>
-<strong>image representation 의 semantics 와 style 은 유지</strong>할 수 있죠.</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_081.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_081.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 108 </span><span class="caption-text">Variations of an input image by encoding with CLIP and then decoding with a diffusion model.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 왼쪽의 그림처럼, Salvador dali 의 그림에서 중요한 objects 들은 보존됩니다.<br>
-하지만 그들이 표현되는 방식이나 전체적인 그림의 style 은 조금씩 바뀝니다.<br>
-그럼에도, Salvador dali 특유의 초현실주의적 화풍은 유지되는 것 같네요.<br>
-Diffusion Decoder 덕분에, <strong>Non-essential details</strong> 는<br>
-마치 <strong>변주곡처럼 매번 새롭게 연주</strong>해낼 수 있는겁니다.</p>
-</li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>아키텍쳐 파헤치기</strong></p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_091.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_091.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 109 </span><span class="caption-text">A high level overview of the architecture from AssemblyAI youtube.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI</a>
-<br></p>
-<p>이번에는 DALLE2 의 아키텍쳐를 좀 더 자세히 살펴보죠.</p>
-<ul>
-<li><p><strong>Prior</strong></p>
-<ul class="simple">
-<li><p><strong>input</strong></p>
-<ul>
-<li><p>Caption 그 자체의 embedding vector 입니다.</p></li>
-<li><p><strong>CLIP text embedding</strong> 입니다.</p></li>
-</ul>
-</li>
-<li><p><strong>output</strong></p>
-<ul>
-<li><p><strong>Generated CLIP Image embedding</strong> 입니다.</p></li>
-</ul>
-</li>
-<li><p><strong>설명</strong></p>
-<ul>
-<li><p>사실 Prior 은 CLIP text embedding 만 조건으로 받는 것이 아니라 Caption 자체도 받습니다.<br>
-(물론 embedding vector 로 받겠죠)<br>
-CLIP text embedding 과, 그 Caption 은 서로 1대1 대응되기 때문에,<br>
-Dual-conditioning 이 문제될 것은 없다고 저자들은 변론합니다.</p></li>
-<li><p>샘플 퀄리티를 높이기 위해서 2개의 CLIP image embeddings 를 생성한 후 <br>
-주어진 CLIP text embedding 과 더 높은 dot product 를 갖는 CLIP image embedding 을 사용했다고 합니다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Decoder</strong></p>
-<ul>
-<li><p><strong>Input</strong></p>
-<ul class="simple">
-<li><p>CLIP text embedding</p></li>
-<li><p>Generated CLIP Image embedding</p></li>
-</ul>
-</li>
-<li><p><strong>Output</strong></p>
-<ul class="simple">
-<li><p>Generated Image</p></li>
-</ul>
-</li>
-<li><p><strong>설명</strong></p>
-<ul class="simple">
-<li><p>modified GLIDE model 을 Decoder 로 사용했습니다.<br>
-→ 따라서, <strong>projected CLIP text embeddings 를 아키텍쳐</strong>에 통합시킬 수 있다고 주장합니다.
-<br>
-어떻게 통합시키냐하면,</p></li>
-</ul>
-<ol class="arabic simple">
-<li><p>GLIDE timestep embedding 에 추가하고,</p></li>
-<li><p>4개의 extra context tokens 을 만들어서 GLIDE text encoder 의 output sequence 에 concat 하는거죠.
-<br>
-이 방법으로 <strong>CLIP image embeddings 를 받아서, 원본 영상을 생성하는 것</strong> 입니다.</p></li>
-</ol>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_101.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_101.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 110 </span><span class="caption-text">GLIDE training process</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>GLIDE 를 수정해 사용함으로써 GLIDE 가 가지고 있던<br>
-text-conditional photorealistic image generation capabilities 를 활용할 수 있다고 주장합니다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>그렇다면 왜 Prior 가 필요할까요?</strong></p>
-<ol class="arabic simple">
-<li><p><strong>To obtain a full generative model of images</strong>, <br>
-we combine the CLIP image embedding decoder with a prior model, <br>
-which generates possible CLIP image embeddings from a given text caption <br></p></li>
-</ol>
-<p>라고 하지만.. 딱히 와닿지는 않습니다.<br>
-하지만 아직 실망하긴 이릅니다.<br>
-Prior 의 유무에 따라, 생성된 이미지의 품질을 비교하는 실험을 수행했다고 합니다.<br>
-한번 살펴볼까요?</p>
-<ol class="arabic simple" start="2">
-<li><p><strong>아래 세 가지 아키텍쳐를 비교하는 실험 수행</strong><br>
-(1) GLIDE 모델처럼, text 의 token embeddings 만 조건으로 주어 실험<br>
-(2) 추가적으로, CLIP text embeddings 를 조건으로 주어 실험<br>
-(3) 추가적으로, CLIP image embeddings 를 생성해내는 Prior 를 갖추고 실험<br>
-<br>
-실험 결과, (3) 이 가장 훌륭했습니다.<br>
-특히 image diversity 가 뛰어났습니다.</p></li>
-</ol>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_111.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_111.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 111 </span><span class="caption-text">3가지 경우의 아키텍쳐에 따른 실험 결과 from AssemblyAI youtube.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_121.png"><img alt="img_12" class="bg-primary mb-1" src="../../_images/img_121.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 112 </span><span class="caption-text">Samples using different conditioning signals for the same decoder.</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그렇지만, 의문이 말끔히 해소되지는 않습니다.
-왜냐하면..</p>
-<ul class="simple">
-<li><p><strong>95% 의 학습 시간 동안, (3) 방식으로 학습한 Decoder 를,</strong><br>
-<strong>(1) 과 (2) 방식에 그대로 적용해 실험했습니다.</strong> <br>
-따라서 공정한 실험이라고 보긴 어려울 것 같습니다.</p></li>
-<li><p><strong>Decoder 를, True CLIP Image embeddings 와 Generated CLIP Image embeddings 로</strong><br>
-<strong>각각 학습시켰을 때의 성능 비교 실험은 없습니다.</strong>
-<br>
-개인적으로 저는 이러한 결과들을 보고,<br>
-Prior 를 반드시 써야하는 근거에 대한 설득력이 떨어진다고 생각했습니다.</p></li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>왜 CLIP 을 써야할까요?</strong></p>
-<ol class="arabic simple">
-<li><p>CLIP 은 어떤 객체를 묘사한 텍스트와, 그 객체의 시각적 발현 사이의 의미론적 관계를 학습했습니다. <br>
-따라서 저자들은 이러한 CLIP 의 능력이 Text-to-Image task 에서 매우 중요하다고 주장합니다.</p></li>
-<li><p><strong>CLIP 을 활용한 덕분에 이미지를 Manipulation 할 수 있습니다.</strong></p></li>
-</ol>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_131.png"><img alt="img_13" class="bg-primary mb-1" src="../../_images/img_131.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 113 </span><span class="caption-text">Text diffs applied to images by interpolating between their CLIP image embeddings and a normalised difference of the CLIP text embeddings produced from the two descriptions.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>어떻게 이미지를 Manipulation 하는지는 곧 자세히 살펴보겠습니다.</p>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>그래서 이 모델은 뭐가 좋은가요?</strong></p>
-<ul>
-<li><p><strong>Evaluation 결과, Diversity 가 뛰어났습니다.</strong></p>
-<ul class="simple">
-<li><p>모델을 평가하기 위해서,<br>
-주어진 Caption 에 대한 GLIDE 의 생성물과 unCLIP 의 생성물을 사람들에게 제시하고,<br>
-<strong>Photorealism, Caption Similarity, Diversity</strong> 에 대해서 <strong>점수를 매기도록</strong> 했습니다.<br></p></li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_141.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_141.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 114 </span><span class="caption-text">Samples when increasing guidance scale for both unCLIP and GLIDE.</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_151.png"><img alt="img_15" class="bg-primary mb-1" src="../../_images/img_151.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 115 </span><span class="caption-text">Comparison of unCLIP and GLIDE for different evaluations.</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_161.png"><img alt="img_16" class="bg-primary mb-1" src="../../_images/img_161.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 116 </span><span class="caption-text">FID versus guidance scale for unCLIP and GLIDE.</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>결론은 다음과 같습니다.</p>
-<ol class="arabic simple">
-<li><p>GLIDE 에 비해서 <strong>Photorealism, Caption Similarity,</strong> 은 Comparable 했습니다.<br>
-(안 좋다.)</p></li>
-<li><p>하지만, <strong>Diversity</strong> 는 훨씬 뛰어났습니다.</p></li>
-</ol>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p><strong>Image Manipulations 가 가능합니다.</strong></p>
-<ul class="simple">
-<li><p>Bipartite Representation</p>
-<ul>
-<li><p>unCLIP 구조 덕분에, <br>
-주어진 이미지 x 를 (z_i, x_T) 와 같은 bipartite latent representation 로 인코딩 가능합니다.</p></li>
-<li><p>이 latent space 를 활용해서, Image manipulation 을 수행할 수 있습니다.</p></li>
-<li><p>x_T 는 DDIM inversion 을 z_i 가 condition 된 x 에 적용해 얻으며,<br>
-Decoder 가 x 를 복원하는데 필요한 잔여 정보들을 지닙니다.</p></li>
-</ul>
-</li>
-</ul>
-  <br>
-<ol class="arabic">
-<li><p><strong>Variations</strong></p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_171.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_171.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 117 </span><span class="caption-text">Variations of an input image by encoding with CLIP and then decoding with a diffusion model.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Non-essential details 를 변주하기 위해서,<br>
-bipartite representation 에 DDIM with η &gt; 0 for sampling decoder 를 적용합니다.</p></li>
-<li><p>η = 0 일 때, decoder 는 deterministic 해지고 x 자체를 복원해냅니다.</p></li>
-<li><p>η 가 커질수록, sampling steps 에는 stochasticity 가 생기고,<br>
-원본 이미지 x 근처에서 perceptually “centered” 된 variations 를 만들어낼 것입니다.</p></li>
-<li><p>η 를 키우면, 우리는 CLIP image embedding 에 어떤 정보가 존재하고 어떤 정보가 유실되었는지 탐색 가능합니다.<br>
-<strong>→ 즉, CLIP latent space 를 탐색해낼 수 있는거죠 !</strong></p></li>
-</ul>
-</li>
-</ol>
-  <br>
-<ol class="arabic" start="2">
-<li><p><strong>Interpolations</strong></p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_181.png"><img alt="img_18" class="bg-primary mb-1" src="../../_images/img_181.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 118 </span><span class="caption-text">Variations between two images by interpolating their CLIP image embedding and then decoding with a diffusion model.</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>이런 것도 됩니다.<br>
-input image 두 장의 CLIP image embeddings 를 interpolation 해서 Decoder 에 준다면,<br>
-interpolated image 를 생성할 수 있습니다.</p></li>
-</ul>
-</li>
-</ol>
-  <br>
-<ol class="arabic" start="3">
-<li><p><strong>Text Diffs</strong></p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_191.png"><img alt="img_19" class="bg-primary mb-1" src="../../_images/img_191.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 119 </span><span class="caption-text">Text diffs applied to images by interpolating between their CLIP image embeddings and a normalised difference of the CLIP text embeddings produced from the two descriptions.</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>어떤 이미지와 그 캡션이 주어져있을 때,<br>
-그 이미지를 우리가 원하는 target text prompt 에 맞게 조작할 수도 있습니다.</strong></p></li>
-<li><p><strong>Method</strong></p>
-<ul class="simple">
-<li><p><strong>z_t0 = current CLIP text embedding</strong> 이고,</p></li>
-<li><p><strong>z_t = target CLIP text embedding</strong> 이라면,</p></li>
-</ul>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_19_2.png"><img alt="img_19_2" class="bg-primary mb-1" src="../../_images/img_19_2.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 120 </span><span class="caption-text">text diff method</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>주어진 이미지의 <strong>CLIP image embedding z_i</strong> 를 <br>
-바로 이 <strong>text diff vector 와 interpolate 해서 Decoding</strong> 하면 이미지가 조작됩니다.</p></li>
-</ul>
-</li>
-</ol>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p><strong>typographic attacks 에 대해서, Robust 합니다.</strong></p>
-<ul>
-<li><p><strong>typographic attacks</strong> : 이미지 내 사물 위에, 글씨가 쓰여 있는 경우입니다.</p></li>
-<li><p>Multimodal 로 학습한 CLIP 은 텍스트에 있는 정보를 더 많이 활용해<br>
-사물을 판단하는 경향이 있습니다.</p>
-<ol class="arabic simple">
-<li><p>unCLIP 의 Decoder 모델에 “iPod” 텍스트 종이가 붙은 사과를 보고 분류를 수행해보았습니다.</p></li>
-<li><p>역시, “Granny Smith” 의 예측 확률이 거의 0 에 가깝다고 판단했습니다.</p></li>
-<li><p>그럼에도 불구하고, 사과의 사진으로 recover 해냅니다.</p></li>
-</ol>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_201.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_201.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 121 </span><span class="caption-text">Variations of images featuring typographic attacks</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이처럼 DALLE2 는 typographic attacks 에 더욱 robust 합니다.</p>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>이 모델, 단점은 없나요?</strong></p>
-  <br>
-<ol class="arabic simple">
-<li><p><strong>객체(cubes)와 그들의 속성(colors) 을 매칭시키는 능력이 떨어집니다.</strong></p></li>
-</ol>
-<figure class="align-default" id="id21">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_211.png"><img alt="img_21" class="bg-primary mb-1" src="../../_images/img_211.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 122 </span><span class="caption-text">Samples from unCLIP and GLIDE for the prompt “a red cube on top of a blue cube”.</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 그림처럼, 파란 큐브 위에 빨간 큐브를 그려달라고 했을 때, <br>
-DALLE2 는 <strong>아래의 큐브와 위의 큐브에 각각 어떤 색상 (attributes) 를 부여해야할지</strong> 헷갈려합니다.</p>
-  <br>
-<ol class="arabic simple" start="2">
-<li><p><strong>텍스트를 일관성있게 생성하는 능력이 떨어집니다</strong></p></li>
-</ol>
-<figure class="align-default" id="id22">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_221.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_221.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 123 </span><span class="caption-text">Samples from unCLIP for the prompt, “A sign that says deep learning.”</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>물론 이것은 DALLE2 만의 문제는 아닙니다.<br>
-많은 text-to-image models 가 어려워하는 문제입니다.</p>
-  <br>
-<ol class="arabic simple" start="3">
-<li><p><strong>복잡한 상황에서 디테일을 묘사하는 능력이 떨어집니다</strong></p></li>
-</ol>
-<figure class="align-default" id="id23">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_231.png"><img alt="img_23" class="bg-primary mb-1" src="../../_images/img_231.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 124 </span><span class="caption-text">unCLIP samples show low levels of detail for some complex scenes.</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>복잡한 네온 사인들의 디테일들이 좀 떨어지는 것을 확인하실 수 있습니다.</p>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>Method - Training</strong></p>
-<ul class="simple">
-<li><p>본 논문의 Method 에서는, unCLIP 모델의 아키텍쳐에 대한 수학적 justify 를 하고 있습니다.</p></li>
-<li><p>Training 데이터셋의 이미지를 x 라 합시다.</p></li>
-<li><p>그에 상응하는 text captions 을 y 라 합시다.</p></li>
-<li><p>각각에 대한 embeddings 인 Z_i, Z_t 를 기존의 CLIP 으로  생성합니다.</p>
-<ul>
-<li><p>image <strong>x —CLIP Image encoder—&gt; Z_i</strong> image embeddings</p></li>
-<li><p>text caption <strong>y —CLIP text encoder—&gt; Z_t</strong> text embeddings</p></li>
-</ul>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p>저자의 주장</p>
-<ul>
-<li><p>unCLIP 으로, text caption y 로부터 image x 를 샘플링할 수 있다고 합니다.</p>
-<figure class="align-default" id="id24">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_241.png"><img alt="img_24" class="bg-primary mb-1" src="../../_images/img_241.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 125 </span><span class="caption-text">P(x|y) equation.</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><em><strong>The first equality holds because z_i is a deterministic function of x.</strong></em></p></li>
-<li><p><em><strong>The second equality holds because of the chain rule.</strong></em></p></li>
-</ul>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p><strong>포스팅을 위한 부가 설명</strong></p>
-<ul>
-<li><p>z_t 도 y 의 deterministic function 이므로, 다음과 같이 쓸 수 있죠.</p>
-<div class="math notranslate nohighlight">
-\[
-            P(x|y) = P(x, z_i|y, z_t) = P(x|z_i, y, z_t)P(z_i|y, z_t)
-            \]</div>
-</li>
-<li><p>즉 위 공식을 풀어서 해설해보면 다음과 같습니다.<br>
-Prior 를 사용해 Z_t 로부터 Z_i 를 샘플링하고,<br>
-Decoder 를 사용해 x 를 샘플링함으로써<br>
-True conditional distribution 인 P(x|y) 샘플링이 가능해지는 것입니다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<br>
-<ul>
-<li><p><strong>DALL-E 2 Bias</strong></p>
-  <br>
-<p>개인적으로 DALLE2 와 같은 모델에 Bias 는 없는지 궁금해서 추가적으로 공부해봤습니다.<br>
-DALLE2 에 Bias 가 있는지,<br>
-Bias 가 있다면 해소하기 위해 어떤 노력을 하고있는지,<br>
-Bias 는 대체 어떻게 정량적으로 평가할 수 있는지 조사해봤습니다.<br></p>
-<p>결과부터 말씀드리면, DALLE2 처럼, 웹크롤링 데이터를 학습한 모델은 Bias 가 존재한다고 합니다.<br>
-이런 Bias 를 해소하기 위해서 OpenAI 는 어떤 노력을 하고있는지부터 살펴볼까요?</p>
-<p><a class="github reference external" href="https://github.com/openai/dalle-2-preview/blob/main/system-card.md">openai/dalle-2-preview</a></p>
-<ul class="simple">
-<li><p><strong>현재 OpenAI 가 DALL-E 2 의 Safety 를 위해 하고 있는 노력</strong></p>
-<ol class="arabic simple">
-<li><p>학습 데이터에서 violent, hate, or adult images 를 제거함으로써<br>
-이러한 이미지들에 DALL-E 2 가 노출되는 시간을 최소화했다고 합니다.</p></li>
-<li><p>Safety policies 를 위반한 text prompts 혹은 생성된 images 를 자정하는 시스템을 보유하고 있다고 합니다.</p></li>
-<li><p>신뢰할 수 있는 전문가들과 DALL-E 2 에 대한 사전 검토를 진행했다고 합니다.</p></li>
-</ol>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p><strong>DALL-EVAL : 이미지 생성형 AI 의 Bias 를 평가하는 방법 소개</strong></p>
-<p>DALLE 와 같은 Text-to-Image 생성형 모델을 정량적으로 평가하는 기법이 있습니다.<br>
-바로 DALL-EVAL 입니다.</p>
-<p><a class="reference external" href="https://arxiv.org/pdf/2202.04053.pdf">https://arxiv.org/pdf/2202.04053.pdf</a></p>
-<p><a class="github reference external" href="https://github.com/j-min/DallEval">j-min/DallEval</a></p>
-<ul>
-<li><p><strong>Contribution</strong></p>
-<ul class="simple">
-<li><p>Text to Image Generation models 의 시각적 추론 능력 3가지를 평가하기 위한 데이터셋 제공합니다.</p></li>
-<li><p>최근의 모델들이 object recognition skill 은 상대적으로 뛰어나지만,<br>
-object counting 및 spatial relation 이해 능력은 떨어짐을 발견했습니다.</p></li>
-<li><p>Text to Image Generation models 에 존재하는 <br>
-<strong>gender 및 skin tone biases 를 측정하는 evaluation metrics 와 그 분석 결과를 제시한 최초의 논문</strong> 입니다.</p>
-<ul>
-<li><p>web 에서 image-text pairs 를 학습한 현재 T2I 생성 모델들은<br>
-특정한 gender/skin tone biases 를 학습했음을 보여주었습니다.</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id25">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_261.png"><img alt="img_26" class="bg-primary mb-1" src="../../_images/img_261.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 126 </span><span class="caption-text">Overview of DALL-EVAL evaluation process.</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Social Bias Evaluation 측정 방법 (Sec. 4.2.1 참고)</strong></p>
-<figure class="align-default" id="id26">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_271.png"><img alt="img_27" class="bg-primary mb-1" src="../../_images/img_271.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 127 </span><span class="caption-text">Overview of DALL-EVAL’s social bias evaluation process.</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DALL-EVAL 의 자세한 social bias evaluation process 는 다음과 같습니다.</p>
-<ol class="arabic simple">
-<li><p>우선 <strong>Diagnostic prompts 로 이미지들을 생성합니다.</strong></p>
-<ol class="arabic simple">
-<li><p>ex) “a person who works as a nurse”</p></li>
-<li><p>총 252개의 Diagnostic prompts 제공</p></li>
-</ol>
-</li>
-<li><p><strong>Gender, Skin tone, Attributes 를 생성된 이미지로부터 탐지합니다.</strong></p>
-<ol class="arabic simple">
-<li><p>using automated detection models and verify the reliability of detection models with human evaluation</p></li>
-<li><p><strong>Gender</strong></p>
-<ol class="arabic simple">
-<li><p>BLIP-2 라는 모델에 생성된 영상을 주면서 영상 내 사람의 성별을 맞추게 합니다.<br>
-BLIP-2 의 답변을 기반으로 Gender Bias 측정합니다.</p></li>
-</ol>
-</li>
-<li><p><strong>Skin tone</strong></p>
-<ol class="arabic simple">
-<li><p>신경망으로 facial landmark 를 추출하고, illumination 을 측정합니다.</p></li>
-</ol>
-</li>
-<li><p><strong>Attributes</strong></p>
-<ol class="arabic simple">
-<li><p>BLIP-2 라는 모델에 생성된 영상을 주면서 영상 내 사람의 복장을 맞추게 합니다.<br>
-BLIP-2 의 답변을 기반으로 Attributes Bias 측정합니다.</p></li>
-</ol>
-</li>
-</ol>
-</li>
-<li><p>탐지된 Gender, Skin tone, Attributes 가<br>
-unbiased uniform distribution 으로부터 얼마나 skewed 되어있는지 측정합니다.</p></li>
-</ol>
-</li>
-</ul>
-  <br>
-<ul>
-<li><p><strong>실험 결과</strong></p>
-<figure class="align-default" id="id27">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_281.png"><img alt="img_28" class="bg-primary mb-1" src="../../_images/img_281.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 128 </span><span class="caption-text">Gender, skin tone, and attribute detection results with automated and expert human evaluation.</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id28">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_291.png"><img alt="img_29" class="bg-primary mb-1" src="../../_images/img_291.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 129 </span><span class="caption-text">Per-profession examples and average gender bias or average skin tone bias of images.</span><a class="headerlink" href="#id28" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id29">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_301.png"><img alt="img_30" class="bg-primary mb-1" src="../../_images/img_301.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 130 </span><span class="caption-text">Comparison of overall gender and skin tone bias of each model.</span><a class="headerlink" href="#id29" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p>위 실험 결과와 같이, DALL-EVAL 은 Text-to-Image models 를 정량적으로 평가하는데에 성공했습니다.<br>
-Stable Diffusion 처럼 웹크롤링을 활용해 데이터를 학습한 모델은 Bias 가 존재했습니다.<br>
-이처럼 생성형 AI 의 Bias 를 측정하기 위한 다양한 노력이 지속되고 있습니다.<br>
-미래에는 생성형 AI 가 더 안전하게 활용될 수 있기를 기대합니다.</p>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="dalle.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DALL-E</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="dreambooth.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DreamBooth</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DALL-E 2 &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DALLE2';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DreamBooth" href="dreambooth.html" />
+    <link rel="prev" title="DALL-E" href="dalle.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DALLE2.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DALLE2.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DALL-E 2</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Hierarchical Text-Conditional Image Generation with CLIP Latents (arXiv 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2204.06125v1.pdf">https://arxiv.org/pdf/2204.06125v1.pdf</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> SeonHoon Kim</p></li>
+<li><p><strong>Last updated on Sep. 18, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dall-e-2">
+<h1>DALL-E 2<a class="headerlink" href="#dall-e-2" title="Permalink to this heading">#</a></h1>
+<p>DALLE2 는 2022년에 공개되어 세상을 놀라게 했습니다.<br>
+이미지 생성 능력도 뛰어났고, 이미지를 사용자 입맛에 맞게 조작할 수 있게 되었죠.</p>
+<p>DALLE2 의 이름은 왜 DALL-E 일까요?<br>
+DALLE2 의 DALLE 는 초현실주의 화가 Salvador Dali 와 WALL-E 의 합성어입니다.<br>
+DALLE2 로 생성해낸 결과물이 과연 어떻길래 세상을 놀라게 했을까요?</p>
+<br>
+<ul>
+<li><p><strong>DALL-E 2 결과물</strong></p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_011.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_011.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 102 </span><span class="caption-text">Salvador Dali 의 생전 모습</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_001.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_001.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 103 </span><span class="caption-text">vibrant portrait of Salvador Dali with a robotic half face from DALLE2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 그림은 DALLE2 가 생성해낸 “vibrant portrait of Salvador Dali with a robotic half face” 이미지입니다.<br>
+실제 Salvador Dali 의 모습이 보이네요.<br>
+게다가 Salvador Dali 의 초현실주의적 그림체가 반영된 것 같기도 합니다.<br>
+놀라운 이미지입니다.</p>
+<p>아래의 corgi 그림은 어떤가요 ?</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_021.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_021.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 104 </span><span class="caption-text">a corgi’s head depicted as an explosion of a nebula from DALLE2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>corgi 의 모습을 성운의 폭발로 묘사해달라고 했을 때 생성된 그림입니다.<br>
+아래의 그림은, 실제 NASA 에서 촬영한 초신성 폭발의 잔해입니다.</p>
+<p>정말 그럴듯하지 않나요?</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_031.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_031.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 105 </span><span class="caption-text">This mosaic image, one of the largest ever taken by NASA’s Hubble Space Telescope of the Crab Nebula, is a six-light-year-wide expanding remnant of a star’s supernova explosion.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<br>
+<ul class="simple">
+<li><p><strong>학습 목표 및 주의사항</strong></p>
+<ul>
+<li><p>본 포스팅에서는 DALLE2 paper 의 내용을 비선형적으로 살펴봅니다.<br>
+마치 오픈월드 게임처럼 말이죠.<br>
+핵심이 되는 질문들을 던지며, DALLE2 의 아키텍쳐를 파헤쳐 볼 겁니다.</p></li>
+<li><p>본 포스팅은 <a class="reference external" href="https://cdn.openai.com/papers/dall-e-2.pdf">DALL-E 2 paper</a>, <a class="reference external" href="https://openai.com/dall-e-2">OpenAI blog</a>, <a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">AssemblyAI Youtube</a>, <a class="reference external" href="https://www.youtube.com/watch?v=gmfI3B6pQTo&amp;t=83s&amp;ab_channel=EdanMeyer">Edan Meyer Youtube</a> 를 참고했습니다.</p></li>
+<li><p>본격적으로 학습하기 전에 알아야할 것은, CLIP 모델입니다.</p>
+<ul>
+<li><p>CLIP 은, 이미지와 text 를 학습한 multi-modal 모델입니다.</p>
+<ul>
+<li><p>The fundamental principles of training CLIP are quite simple:</p>
+<ol class="arabic simple">
+<li><p>First, all images and their associated captions are passed through their respective encoders, mapping all objects into an m-dimensional space.</p></li>
+<li><p>Then, the cosine similarity of each <em>(image, text)</em> pair is computed.</p></li>
+<li><p>The training objective is to simultaneously <strong>maximize the cosine similarity</strong> between N <strong>correct</strong> encoded image/caption pairs and <strong>minimize the cosine similarity</strong> between N<sup>2</sup> - N <strong>incorrect</strong> encoded image/caption pairs.</p></li>
+</ol>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>DALL-E 2 는 CLIP 과 Diffusion Model 을 통합시켰습니다. (최초는 x)</p></li>
+<li><p>하지만 CLIP 을 사용하는 것이 정답은 아닙니다.<br>
+DALL-E 2 는 22년 5월, CLIP 을 사용하지 않은 IMAGEN 에게 SOTA 를 내주었습니다.</p></li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>아키텍쳐 찍먹하기</strong></p>
+<p>특정 이미지 내의 Semantics 와 style 을 모두 포착해낼 수 있는 CLIP 의 이미지 표현 능력을 끌어올리기 위해서,<br>
+저자들은 CLIP 과 Diffusion 모델을 통합한 Two-stage model 을 제안합니다.<br>
+이것이 바로 DALLE2 인데요.<br>
+저자들은 이 모델을 unCLIP 이라고 부릅니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_061.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_061.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 106 </span><span class="caption-text">A high level overview of the architecture.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DALLE2 paper 의 그림은 좀 복잡해보이니,<br>
+Assembly AI 의 Youtube 에서 제공하는 좀 더 단순화된 그림을 살펴볼게요.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_07.png"><img alt="img_07" class="bg-primary mb-1" src="../../_images/img_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 107 </span><span class="caption-text">A high level overview of the architecture from AssemblyAI youtube.</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI</a></p>
+<p>Prior 와 Decoder 가 DALLE2 의 핵심이 되는 모델인 것 같네요.</p>
+<ul class="simple">
+<li><p><strong>Prior</strong> : 텍스트 캡션을 받아서, 상응하는 CLIP image embedding 을 생성합니다.</p>
+<ul>
+<li><p>본 논문에서는 Autoregressive prior 와 Diffusion prior 를 비교하는 실험을 수행했습니다.</p></li>
+<li><p>Diffusion prior 가 computationally efficient 하고, 고품질 이미지를 생성합니다.<br>
+따라서 후반부에는 Diffusion prior 만 사용해서 실험합니다.</p></li>
+</ul>
+</li>
+<li><p><strong>Decoder</strong> : CLIP image embedding 을 받아서, 이미지를 생성합니다.</p>
+<ul>
+<li><p>Diffusion 모델만 사용했습니다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>왜 CLIP 이랑 Diffusion 을 사용했을까요?</strong></p>
+<ul>
+<li><p><strong>CLIP</strong></p>
+<ul class="simple">
+<li><p>CLIP 이 image representation 을 학습하는 데에 큰 성공을 거두고 있었습니다.</p></li>
+<li><p>CLIP embeddings 는 image distribution shift 에 robust 했습니다.</p></li>
+<li><p>CLIP embeddings 는 zero-shot capabilities 가 뛰어났습니다.</p></li>
+<li><p>다양한 vision &amp; language tasks 에 fine-tuned 되어 SOTA 를 달성해냈습니다.</p></li>
+</ul>
+</li>
+<li><p><strong>Diffusion</strong></p>
+<ul class="simple">
+<li><p>Diffusion 은 image 와 video generation tasks 에서 SOTA 를 갱신하는 중이었죠.</p></li>
+<li><p>non-deterministic 하게 만들 수 있습니다.<br>
+이러한 Decoder 덕분에, CLIP image embedding 과 같은 <br>
+<strong>image representation 에 존재하지 않는 non-essential 한 details</strong> 는 <strong>변주하면서,</strong> <br>
+<strong>image representation 의 semantics 와 style 은 유지</strong>할 수 있죠.</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_081.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_081.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 108 </span><span class="caption-text">Variations of an input image by encoding with CLIP and then decoding with a diffusion model.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 왼쪽의 그림처럼, Salvador Dali 의 그림에서 중요한 objects 들은 보존됩니다.<br>
+하지만 그들이 표현되는 방식이나 전체적인 그림의 style 은 조금씩 바뀝니다.<br>
+그럼에도, Salvador Dali 특유의 초현실주의적 화풍은 유지되는 것 같네요.<br>
+Diffusion Decoder 덕분에, <strong>Non-essential details</strong> 는<br>
+마치 <strong>변주곡처럼 매번 새롭게 연주</strong>해낼 수 있는겁니다.</p>
+</li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>아키텍쳐 파헤치기</strong></p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_091.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_091.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 109 </span><span class="caption-text">A high level overview of the architecture from AssemblyAI youtube.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><a class="reference external" href="https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI">https://www.youtube.com/watch?v=F1X4fHzF4mQ&amp;t=360s&amp;ab_channel=AssemblyAI</a>
+<br></p>
+<p>이번에는 DALLE2 의 아키텍쳐를 좀 더 자세히 살펴보죠.</p>
+<ul>
+<li><p><strong>Prior</strong></p>
+<ul class="simple">
+<li><p><strong>input</strong></p>
+<ul>
+<li><p>Caption 그 자체의 embedding vector 입니다.</p></li>
+<li><p><strong>CLIP text embedding</strong> 입니다.</p></li>
+</ul>
+</li>
+<li><p><strong>output</strong></p>
+<ul>
+<li><p><strong>Generated CLIP Image embedding</strong> 입니다.</p></li>
+</ul>
+</li>
+<li><p><strong>설명</strong></p>
+<ul>
+<li><p>사실 Prior 은 CLIP text embedding 만 조건으로 받는 것이 아니라 Caption 자체도 받습니다.<br>
+(물론 embedding vector 로 받겠죠)<br>
+CLIP text embedding 과, 그 Caption 은 서로 1대1 대응되기 때문에,<br>
+Dual-conditioning 이 문제될 것은 없다고 저자들은 변론합니다.</p></li>
+<li><p>샘플 퀄리티를 높이기 위해서 2개의 CLIP image embeddings 를 생성한 후 <br>
+주어진 CLIP text embedding 과 더 높은 dot product 를 갖는 CLIP image embedding 을 사용했다고 합니다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Decoder</strong></p>
+<ul>
+<li><p><strong>Input</strong></p>
+<ul class="simple">
+<li><p>CLIP text embedding</p></li>
+<li><p>Generated CLIP Image embedding</p></li>
+</ul>
+</li>
+<li><p><strong>Output</strong></p>
+<ul class="simple">
+<li><p>Generated Image</p></li>
+</ul>
+</li>
+<li><p><strong>설명</strong></p>
+<ul class="simple">
+<li><p>modified GLIDE model 을 Decoder 로 사용했습니다.<br>
+→ 따라서, <strong>projected CLIP text embeddings 를 아키텍쳐</strong>에 통합시킬 수 있다고 주장합니다.
+<br>
+어떻게 통합시키냐하면,</p></li>
+</ul>
+<ol class="arabic simple">
+<li><p>GLIDE timestep embedding 에 추가하고,</p></li>
+<li><p>4개의 extra context tokens 을 만들어서 GLIDE text encoder 의 output sequence 에 concat 하는거죠.
+<br>
+이 방법으로 <strong>CLIP image embeddings 를 받아서, 원본 영상을 생성하는 것</strong> 입니다.</p></li>
+</ol>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_101.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_101.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 110 </span><span class="caption-text">GLIDE training process</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>GLIDE 를 수정해 사용함으로써 GLIDE 가 가지고 있던<br>
+text-conditional photorealistic image generation capabilities 를 활용할 수 있다고 주장합니다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>그렇다면 왜 Prior 가 필요할까요?</strong></p>
+<ol class="arabic simple">
+<li><p><strong>To obtain a full generative model of images</strong>, <br>
+we combine the CLIP image embedding decoder with a prior model, <br>
+which generates possible CLIP image embeddings from a given text caption <br></p></li>
+</ol>
+<p>라고 하지만.. 딱히 와닿지는 않습니다.<br>
+하지만 아직 실망하긴 이릅니다.<br>
+Prior 의 유무에 따라, 생성된 이미지의 품질을 비교하는 실험을 수행했다고 합니다.<br>
+한번 살펴볼까요?</p>
+<ol class="arabic simple" start="2">
+<li><p><strong>아래 세 가지 아키텍쳐를 비교하는 실험 수행</strong><br>
+(1) GLIDE 모델처럼, text 의 token embeddings 만 조건으로 주어 실험<br>
+(2) 추가적으로, CLIP text embeddings 를 조건으로 주어 실험<br>
+(3) 추가적으로, CLIP image embeddings 를 생성해내는 Prior 를 갖추고 실험<br>
+<br>
+실험 결과, (3) 이 가장 훌륭했습니다.<br>
+특히 image diversity 가 뛰어났습니다.</p></li>
+</ol>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_111.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_111.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 111 </span><span class="caption-text">3가지 경우의 아키텍쳐에 따른 실험 결과 from AssemblyAI youtube.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_121.png"><img alt="img_12" class="bg-primary mb-1" src="../../_images/img_121.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 112 </span><span class="caption-text">Samples using different conditioning signals for the same decoder.</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그렇지만, 의문이 말끔히 해소되지는 않습니다.
+왜냐하면..</p>
+<ul class="simple">
+<li><p><strong>95% 의 학습 시간 동안, (3) 방식으로 학습한 Decoder 를,</strong><br>
+<strong>(1) 과 (2) 방식에 그대로 적용해 실험했습니다.</strong> <br>
+따라서 공정한 실험이라고 보긴 어려울 것 같습니다.</p></li>
+<li><p><strong>Decoder 를, True CLIP Image embeddings 와 Generated CLIP Image embeddings 로</strong><br>
+<strong>각각 학습시켰을 때의 성능 비교 실험은 없습니다.</strong>
+<br>
+개인적으로 저는 이러한 결과들을 보고,<br>
+Prior 를 반드시 써야하는 근거에 대한 설득력이 떨어진다고 생각했습니다.</p></li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>왜 CLIP 을 써야할까요?</strong></p>
+<ol class="arabic simple">
+<li><p>CLIP 은 어떤 객체를 묘사한 텍스트와, 그 객체의 시각적 발현 사이의 의미론적 관계를 학습했습니다. <br>
+따라서 저자들은 이러한 CLIP 의 능력이 Text-to-Image task 에서 매우 중요하다고 주장합니다.</p></li>
+<li><p><strong>CLIP 을 활용한 덕분에 이미지를 Manipulation 할 수 있습니다.</strong></p></li>
+</ol>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_131.png"><img alt="img_13" class="bg-primary mb-1" src="../../_images/img_131.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 113 </span><span class="caption-text">Text diffs applied to images by interpolating between their CLIP image embeddings and a normalised difference of the CLIP text embeddings produced from the two descriptions.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>어떻게 이미지를 Manipulation 하는지는 곧 자세히 살펴보겠습니다.</p>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>그래서 이 모델은 뭐가 좋은가요?</strong></p>
+<ul>
+<li><p><strong>Evaluation 결과, Diversity 가 뛰어났습니다.</strong></p>
+<ul class="simple">
+<li><p>모델을 평가하기 위해서,<br>
+주어진 Caption 에 대한 GLIDE 의 생성물과 unCLIP 의 생성물을 사람들에게 제시하고,<br>
+<strong>Photorealism, Caption Similarity, Diversity</strong> 에 대해서 <strong>점수를 매기도록</strong> 했습니다.<br></p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_141.png"><img alt="img_14" class="bg-primary mb-1" src="../../_images/img_141.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 114 </span><span class="caption-text">Samples when increasing guidance scale for both unCLIP and GLIDE.</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_151.png"><img alt="img_15" class="bg-primary mb-1" src="../../_images/img_151.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 115 </span><span class="caption-text">Comparison of unCLIP and GLIDE for different evaluations.</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_161.png"><img alt="img_16" class="bg-primary mb-1" src="../../_images/img_161.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 116 </span><span class="caption-text">FID versus guidance scale for unCLIP and GLIDE.</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>결론은 다음과 같습니다.</p>
+<ol class="arabic simple">
+<li><p>GLIDE 에 비해서 <strong>Photorealism, Caption Similarity</strong> 는 Comparable 했습니다.<br>
+(안 좋다.)</p></li>
+<li><p>하지만, <strong>Diversity</strong> 는 훨씬 뛰어났습니다.</p></li>
+</ol>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p><strong>Image Manipulations 가 가능합니다.</strong></p>
+<ul class="simple">
+<li><p>Bipartite Representation</p>
+<ul>
+<li><p>unCLIP 구조 덕분에, <br>
+주어진 이미지 x 를 (z_i, x_T) 와 같은 bipartite latent representation 으로 인코딩 가능합니다.</p></li>
+<li><p>이 latent space 를 활용해서, Image manipulation 을 수행할 수 있습니다.</p></li>
+<li><p>x_T 는 DDIM inversion 을 z_i 가 condition 된 x 에 적용해 얻으며,<br>
+Decoder 가 x 를 복원하는데 필요한 잔여 정보들을 지닙니다.</p></li>
+</ul>
+</li>
+</ul>
+  <br>
+<ol class="arabic">
+<li><p><strong>Variations</strong></p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_171.png"><img alt="img_17" class="bg-primary mb-1" src="../../_images/img_171.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 117 </span><span class="caption-text">Variations of an input image by encoding with CLIP and then decoding with a diffusion model.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Non-essential details 를 변주하기 위해서,<br>
+bipartite representation 에 DDIM with η &gt; 0 for sampling decoder 를 적용합니다.</p></li>
+<li><p>η = 0 일 때, decoder 는 deterministic 해지고 x 자체를 복원해냅니다.</p></li>
+<li><p>η 가 커질수록, sampling steps 에는 stochasticity 가 생기고,<br>
+원본 이미지 x 근처에서 perceptually “centered” 된 variations 를 만들어낼 것입니다.</p></li>
+<li><p>η 를 키우면, 우리는 CLIP image embedding 에 어떤 정보가 존재하고 어떤 정보가 유실되었는지 탐색 가능합니다.<br>
+<strong>→ 즉, CLIP latent space 를 탐색해낼 수 있는거죠 !</strong></p></li>
+</ul>
+</li>
+</ol>
+  <br>
+<ol class="arabic" start="2">
+<li><p><strong>Interpolations</strong></p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_181.png"><img alt="img_18" class="bg-primary mb-1" src="../../_images/img_181.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 118 </span><span class="caption-text">Variations between two images by interpolating their CLIP image embedding and then decoding with a diffusion model.</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이런 것도 됩니다.<br>
+input image 두 장의 CLIP image embeddings 를 interpolation 해서 Decoder 에 준다면,<br>
+interpolated image 를 생성할 수 있습니다.</p></li>
+</ul>
+</li>
+</ol>
+  <br>
+<ol class="arabic" start="3">
+<li><p><strong>Text Diffs</strong></p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_191.png"><img alt="img_19" class="bg-primary mb-1" src="../../_images/img_191.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 119 </span><span class="caption-text">Text diffs applied to images by interpolating between their CLIP image embeddings and a normalised difference of the CLIP text embeddings produced from the two descriptions.</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>어떤 이미지와 그 캡션이 주어져있을 때,<br>
+그 이미지를 우리가 원하는 target text prompt 에 맞게 조작할 수도 있습니다.</strong></p></li>
+<li><p><strong>Method</strong></p>
+<ul class="simple">
+<li><p><strong>z_t0 = current CLIP text embedding</strong> 이고,</p></li>
+<li><p><strong>z_t = target CLIP text embedding</strong> 이라면,</p></li>
+</ul>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_19_2.png"><img alt="img_19_2" class="bg-primary mb-1" src="../../_images/img_19_2.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 120 </span><span class="caption-text">text diff method</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>주어진 이미지의 <strong>CLIP image embedding z_i</strong> 를 <br>
+바로 이 <strong>text diff vector 와 interpolate 해서 Decoding</strong> 하면 이미지가 조작됩니다.</p></li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p><strong>typographic attacks 에 대해서, Robust 합니다.</strong></p>
+<ul>
+<li><p><strong>typographic attacks</strong> : 이미지 내 사물 위에, 글씨가 쓰여 있는 경우입니다.</p></li>
+<li><p>Multimodal 로 학습한 CLIP 은 텍스트에 있는 정보를 더 많이 활용해<br>
+사물을 판단하는 경향이 있습니다.</p>
+<ol class="arabic simple">
+<li><p>unCLIP 의 Decoder 모델에 “iPod” 텍스트 종이가 붙은 사과를 보고 분류를 수행해보았습니다.</p></li>
+<li><p>역시, “Granny Smith” 의 예측 확률을 거의 0 에 가깝다고 판단했습니다.</p></li>
+<li><p>그럼에도 불구하고, 사과의 사진으로 recover 해냅니다.</p></li>
+</ol>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_201.png"><img alt="img_20" class="bg-primary mb-1" src="../../_images/img_201.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 121 </span><span class="caption-text">Variations of images featuring typographic attacks</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이처럼 DALLE2 는 typographic attacks 에 더욱 robust 합니다.</p>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>이 모델, 단점은 없나요?</strong></p>
+  <br>
+<ol class="arabic simple">
+<li><p><strong>객체(cubes)와 그들의 속성(colors) 을 매칭시키는 능력이 떨어집니다.</strong></p></li>
+</ol>
+<figure class="align-default" id="id21">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_211.png"><img alt="img_21" class="bg-primary mb-1" src="../../_images/img_211.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 122 </span><span class="caption-text">Samples from unCLIP and GLIDE for the prompt “a red cube on top of a blue cube”.</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 그림처럼, 파란 큐브 위에 빨간 큐브를 그려달라고 했을 때, <br>
+DALLE2 는 <strong>아래의 큐브와 위의 큐브에 각각 어떤 색상 (attributes) 를 부여해야할지</strong> 헷갈려합니다.</p>
+  <br>
+<ol class="arabic simple" start="2">
+<li><p><strong>텍스트를 일관성있게 생성하는 능력이 떨어집니다</strong></p></li>
+</ol>
+<figure class="align-default" id="id22">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_221.png"><img alt="img_22" class="bg-primary mb-1" src="../../_images/img_221.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 123 </span><span class="caption-text">Samples from unCLIP for the prompt, “A sign that says deep learning.”</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>물론 이것은 DALLE2 만의 문제는 아닙니다.<br>
+많은 text-to-image models 가 어려워하는 문제입니다.</p>
+  <br>
+<ol class="arabic simple" start="3">
+<li><p><strong>복잡한 상황에서 디테일을 묘사하는 능력이 떨어집니다</strong></p></li>
+</ol>
+<figure class="align-default" id="id23">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_231.png"><img alt="img_23" class="bg-primary mb-1" src="../../_images/img_231.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 124 </span><span class="caption-text">unCLIP samples show low levels of detail for some complex scenes.</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>복잡한 네온 사인들의 디테일들이 좀 떨어지는 것을 확인하실 수 있습니다.</p>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>Method - Training</strong></p>
+<ul class="simple">
+<li><p>본 논문의 Method 에서는, unCLIP 모델의 아키텍쳐에 대한 수학적 justify 를 하고 있습니다.</p></li>
+<li><p>Training 데이터셋의 이미지를 x 라 합시다.</p></li>
+<li><p>그에 상응하는 text captions 을 y 라 합시다.</p></li>
+<li><p>각각에 대한 embeddings 인 Z_i, Z_t 를 기존의 CLIP 으로  생성합니다.</p>
+<ul>
+<li><p>image <strong>x —CLIP Image encoder—&gt; Z_i</strong> image embeddings</p></li>
+<li><p>text caption <strong>y —CLIP text encoder—&gt; Z_t</strong> text embeddings</p></li>
+</ul>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p>저자의 주장</p>
+<ul>
+<li><p>unCLIP 으로, text caption y 로부터 image x 를 샘플링할 수 있다고 합니다.</p>
+<figure class="align-default" id="id24">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_241.png"><img alt="img_24" class="bg-primary mb-1" src="../../_images/img_241.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 125 </span><span class="caption-text">P(x|y) equation.</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><em><strong>The first equality holds because z_i is a deterministic function of x.</strong></em></p></li>
+<li><p><em><strong>The second equality holds because of the chain rule.</strong></em></p></li>
+</ul>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p><strong>포스팅을 위한 부가 설명</strong></p>
+<ul>
+<li><p>z_t 도 y 의 deterministic function 이므로, 다음과 같이 쓸 수 있죠.</p>
+<div class="math notranslate nohighlight">
+\[
+            P(x|y) = P(x, z_i|y, z_t) = P(x|z_i, y, z_t)P(z_i|y, z_t)
+            \]</div>
+</li>
+<li><p>즉 위 공식을 풀어서 해설해보면 다음과 같습니다.<br>
+Prior 를 사용해 Z_t 로부터 Z_i 를 샘플링하고,<br>
+Decoder 를 사용해 x 를 샘플링함으로써<br>
+True conditional distribution 인 P(x|y) 샘플링이 가능해지는 것입니다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<br>
+<ul>
+<li><p><strong>DALL-E 2 Bias</strong></p>
+  <br>
+<p>개인적으로 DALLE2 와 같은 모델에 Bias 는 없는지 궁금해서 추가적으로 공부해봤습니다.<br>
+DALLE2 에 Bias 가 있는지,<br>
+Bias 가 있다면 해소하기 위해 어떤 노력을 하고있는지,<br>
+Bias 는 대체 어떻게 정량적으로 평가할 수 있는지 조사해봤습니다.<br></p>
+<p>결과부터 말씀드리면, DALLE2 처럼, 웹크롤링 데이터를 학습한 모델은 Bias 가 존재한다고 합니다.<br>
+이런 Bias 를 해소하기 위해서 OpenAI 는 어떤 노력을 하고있는지부터 살펴볼까요?</p>
+<p><a class="github reference external" href="https://github.com/openai/dalle-2-preview/blob/main/system-card.md">openai/dalle-2-preview</a></p>
+<ul class="simple">
+<li><p><strong>현재 OpenAI 가 DALL-E 2 의 Safety 를 위해 하고 있는 노력</strong></p>
+<ol class="arabic simple">
+<li><p>학습 데이터에서 violent, hate, or adult images 를 제거함으로써<br>
+이러한 이미지들에 DALL-E 2 가 노출되는 시간을 최소화했다고 합니다.</p></li>
+<li><p>Safety policies 를 위반한 text prompts 혹은 생성된 images 를 자정하는 시스템을 보유하고 있다고 합니다.</p></li>
+<li><p>신뢰할 수 있는 전문가들과 DALL-E 2 에 대한 사전 검토를 진행했다고 합니다.</p></li>
+</ol>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p><strong>DALL-EVAL : 이미지 생성형 AI 의 Bias 를 평가하는 방법 소개</strong></p>
+<p>DALLE 와 같은 Text-to-Image 생성형 모델을 정량적으로 평가하는 기법이 있습니다.<br>
+바로 DALL-EVAL 입니다.</p>
+<p><a class="reference external" href="https://arxiv.org/pdf/2202.04053.pdf">https://arxiv.org/pdf/2202.04053.pdf</a></p>
+<p><a class="github reference external" href="https://github.com/j-min/DallEval">j-min/DallEval</a></p>
+<ul>
+<li><p><strong>Contribution</strong></p>
+<ul class="simple">
+<li><p>Text to Image Generation models 의 시각적 추론 능력 3가지를 평가하기 위한 데이터셋 제공합니다.</p></li>
+<li><p>최근의 모델들이 object recognition skill 은 상대적으로 뛰어나지만,<br>
+object counting 및 spatial relation 이해 능력은 떨어짐을 발견했습니다.</p></li>
+<li><p>Text to Image Generation models 에 존재하는 <br>
+<strong>gender 및 skin tone biases 를 측정하는 evaluation metrics 와 그 분석 결과를 제시한 최초의 논문</strong> 입니다.</p>
+<ul>
+<li><p>web 에서 image-text pairs 를 학습한 현재 T2I 생성 모델들은<br>
+특정한 gender/skin tone biases 를 학습했음을 보여주었습니다.</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id25">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_261.png"><img alt="img_26" class="bg-primary mb-1" src="../../_images/img_261.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 126 </span><span class="caption-text">Overview of DALL-EVAL evaluation process.</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Social Bias Evaluation 측정 방법 (Sec. 4.2.1 참고)</strong></p>
+<figure class="align-default" id="id26">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_271.png"><img alt="img_27" class="bg-primary mb-1" src="../../_images/img_271.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 127 </span><span class="caption-text">Overview of DALL-EVAL’s social bias evaluation process.</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DALL-EVAL 의 자세한 social bias evaluation process 는 다음과 같습니다.</p>
+<ol class="arabic simple">
+<li><p>우선 <strong>Diagnostic prompts 로 이미지들을 생성합니다.</strong></p>
+<ol class="arabic simple">
+<li><p>ex) “a person who works as a nurse”</p></li>
+<li><p>총 252개의 Diagnostic prompts 제공</p></li>
+</ol>
+</li>
+<li><p><strong>Gender, Skin tone, Attributes 를 생성된 이미지로부터 탐지합니다.</strong></p>
+<ol class="arabic simple">
+<li><p>using automated detection models and verify the reliability of detection models with human evaluation</p></li>
+<li><p><strong>Gender</strong></p>
+<ol class="arabic simple">
+<li><p>BLIP-2 라는 모델에 생성된 영상을 주면서 영상 내 사람의 성별을 맞추게 합니다.<br>
+BLIP-2 의 답변을 기반으로 Gender Bias 측정합니다.</p></li>
+</ol>
+</li>
+<li><p><strong>Skin tone</strong></p>
+<ol class="arabic simple">
+<li><p>신경망으로 facial landmark 를 추출하고, illumination 을 측정합니다.</p></li>
+</ol>
+</li>
+<li><p><strong>Attributes</strong></p>
+<ol class="arabic simple">
+<li><p>BLIP-2 라는 모델에 생성된 영상을 주면서 영상 내 사람의 복장을 맞추게 합니다.<br>
+BLIP-2 의 답변을 기반으로 Attributes Bias 측정합니다.</p></li>
+</ol>
+</li>
+</ol>
+</li>
+<li><p>탐지된 Gender, Skin tone, Attributes 가<br>
+unbiased uniform distribution 으로부터 얼마나 skewed 되어있는지 측정합니다.</p></li>
+</ol>
+</li>
+</ul>
+  <br>
+<ul>
+<li><p><strong>실험 결과</strong></p>
+<figure class="align-default" id="id27">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_281.png"><img alt="img_28" class="bg-primary mb-1" src="../../_images/img_281.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 128 </span><span class="caption-text">Gender, skin tone, and attribute detection results with automated and expert human evaluation.</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id28">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_291.png"><img alt="img_29" class="bg-primary mb-1" src="../../_images/img_291.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 129 </span><span class="caption-text">Per-profession examples and average gender bias or average skin tone bias of images.</span><a class="headerlink" href="#id28" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id29">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_301.png"><img alt="img_30" class="bg-primary mb-1" src="../../_images/img_301.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 130 </span><span class="caption-text">Comparison of overall gender and skin tone bias of each model.</span><a class="headerlink" href="#id29" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>위 실험 결과와 같이, DALL-EVAL 은 Text-to-Image models 를 정량적으로 평가하는데에 성공했습니다.<br>
+Stable Diffusion 처럼 웹크롤링을 활용해 데이터를 학습한 모델은 Bias 가 존재했습니다.<br>
+이처럼 생성형 AI 의 Bias 를 측정하기 위한 다양한 노력이 지속되고 있습니다.<br>
+미래에는 생성형 AI 가 더 안전하게 활용될 수 있기를 기대합니다.</p>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="dalle.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DALL-E</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="dreambooth.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DreamBooth</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DDIM.html b/docs/review/DDIM.html
old mode 100644
new mode 100755
index 38a69f89..65175c6d
--- a/docs/review/DDIM.html
+++ b/docs/review/DDIM.html
@@ -1,1033 +1,1053 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DDIM &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DDIM';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="A Study on the Evaluation of Generative Models" href="A_Study_on_the_Evaluation_of_Generative_Models.html" />
-    <link rel="prev" title="DDPM" href="DDPM.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DDIM.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DDIM.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DDIM</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-inference-for-non-markovian-forward-process">3. Variational Inference For Non-Markovian Forward Process</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-generalized-generative-process">4. Sampling From Generalized Generative Process</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#code">6. Code</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Denoising Diffusion Implicit Models (ICLR 2021)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2010.02502">https://arxiv.org/abs/2010.02502</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/ermongroup/ddim">Official:</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on April 23, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="ddim">
-<h1>DDIM<a class="headerlink" href="#ddim" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>DDPM의 단점인 Markov Process를 Non-Markovian Process로 정의함으로써 Time efficient, deterministic한 Sampling이 가능한 모델을 제안</p>
-<ul>
-<li><p>Deterministic vs Stochastic</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>생성 분야에서 GAN(Generative Adversarial Network)이 뛰어난 성능을 보여주고있다.</p></li>
-<li><p>하지만, GAN은 학습 과정에서 불안정성을 보이는 경우가 많다.</p>
-<ul class="simple">
-<li><p>Generator와 Discriminator의 Imbalanced에 의한 Mode collapse</p></li>
-</ul>
-</li>
-<li><p>그러던 중, DDPM과 NCSN같은 adversarial training구조가 아닌 model들이 등장하였고 성공의 가능성을 보여주었다.</p></li>
-<li><p>이 중 DDPM은 Forward Process에서 Markov Process를 거치는데 이때문에 GAN에 비해 매우 느린 Performance를 보여준다.</p>
-<div class="pst-scrollable-table-container"><table class="table">
-<thead>
-<tr class="row-odd"><th class="head"><p>sampling</p></th>
-<th class="head"><p>GAN</p></th>
-<th class="head"><p>DDPM</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>32 x 32 x 50k</p></td>
-<td><p>Less than 1 min</p></td>
-<td><p>About 20h</p></td>
-</tr>
-<tr class="row-odd"><td><p>256 x 256 x 50k</p></td>
-<td><p>-</p></td>
-<td><p>About 1000h</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-</li>
-<li><p>DDIM은,</p>
-<ol class="arabic simple">
-<li><p>Markov Chain에 기반한 Process를 Non Markovian Process로 대체하였고</p></li>
-<li><p>결국 좀더 빠르고 비교적 우수한 Quality의 결과를 생성해내고, (with acceleration)</p></li>
-<li><p>DDPM과는 다르게 Consistency한 학습 결과를 보여줌으로써 latent간의 Interpolation이 가능하다.</p>
-<ul class="simple">
-<li><p>Consistency?</p>
-<ul>
-<li><p>If x, y is equivalent, then f(x) = f(y)</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="background">
-<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<section id="ddpm">
-<h3>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/00.png"><img alt="DDIM_00" class="bg-primary mb-1" src="../../_images/00.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 23 </span><span class="caption-text">DDPM &amp; DDIM Architectures</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DDPM의 Forward Process는 Markov process로 동작한다.</p>
-<ul>
-<li><p><em><strong>Markov process</strong></em></p>
-<ul>
-<li><p><em>미래 시점을 예측하기위해 현재 시점의 값을 이용한다.</em></p></li>
-<li><p><em>미래 시점은 과거 시점의 값에는 독립적인 값을 갖는다.</em></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>time step T는 DDPM에서 성능을 좌지우지하는 중요한 Hyper parameter이다. (대충 T=1000 정도?)</p></li>
-<li><p>하지만, Sampling 과정에서 DDPM은 결국 T 번의 inference 과정을 모두 Sequential하게 거쳐야하고 이는 다른 Method(GAN 등)보다 현저히 느린 속도를 보이는 요소가 된다.</p></li>
-</ul>
-</section>
-</section>
-<section id="variational-inference-for-non-markovian-forward-process">
-<h2>3. Variational Inference For Non-Markovian Forward Process<a class="headerlink" href="#variational-inference-for-non-markovian-forward-process" title="Permalink to this heading">#</a></h2>
-<p><strong>3.1. Non-Markovian Forward Processes</strong></p>
-<ul class="simple">
-<li><p>Inference’s Distribution 정의</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/01.png"><img alt="DDIM_01" class="bg-primary mb-1" src="../../_images/01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 24 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/02.png"><img alt="DDIM_02" class="bg-primary mb-1" src="../../_images/02.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 25 </span><span class="caption-text">Equation 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>t 시점의 값을 구하기위해 <span class="math notranslate nohighlight">\(X_{t-1}\)</span>의 값과 <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 참조</p>
-<ul>
-<li><p>DDPM은? <span class="math notranslate nohighlight">\(X_{t-1}\)</span>의 값만을 참조</p></li>
-<li><p>σ는 Forward process의 stochastic한 정도를 조절하는 hyper parameter (chap 4 참조)</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>3.2. Generative Process And Unified Variational Inference Objective (Reverse Process)</strong></p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/03.png"><img alt="DDIM_03" class="bg-primary mb-1" src="../../_images/03.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 26 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/04.png"><img alt="DDIM_04" class="bg-primary mb-1" src="../../_images/04.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 27 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(X_{t}\)</span>을 통해 <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 예측 (trainable)</p></li>
-<li><p>위의 식을 통해 <span class="math notranslate nohighlight">\(X_{t}\)</span>와, <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 이용해 <span class="math notranslate nohighlight">\(X_{t-1}\)</span>을 샘플링</p></li>
-</ol>
-<p>실제로는</p>
-<ul>
-<li><p>noise(ε)와 <span class="math notranslate nohighlight">\(X_{0}\)</span>, <span class="math notranslate nohighlight">\(X_{t}\)</span>의 관계</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/05.png"><img alt="DDIM_05" class="bg-primary mb-1" src="../../_images/05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 28 </span><span class="caption-text">Equation 5</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(X_{t}\)</span>을 통해 <span class="math notranslate nohighlight">\(X_{0}\)</span>을 예측</p>
-<ol class="arabic simple">
-<li><p>t 시점의 이미지를 통해 t 시점의 noise를 예측</p></li>
-<li><p>t 시점의 이미지와 t 시점의 noise를 통해 0 시점의 이미지를 계산 (fixed)</p></li>
-</ol>
-</li>
-<li><p>위의 식을 통해 t시점의 값과 예측한 0 시점의 값을 이용해 t-1 시점의 값을 샘플링</p></li>
-</ol>
-</section>
-<section id="sampling-from-generalized-generative-process">
-<h2>4. Sampling From Generalized Generative Process<a class="headerlink" href="#sampling-from-generalized-generative-process" title="Permalink to this heading">#</a></h2>
-<p>4.1. Denoising Diffusion Implicit Models</p>
-<ol class="arabic simple">
-<li><p>If σ → 0</p></li>
-</ol>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/06.png"><img alt="DDIM_06" class="bg-primary mb-1" src="../../_images/06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 29 </span><span class="caption-text">Equation 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>σ가 특정 값을 가질 때 DDPM의 generative process의 수식과 동일하다.</p></li>
-</ol>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/07.png"><img alt="DDIM_07" class="bg-primary mb-1" src="../../_images/07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 30 </span><span class="caption-text">Explanation of σ</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>4.2. Accelerated Generation Processes</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/08.png"><img alt="DDIM_08" class="bg-primary mb-1" src="../../_images/08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 31 </span><span class="caption-text">Explanation of accelerated method</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DDIM은 Deterministic하기때문에 모든 시점의 값을 모두 계산할 필요 없이 subset의 시점만으로 sampling이 가능하다.</p></li>
-<li><p>이 Accelerating method는 약간의 quality 저하가 있지만 Computational efficiency를 충분히 증가시킬 수 있다.</p></li>
-<li><p><strong>DDIM 방식의 재학습 없이 DDPM의 training에 DDIM의 sampling이 가능하다.</strong></p></li>
-</ul>
-<p>4.3. Relevance To Neural ODEs</p>
-<ul class="simple">
-<li><p>DDIM은 Object(e.g. 이미지)의 Encoding이 가능한 식을 유도할 수 있다.</p></li>
-</ul>
-</section>
-<section id="experiments">
-<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/09.png"><img alt="DDIM_09" class="bg-primary mb-1" src="../../_images/09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 32 </span><span class="caption-text">Table1</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/010.png"><img alt="DDIM_010" class="bg-primary mb-1" src="../../_images/010.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 33 </span><span class="caption-text">Equation 7</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>η → model을 simple하게 control하기위한 hyperparameter</p>
-<ul>
-<li><p>η = 1 → Model is DDPM</p></li>
-<li><p>η = 0 → Model is DDIM</p></li>
-</ul>
-</li>
-<li><p>모든 비교 모델이 S(sampling 횟수)의 값이 커질수록 더 낮은 FID를 보여준다.</p></li>
-<li><p>Fig.3의 DDIM은 다른 모델(η가 0이 아닌 모델)과 다르게 sampling step에 consistency한 결과를 보여준다.</p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/011.png"><img alt="DDIM_011" class="bg-primary mb-1" src="../../_images/011.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 34 </span><span class="caption-text">Figure 4, 5</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Step과 Inference time이 linear한 관계를 갖는다.</p></li>
-<li><p>적은 sampling step에서도 어느정도의 object를 보여준다.</p></li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/012.png"><img alt="DDIM_012" class="bg-primary mb-1" src="../../_images/012.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 35 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>T 시점의 이미지에 interpolation이 가능하다.</p></li>
-</ul>
-</section>
-<section id="code">
-<h2>6. Code<a class="headerlink" href="#code" title="Permalink to this heading">#</a></h2>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://keras.io/examples/generative/ddim/</span>
-<span class="k">class</span> <span class="nc">DiffusionModel</span><span class="p">(</span><span class="n">keras</span><span class="o">.</span><span class="n">Model</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">widths</span><span class="p">,</span> <span class="n">block_depth</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span> <span class="o">=</span> <span class="n">layers</span><span class="o">.</span><span class="n">Normalization</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">network</span> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span><span class="n">image_size</span><span class="p">,</span> <span class="n">widths</span><span class="p">,</span> <span class="n">block_depth</span><span class="p">)</span> <span class="c1"># unet 구조</span>
-
-    <span class="k">def</span> <span class="nf">denormalize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">images</span><span class="p">):</span>
-        <span class="c1"># convert the pixel values back to 0-1 range</span>
-        <span class="n">images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="o">.</span><span class="n">mean</span> <span class="o">+</span> <span class="n">images</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="o">.</span><span class="n">variance</span><span class="o">**</span><span class="mf">0.5</span>
-        <span class="k">return</span> <span class="n">tf</span><span class="o">.</span><span class="n">clip_by_value</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">diffusion_schedule</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">diffusion_times</span><span class="p">):</span>
-        <span class="c1"># diffusion times -&gt; angles</span>
-        <span class="n">start_angle</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">acos</span><span class="p">(</span><span class="n">max_signal_rate</span><span class="p">)</span>
-        <span class="n">end_angle</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">acos</span><span class="p">(</span><span class="n">min_signal_rate</span><span class="p">)</span>
-
-        <span class="n">diffusion_angles</span> <span class="o">=</span> <span class="n">start_angle</span> <span class="o">+</span> <span class="n">diffusion_times</span> <span class="o">*</span> <span class="p">(</span><span class="n">end_angle</span> <span class="o">-</span> <span class="n">start_angle</span><span class="p">)</span>
-
-        <span class="c1"># angles -&gt; signal and noise rates</span>
-        <span class="n">signal_rates</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">diffusion_angles</span><span class="p">)</span>
-        <span class="n">noise_rates</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">diffusion_angles</span><span class="p">)</span>
-        <span class="c1"># note that their squared sum is always: sin^2(x) + cos^2(x) = 1</span>
-
-        <span class="k">return</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span>
-
-    <span class="k">def</span> <span class="nf">denoise</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
-        <span class="c1"># the exponential moving average weights are used at evaluation</span>
-        <span class="k">if</span> <span class="n">training</span><span class="p">:</span>
-            <span class="n">network</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="n">network</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ema_network</span>
-
-        <span class="c1"># predict noise component and calculate the image component using it</span>
-        <span class="n">pred_noises</span> <span class="o">=</span> <span class="n">network</span><span class="p">([</span><span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="o">**</span><span class="mi">2</span><span class="p">],</span> <span class="n">training</span><span class="o">=</span><span class="n">training</span><span class="p">)</span>
-        <span class="n">pred_images</span> <span class="o">=</span> <span class="p">(</span><span class="n">noisy_images</span> <span class="o">-</span> <span class="n">noise_rates</span> <span class="o">*</span> <span class="n">pred_noises</span><span class="p">)</span> <span class="o">/</span> <span class="n">signal_rates</span>
-
-        <span class="k">return</span> <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span>
-
-    
-
-    <span class="k">def</span> <span class="nf">train_step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">images</span><span class="p">):</span>
-        <span class="c1"># normalize images to have standard deviation of 1, like the noises</span>
-        <span class="n">images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-        <span class="n">noises</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
-
-        <span class="c1"># sample uniform random diffusion times</span>
-        <span class="n">diffusion_times</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span>
-            <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">minval</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> <span class="n">maxval</span><span class="o">=</span><span class="mf">1.0</span>
-        <span class="p">)</span>
-        <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span><span class="n">diffusion_times</span><span class="p">)</span>
-        <span class="c1"># mix the images with noises accordingly</span>
-        <span class="n">noisy_images</span> <span class="o">=</span> <span class="n">signal_rates</span> <span class="o">*</span> <span class="n">images</span> <span class="o">+</span> <span class="n">noise_rates</span> <span class="o">*</span> <span class="n">noises</span>
-
-        <span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">GradientTape</span><span class="p">()</span> <span class="k">as</span> <span class="n">tape</span><span class="p">:</span>
-            <span class="c1"># train the network to separate noisy images to their components</span>
-            <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denoise</span><span class="p">(</span>
-                <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span>
-            <span class="p">)</span>
-
-            <span class="n">noise_loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">(</span><span class="n">noises</span><span class="p">,</span> <span class="n">pred_noises</span><span class="p">)</span>  <span class="c1"># used for training</span>
-            <span class="n">image_loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="n">pred_images</span><span class="p">)</span>  <span class="c1"># only used as metric</span>
-
-        <span class="n">gradients</span> <span class="o">=</span> <span class="n">tape</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">noise_loss</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">trainable_weights</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">apply_gradients</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">gradients</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">trainable_weights</span><span class="p">))</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">noise_loss_tracker</span><span class="o">.</span><span class="n">update_state</span><span class="p">(</span><span class="n">noise_loss</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">image_loss_tracker</span><span class="o">.</span><span class="n">update_state</span><span class="p">(</span><span class="n">image_loss</span><span class="p">)</span>
-
-        <span class="k">return</span> <span class="p">{</span><span class="n">m</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">m</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metrics</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]}</span>
-
-		<span class="k">def</span> <span class="nf">reverse_diffusion</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">initial_noise</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">):</span>
-        <span class="c1"># reverse diffusion = sampling</span>
-        <span class="n">num_images</span> <span class="o">=</span> <span class="n">initial_noise</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-        <span class="n">step_size</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="n">diffusion_steps</span>
-
-        <span class="c1"># important line:</span>
-        <span class="c1"># at the first sampling step, the &quot;noisy image&quot; is pure noise</span>
-        <span class="c1"># but its signal rate is assumed to be nonzero (min_signal_rate)</span>
-        <span class="n">next_noisy_images</span> <span class="o">=</span> <span class="n">initial_noise</span>
-        <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">diffusion_steps</span><span class="p">):</span>
-            <span class="n">noisy_images</span> <span class="o">=</span> <span class="n">next_noisy_images</span>
-
-            <span class="c1"># separate the current noisy image to its components</span>
-            <span class="n">diffusion_times</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="n">num_images</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="n">step</span> <span class="o">*</span> <span class="n">step_size</span>
-            <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span><span class="n">diffusion_times</span><span class="p">)</span>
-            <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denoise</span><span class="p">(</span>
-                <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">False</span>
-            <span class="p">)</span>
-            <span class="c1"># network used in eval mode</span>
-
-            <span class="c1"># remix the predicted components using the next signal and noise rates</span>
-            <span class="n">next_diffusion_times</span> <span class="o">=</span> <span class="n">diffusion_times</span> <span class="o">-</span> <span class="n">step_size</span>
-            <span class="n">next_noise_rates</span><span class="p">,</span> <span class="n">next_signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span>
-                <span class="n">next_diffusion_times</span>
-            <span class="p">)</span>
-            <span class="n">next_noisy_images</span> <span class="o">=</span> <span class="p">(</span>
-                <span class="n">next_signal_rates</span> <span class="o">*</span> <span class="n">pred_images</span> <span class="o">+</span> <span class="n">next_noise_rates</span> <span class="o">*</span> <span class="n">pred_noises</span>
-            <span class="p">)</span>
-            <span class="c1"># this new noisy image will be used in the next step</span>
-
-        <span class="k">return</span> <span class="n">pred_images</span>
-
-    <span class="k">def</span> <span class="nf">generate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_images</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">):</span>
-        <span class="c1"># noise -&gt; images -&gt; denormalized images</span>
-        <span class="n">initial_noise</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">num_images</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
-        <span class="n">generated_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reverse_diffusion</span><span class="p">(</span><span class="n">initial_noise</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">)</span>
-        <span class="n">generated_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denormalize</span><span class="p">(</span><span class="n">generated_images</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">generated_images</span>
-</pre></div>
-</div>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DDPM.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DDPM</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="A_Study_on_the_Evaluation_of_Generative_Models.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">A Study on the Evaluation of Generative Models</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-inference-for-non-markovian-forward-process">3. Variational Inference For Non-Markovian Forward Process</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-generalized-generative-process">4. Sampling From Generalized Generative Process</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#code">6. Code</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DDIM &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DDIM';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="A Study on the Evaluation of Generative Models" href="A_Study_on_the_Evaluation_of_Generative_Models.html" />
+    <link rel="prev" title="DDPM" href="DDPM.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DDIM.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DDIM.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DDIM</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-inference-for-non-markovian-forward-process">3. Variational Inference For Non-Markovian Forward Process</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-generalized-generative-process">4. Sampling From Generalized Generative Process</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#code">6. Code</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Denoising Diffusion Implicit Models (ICLR 2021)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2010.02502">https://arxiv.org/abs/2010.02502</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/ermongroup/ddim">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on April. 23, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="ddim">
+<h1>DDIM<a class="headerlink" href="#ddim" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>DDPM의 단점인 Markov Process를 Non-Markovian Process로 정의함으로써 Time efficient, deterministic한 Sampling이 가능한 모델을 제안</p>
+<ul>
+<li><p>Deterministic vs Stochastic</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>생성 분야에서 GAN(Generative Adversarial Network)이 뛰어난 성능을 보여주고있다.</p></li>
+<li><p>하지만, GAN은 학습 과정에서 불안정성을 보이는 경우가 많다.</p>
+<ul class="simple">
+<li><p>Generator와 Discriminator의 Imbalance에 의한 Mode collapse</p></li>
+</ul>
+</li>
+<li><p>그러던 중, DDPM과 NCSN같은 adversarial training구조가 아닌 model들이 등장하였고 성공의 가능성을 보여주었다.</p></li>
+<li><p>이 중 DDPM은 Forward Process에서 Markov Process를 거치는데 이때문에 GAN에 비해 매우 느린 Performance를 보여준다.</p>
+<div class="pst-scrollable-table-container"><table class="colwidths-auto table">
+<thead>
+<tr class="row-odd"><th class="head"><p>sampling</p></th>
+<th class="head"><p>GAN</p></th>
+<th class="head"><p>DDPM</p></th>
+</tr>
+</thead>
+<tbody>
+<tr class="row-even"><td><p>32 x 32 x 50k</p></td>
+<td><p>Less than 1 min</p></td>
+<td><p>About 20h</p></td>
+</tr>
+<tr class="row-odd"><td><p>256 x 256 x 50k</p></td>
+<td><p>-</p></td>
+<td><p>About 1000h</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</li>
+<li><p>DDIM은,</p>
+<ol class="arabic simple">
+<li><p>Markov Chain에 기반한 Process를 Non Markovian Process로 대체하였고</p></li>
+<li><p>결국 좀더 빠르고 비교적 우수한 Quality의 결과를 생성해내고, (with acceleration)</p></li>
+<li><p>DDPM과는 다르게 Consistency한 학습 결과를 보여줌으로써 latent간의 Interpolation이 가능하다.</p>
+<ul class="simple">
+<li><p>Consistency?</p>
+<ul>
+<li><p>If x and y are equivalent, then f(x) = f(y)</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="background">
+<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<section id="ddpm">
+<h3>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/00.png"><img alt="DDIM_00" class="bg-primary mb-1" src="../../_images/00.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 23 </span><span class="caption-text">DDPM &amp; DDIM Architectures</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DDPM의 Forward Process는 Markov process로 동작한다.</p>
+<ul>
+<li><p><em><strong>Markov process</strong></em></p>
+<ul>
+<li><p><em>미래 시점을 예측하기위해 현재 시점의 값을 이용한다.</em></p></li>
+<li><p><em>미래 시점은 과거 시점의 값에는 독립적인 값을 갖는다.</em></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>time step T는 DDPM에서 성능을 좌지우지하는 중요한 Hyper parameter이다. (대충 T=1000 정도?)</p></li>
+<li><p>하지만, Sampling 과정에서 DDPM은 결국 T 번의 inference 과정을 모두 Sequential하게 거쳐야하고 이는 다른 Method(GAN 등)보다 현저히 느린 속도를 보이는 요소가 된다.</p></li>
+</ul>
+</section>
+</section>
+<section id="variational-inference-for-non-markovian-forward-process">
+<h2>3. Variational Inference For Non-Markovian Forward Process<a class="headerlink" href="#variational-inference-for-non-markovian-forward-process" title="Permalink to this heading">#</a></h2>
+<p><strong>3.1. Non-Markovian Forward Processes</strong></p>
+<ul class="simple">
+<li><p>Inference’s Distribution 정의</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/01.png"><img alt="DDIM_01" class="bg-primary mb-1" src="../../_images/01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 24 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/02.png"><img alt="DDIM_02" class="bg-primary mb-1" src="../../_images/02.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 25 </span><span class="caption-text">Equation 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>t 시점의 값을 구하기위해 <span class="math notranslate nohighlight">\(X_{t-1}\)</span>의 값과 <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 참조</p>
+<ul>
+<li><p>DDPM은? <span class="math notranslate nohighlight">\(X_{t-1}\)</span>의 값만을 참조</p></li>
+<li><p>σ는 Forward process의 stochastic한 정도를 조절하는 hyper parameter (chap 4 참조)</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>3.2. Generative Process And Unified Variational Inference Objective (Reverse Process)</strong></p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/03.png"><img alt="DDIM_03" class="bg-primary mb-1" src="../../_images/03.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 26 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/04.png"><img alt="DDIM_04" class="bg-primary mb-1" src="../../_images/04.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 27 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(X_{t}\)</span>을 통해 <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 예측 (trainable)</p></li>
+<li><p>위의 식을 통해 <span class="math notranslate nohighlight">\(X_{t}\)</span>와, <span class="math notranslate nohighlight">\(X_{0}\)</span>의 값을 이용해 <span class="math notranslate nohighlight">\(X_{t-1}\)</span>을 샘플링</p></li>
+</ol>
+<p>실제로는</p>
+<ul>
+<li><p>noise(ε)와 <span class="math notranslate nohighlight">\(X_{0}\)</span>, <span class="math notranslate nohighlight">\(X_{t}\)</span>의 관계</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/05.png"><img alt="DDIM_05" class="bg-primary mb-1" src="../../_images/05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 28 </span><span class="caption-text">Equation 5</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(X_{t}\)</span>을 통해 <span class="math notranslate nohighlight">\(X_{0}\)</span>을 예측</p>
+<ol class="arabic simple">
+<li><p>t 시점의 이미지를 통해 t 시점의 noise를 예측</p></li>
+<li><p>t 시점의 이미지와 t 시점의 noise를 통해 0 시점의 이미지를 계산 (fixed)</p></li>
+</ol>
+</li>
+<li><p>위의 식을 통해 t시점의 값과 예측한 0 시점의 값을 이용해 t-1 시점의 값을 샘플링</p></li>
+</ol>
+</section>
+<section id="sampling-from-generalized-generative-process">
+<h2>4. Sampling From Generalized Generative Process<a class="headerlink" href="#sampling-from-generalized-generative-process" title="Permalink to this heading">#</a></h2>
+<p>4.1. Denoising Diffusion Implicit Models</p>
+<ol class="arabic simple">
+<li><p>If σ → 0</p></li>
+</ol>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/06.png"><img alt="DDIM_06" class="bg-primary mb-1" src="../../_images/06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 29 </span><span class="caption-text">Equation 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>σ가 특정 값을 가질 때 DDPM의 generative process의 수식과 동일하다.</p></li>
+</ol>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/07.png"><img alt="DDIM_07" class="bg-primary mb-1" src="../../_images/07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 30 </span><span class="caption-text">Explanation of σ</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>4.2. Accelerated Generation Processes</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/08.png"><img alt="DDIM_08" class="bg-primary mb-1" src="../../_images/08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 31 </span><span class="caption-text">Explanation of accelerated method</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DDIM은 Deterministic하기때문에 모든 시점의 값을 모두 계산할 필요 없이 subset의 시점만으로 sampling이 가능하다.</p></li>
+<li><p>이 Accelerating method는 약간의 quality 저하가 있지만 Computational efficiency를 충분히 증가시킬 수 있다.</p></li>
+<li><p><strong>DDIM 방식의 재학습 없이 DDPM의 training에 DDIM의 sampling이 가능하다.</strong></p></li>
+</ul>
+<p>4.3. Relevance To Neural ODEs</p>
+<ul class="simple">
+<li><p>DDIM은 Object(e.g. 이미지)의 Encoding이 가능한 식을 유도할 수 있다.</p></li>
+</ul>
+</section>
+<section id="experiments">
+<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/09.png"><img alt="DDIM_09" class="bg-primary mb-1" src="../../_images/09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 32 </span><span class="caption-text">Table1</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/010.png"><img alt="DDIM_010" class="bg-primary mb-1" src="../../_images/010.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 33 </span><span class="caption-text">Equation 7</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>η → model을 simple하게 control하기위한 hyperparameter</p>
+<ul>
+<li><p>η = 1 → Model is DDPM</p></li>
+<li><p>η = 0 → Model is DDIM</p></li>
+</ul>
+</li>
+<li><p>모든 비교 모델이 S(sampling 횟수)의 값이 커질수록 더 낮은 FID를 보여준다.</p></li>
+<li><p>Fig.3의 DDIM은 다른 모델(η가 0이 아닌 모델)과 다르게 sampling step에 consistency한 결과를 보여준다.</p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/011.png"><img alt="DDIM_011" class="bg-primary mb-1" src="../../_images/011.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 34 </span><span class="caption-text">Figure 4, 5</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Step과 Inference time이 linear한 관계를 갖는다.</p></li>
+<li><p>적은 sampling step에서도 어느정도의 object를 보여준다.</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/012.png"><img alt="DDIM_012" class="bg-primary mb-1" src="../../_images/012.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 35 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>T 시점의 이미지에 interpolation이 가능하다.</p></li>
+</ul>
+</section>
+<section id="code">
+<h2>6. Code<a class="headerlink" href="#code" title="Permalink to this heading">#</a></h2>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://keras.io/examples/generative/ddim/</span>
+<span class="k">class</span><span class="w"> </span><span class="nc">DiffusionModel</span><span class="p">(</span><span class="n">keras</span><span class="o">.</span><span class="n">Model</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">widths</span><span class="p">,</span> <span class="n">block_depth</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span> <span class="o">=</span> <span class="n">layers</span><span class="o">.</span><span class="n">Normalization</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">network</span> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span><span class="n">image_size</span><span class="p">,</span> <span class="n">widths</span><span class="p">,</span> <span class="n">block_depth</span><span class="p">)</span> <span class="c1"># unet 구조</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">denormalize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">images</span><span class="p">):</span>
+        <span class="c1"># convert the pixel values back to 0-1 range</span>
+        <span class="n">images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="o">.</span><span class="n">mean</span> <span class="o">+</span> <span class="n">images</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="o">.</span><span class="n">variance</span><span class="o">**</span><span class="mf">0.5</span>
+        <span class="k">return</span> <span class="n">tf</span><span class="o">.</span><span class="n">clip_by_value</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">diffusion_schedule</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">diffusion_times</span><span class="p">):</span>
+        <span class="c1"># diffusion times -&gt; angles</span>
+        <span class="n">start_angle</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">acos</span><span class="p">(</span><span class="n">max_signal_rate</span><span class="p">)</span>
+        <span class="n">end_angle</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">acos</span><span class="p">(</span><span class="n">min_signal_rate</span><span class="p">)</span>
+
+        <span class="n">diffusion_angles</span> <span class="o">=</span> <span class="n">start_angle</span> <span class="o">+</span> <span class="n">diffusion_times</span> <span class="o">*</span> <span class="p">(</span><span class="n">end_angle</span> <span class="o">-</span> <span class="n">start_angle</span><span class="p">)</span>
+
+        <span class="c1"># angles -&gt; signal and noise rates</span>
+        <span class="n">signal_rates</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">diffusion_angles</span><span class="p">)</span>
+        <span class="n">noise_rates</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">diffusion_angles</span><span class="p">)</span>
+        <span class="c1"># note that their squared sum is always: sin^2(x) + cos^2(x) = 1</span>
+
+        <span class="k">return</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">denoise</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
+        <span class="c1"># the exponential moving average weights are used at evaluation</span>
+        <span class="k">if</span> <span class="n">training</span><span class="p">:</span>
+            <span class="n">network</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">network</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ema_network</span>
+
+        <span class="c1"># predict noise component and calculate the image component using it</span>
+        <span class="n">pred_noises</span> <span class="o">=</span> <span class="n">network</span><span class="p">([</span><span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="o">**</span><span class="mi">2</span><span class="p">],</span> <span class="n">training</span><span class="o">=</span><span class="n">training</span><span class="p">)</span>
+        <span class="n">pred_images</span> <span class="o">=</span> <span class="p">(</span><span class="n">noisy_images</span> <span class="o">-</span> <span class="n">noise_rates</span> <span class="o">*</span> <span class="n">pred_noises</span><span class="p">)</span> <span class="o">/</span> <span class="n">signal_rates</span>
+
+        <span class="k">return</span> <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span>
+
+    
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">train_step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">images</span><span class="p">):</span>
+        <span class="c1"># normalize images to have standard deviation of 1, like the noises</span>
+        <span class="n">images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">normalizer</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+        <span class="n">noises</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
+
+        <span class="c1"># sample uniform random diffusion times</span>
+        <span class="n">diffusion_times</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span>
+            <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">minval</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> <span class="n">maxval</span><span class="o">=</span><span class="mf">1.0</span>
+        <span class="p">)</span>
+        <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span><span class="n">diffusion_times</span><span class="p">)</span>
+        <span class="c1"># mix the images with noises accordingly</span>
+        <span class="n">noisy_images</span> <span class="o">=</span> <span class="n">signal_rates</span> <span class="o">*</span> <span class="n">images</span> <span class="o">+</span> <span class="n">noise_rates</span> <span class="o">*</span> <span class="n">noises</span>
+
+        <span class="k">with</span> <span class="n">tf</span><span class="o">.</span><span class="n">GradientTape</span><span class="p">()</span> <span class="k">as</span> <span class="n">tape</span><span class="p">:</span>
+            <span class="c1"># train the network to separate noisy images to their components</span>
+            <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denoise</span><span class="p">(</span>
+                <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span>
+            <span class="p">)</span>
+
+            <span class="n">noise_loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">(</span><span class="n">noises</span><span class="p">,</span> <span class="n">pred_noises</span><span class="p">)</span>  <span class="c1"># used for training</span>
+            <span class="n">image_loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">(</span><span class="n">images</span><span class="p">,</span> <span class="n">pred_images</span><span class="p">)</span>  <span class="c1"># only used as metric</span>
+
+        <span class="n">gradients</span> <span class="o">=</span> <span class="n">tape</span><span class="o">.</span><span class="n">gradient</span><span class="p">(</span><span class="n">noise_loss</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">trainable_weights</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">optimizer</span><span class="o">.</span><span class="n">apply_gradients</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">gradients</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">network</span><span class="o">.</span><span class="n">trainable_weights</span><span class="p">))</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">noise_loss_tracker</span><span class="o">.</span><span class="n">update_state</span><span class="p">(</span><span class="n">noise_loss</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">image_loss_tracker</span><span class="o">.</span><span class="n">update_state</span><span class="p">(</span><span class="n">image_loss</span><span class="p">)</span>
+
+        <span class="k">return</span> <span class="p">{</span><span class="n">m</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">m</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metrics</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]}</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">reverse_diffusion</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">initial_noise</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">):</span>
+        <span class="c1"># reverse diffusion = sampling</span>
+        <span class="n">num_images</span> <span class="o">=</span> <span class="n">initial_noise</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+        <span class="n">step_size</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="n">diffusion_steps</span>
+
+        <span class="c1"># important line:</span>
+        <span class="c1"># at the first sampling step, the &quot;noisy image&quot; is pure noise</span>
+        <span class="c1"># but its signal rate is assumed to be nonzero (min_signal_rate)</span>
+        <span class="n">next_noisy_images</span> <span class="o">=</span> <span class="n">initial_noise</span>
+        <span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">diffusion_steps</span><span class="p">):</span>
+            <span class="n">noisy_images</span> <span class="o">=</span> <span class="n">next_noisy_images</span>
+
+            <span class="c1"># separate the current noisy image to its components</span>
+            <span class="n">diffusion_times</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="n">num_images</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="n">step</span> <span class="o">*</span> <span class="n">step_size</span>
+            <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span><span class="n">diffusion_times</span><span class="p">)</span>
+            <span class="n">pred_noises</span><span class="p">,</span> <span class="n">pred_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denoise</span><span class="p">(</span>
+                <span class="n">noisy_images</span><span class="p">,</span> <span class="n">noise_rates</span><span class="p">,</span> <span class="n">signal_rates</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">False</span>
+            <span class="p">)</span>
+            <span class="c1"># network used in eval mode</span>
+
+            <span class="c1"># remix the predicted components using the next signal and noise rates</span>
+            <span class="n">next_diffusion_times</span> <span class="o">=</span> <span class="n">diffusion_times</span> <span class="o">-</span> <span class="n">step_size</span>
+            <span class="n">next_noise_rates</span><span class="p">,</span> <span class="n">next_signal_rates</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">diffusion_schedule</span><span class="p">(</span>
+                <span class="n">next_diffusion_times</span>
+            <span class="p">)</span>
+            <span class="n">next_noisy_images</span> <span class="o">=</span> <span class="p">(</span>
+                <span class="n">next_signal_rates</span> <span class="o">*</span> <span class="n">pred_images</span> <span class="o">+</span> <span class="n">next_noise_rates</span> <span class="o">*</span> <span class="n">pred_noises</span>
+            <span class="p">)</span>
+            <span class="c1"># this new noisy image will be used in the next step</span>
+
+        <span class="k">return</span> <span class="n">pred_images</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">generate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">num_images</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">):</span>
+        <span class="c1"># noise -&gt; images -&gt; denormalized images</span>
+        <span class="n">initial_noise</span> <span class="o">=</span> <span class="n">tf</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">num_images</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="n">image_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
+        <span class="n">generated_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reverse_diffusion</span><span class="p">(</span><span class="n">initial_noise</span><span class="p">,</span> <span class="n">diffusion_steps</span><span class="p">)</span>
+        <span class="n">generated_images</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">denormalize</span><span class="p">(</span><span class="n">generated_images</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">generated_images</span>
+</pre></div>
+</div>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DDPM.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DDPM</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="A_Study_on_the_Evaluation_of_Generative_Models.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">A Study on the Evaluation of Generative Models</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-inference-for-non-markovian-forward-process">3. Variational Inference For Non-Markovian Forward Process</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-generalized-generative-process">4. Sampling From Generalized Generative Process</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#code">6. Code</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DDPM.html b/docs/review/DDPM.html
old mode 100644
new mode 100755
index 8929f39f..4782bf51
--- a/docs/review/DDPM.html
+++ b/docs/review/DDPM.html
@@ -1,1222 +1,1242 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DDPM &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DDPM';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DDIM" href="DDIM.html" />
-    <link rel="prev" title="GAN" href="gan.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DDPM.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DDPM.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DDPM</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DDPM</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t">2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-function-l">2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span></a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-denoising-encoders">3. Diffusion models and denoising encoders</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-process-and-l-t">3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-and-l-1-t-1">3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-scaling-reverse-process-decoder-and-l-0">3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#simplified-training-objective">3-4. Simplified training objective</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality">4-1. Sample quality</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-parameterization-and-training-objective-ablation">4-2. Reverse process parameterization and training objective ablation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4-3. Reverse process parameterization and training objective ablation</a></li>
-</ul>
-</li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Denoising Diffusion Probabilistic Models (NeurIPS 2020)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/denoising-diffusion-pytorch">PyTorch implementation:</a></p></li>
-<li><p>Review: <a class="reference external" href="https://www.youtube.com/watch?v=1j0W_lu55nc">PR-409: Denoising Diffusion Probabilistic Models</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Beomsoo Park</p></li>
-<li><p><strong>Last updated on Apr. 19, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="ddpm">
-<h1>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img12.png"><img alt="DDPM_01" class="bg-primary mb-1" src="../../_images/img12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 13 </span><span class="caption-text">DDPM samples \ (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img22.png"><img alt="DDPM_02" class="bg-primary mb-1" src="../../_images/img22.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 14 </span><span class="caption-text">Diffusion models \ (source: <a class="reference external" href="https://velog.io/&#64;yetsyl0705/What-are-Diffusion-Models">https://velog.io/&#64;yetsyl0705/What-are-Diffusion-Models</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Diffusion model</strong>은 <strong>variational inference로 학습시켜 데이터를 생성하는 parameterized Markov chain</strong>. Diffusion model은 Markov chain을 따라 데이터가 normal distribution의 형태가 될 때까지 <strong>noise를 더해가는 diffusion process</strong>와 <strong>이를 역으로 거치며 학습하는 reverse process</strong>로 구성됨.</p>
-<p>Diffusion model은 정의하기 쉽고 학습시키는 것도 편리함. 또한 높은 품질의 sample(output)도 생성이 가능.</p>
-<blockquote>
-<div><ul class="simple">
-<li><p><strong>Variational inference(변분추론)</strong>: 사후확률(posterior) 분포 <span class="math notranslate nohighlight">\(p(z
-|x)\)</span>를 다루기 쉬운 확률분포 <span class="math notranslate nohighlight">\(q(z)\)</span>로 근사(approximation)하는 것</p></li>
-<li><p><strong>Parameterize</strong>: 하나의 표현식에 대해 다른 parameter를 사용하여 다시 표현하는 과정. 이 과정에서 보통 parameter의 개수를 표현 식의 차수보다 적은 수로 선택(ex. 3차 표현식 –&gt; 2개 parameter 사용)하므로, 낮은 차수로의 mapping 함수(ex. 3D –&gt; 2D)가 생성</p></li>
-<li><p><strong>Markov chain</strong>: 어떤 상태에서 다른 상태로 넘어갈 때, 바로 전 단계의 상태에만 영향을 받는 확률 과정</p></li>
-</ul>
-</div></blockquote>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="background">
-<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img32.png"><img alt="DDPM_03" class="bg-primary mb-1" src="../../_images/img32.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 15 </span><span class="caption-text">Graphical model of DDPM \ (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">
-<h2>2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span><a class="headerlink" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1" title="Permalink to this heading">#</a></h2>
-<div class="math notranslate nohighlight">
-\[
-q\left(\mathbf{x}_{1: T} \mid \mathbf{x}_0\right):=\prod_{t=1}^T q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right), \quad q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right):=\mathcal{N}\left(\mathbf{x}_t ; \sqrt{1-\beta_t} \mathbf{x}_{t-1}, \beta_t \mathbf{I}\right)
-\]</div>
-<p>Markov chain으로 <strong>data에 noise를 추가</strong>하는 과정. Noise를 추가할 때 <strong>variance schedule <span class="math notranslate nohighlight">\(\beta_1, \dots, \beta_T\)</span>로 scaling</strong>을 한 후 더해준다.</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\beta_t = 1\)</span>이면 mean인 <span class="math notranslate nohighlight">\(\sqrt{1-\beta_t}\mathbf{x}_{t-1} = 0\)</span>. 이전 정보를 갖지 못하고 노이즈가 증가함</p></li>
-<li><p>단순히 noise만을 더해주는게 아니라 <span class="math notranslate nohighlight">\(\sqrt{1-\beta_t}\)</span>로 scaling하는 이유는 variance가 발산하는 것을 막기 위함</p></li>
-<li><p><span class="math notranslate nohighlight">\(q(x_1|x_0)\)</span>: <span class="math notranslate nohighlight">\(x_0\)</span>에 noise를 추가해 <span class="math notranslate nohighlight">\(x_1\)</span>을 만드는 과정</p></li>
-<li><p><span class="math notranslate nohighlight">\(x_T\)</span>는 완전 destroy된 noise 상태 ~ <span class="math notranslate nohighlight">\(N(x_T;0, I)\)</span></p></li>
-</ul>
-</section>
-<section id="reverse-process-p-mathbf-x-t-1-mathbf-x-t">
-<h2>2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span><a class="headerlink" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t" title="Permalink to this heading">#</a></h2>
-<div class="math notranslate nohighlight">
-\[
-p_\theta\left(\mathbf{x}_{0: T}\right):=p\left(\mathbf{x}_T\right) \prod_{t=1}^T p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right), \quad p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right):=\mathcal{N}\left(\mathbf{x}_{t-1} ; \boldsymbol{\mu}_\theta\left(\mathbf{x}_t, t\right), \boldsymbol{\Sigma}_\theta\left(\mathbf{x}_t, t\right)\right)
-\]</div>
-<p>Reverse process로 가우시안 노이즈를 사용하는 이유는 1949년 논문에 forward process가 가우시안이면 reverse process도 가우시안으로 쓰면 된다라는 증명이 있다고 함.</p>
-<p>여기서 우리가 해야 할 것은 <strong><span class="math notranslate nohighlight">\(\mathbf{x}_t\)</span>를 보고 <span class="math notranslate nohighlight">\(\mathbf{x}_{t-1}\)</span>의 평균 <span class="math notranslate nohighlight">\(\mu_\theta\)</span>과 분산 <span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>을 예측해내는 것</strong>.</p>
-<ul class="simple">
-<li><p>Hierarchical VAE에서의 decoding 과정과 비슷함</p></li>
-<li><p><span class="math notranslate nohighlight">\(\mu_\theta\)</span>과 분산 <span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>는 학습 가능한 parameter</p></li>
-</ul>
-</section>
-<section id="loss-function-l">
-<h2>2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span><a class="headerlink" href="#loss-function-l" title="Permalink to this heading">#</a></h2>
-<p>Diffusion model의 목적은 <strong>noise를 어떻게 제거할 것인가?</strong>이다. <span class="math notranslate nohighlight">\(x_t\)</span>가 들어왔을 때 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>을 예측할 수 있다면 <span class="math notranslate nohighlight">\(x_0\)</span> 또한 예측이 가능해짐.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbb{E}\left[-\log p_\theta\left(\mathbf{x}_0\right)\right] \leq \mathbb{E}_q\left[-\log \frac{p_\theta\left(\mathbf{x}_{0: T}\right)}{q\left(\mathbf{x}_{1: T} \mid \mathbf{x}_0\right)}\right]=\mathbb{E}_q\left[-\log p\left(\mathbf{x}_T\right)-\sum_{t \geq 1} \log \frac{p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right)}{q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right)}\right]=: L
-\]</div>
-<p>본 논문에서는 <strong>negative log likelihood를 최소화</strong>하는 방향으로 진행. 위 수식을 <strong>ELBO</strong>(Evidence Lower BOund)로 우항과 같이 정리하고 이를 풀어내면</p>
-<blockquote>
-<div><p>ELBO의 역할은 우리가 관찰한 P(z|x)가 다루기 힘든 분포를 이루고 있을 때 이를 조금 더 다루기 쉬운 분포인 Q(z)로 대신 표현하려 하는 과정에서 <strong>두 분포 (P(z|x)와 Q(z))의 차이 (KL Divergence)를 최소화</strong> 하기 위해 사용된다.</p>
-</div></blockquote>
-<div class="math notranslate nohighlight">
-\[
-\mathbb{E}_q[\underbrace{D_{\mathrm{KL}}\left(q\left(\mathbf{x}_T \mid \mathbf{x}_0\right) \| p\left(\mathbf{x}_T\right)\right)}_{L_T}+\sum_{t&gt;1} \underbrace{D_{\mathrm{KL}}\left(q\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t, \mathbf{x}_0\right) \| p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right)\right)}_{L_{t-1}} \underbrace{-\log p_\theta\left(\mathbf{x}_0 \mid \mathbf{x}_1\right)}_{L_0}]
-\]</div>
-<p>와 같은 결과가 나온다.</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(L_T\)</span>: Regularization term으로 <span class="math notranslate nohighlight">\(\beta_t\)</span>를 학습시킴</p></li>
-<li><p><span class="math notranslate nohighlight">\(L_{t-1}\)</span>: Reconstruction term으로 매 단계에서 noise를 지움</p></li>
-<li><p><span class="math notranslate nohighlight">\(L_0\)</span>: Reconstruction term으로 최종 단계에서 image를 생성</p></li>
-</ul>
-</section>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-and-denoising-encoders">
-<h1>3. Diffusion models and denoising encoders<a class="headerlink" href="#diffusion-models-and-denoising-encoders" title="Permalink to this heading">#</a></h1>
-<p>DDPM에서는 <strong>inductive bias를 늘려</strong> 모델을 더 stable하고 성능도 개선할 수 있었음.</p>
-<blockquote>
-<div><p>Inductive bias: 학습 모델이 지금까지 만나보지 못했던 상황에서 정확한 예측을 하기 위해 사용하는 <strong>추가적인 가정</strong>, 즉 우리가 풀려는 문제에 대한 정보를 모델에 적용하는 것</p>
-</div></blockquote>
-<section id="forward-process-and-l-t">
-<h2>3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span><a class="headerlink" href="#forward-process-and-l-t" title="Permalink to this heading">#</a></h2>
-<p><strong><span class="math notranslate nohighlight">\(\beta_t\)</span>를 고정</strong>했더니 학습이 잘됨. 10^-4 ~ 0.02로 linear하게 image에 가까울수록 noise를 적게 주는 방식으로 설정.</p>
-<p>따라서 <span class="math notranslate nohighlight">\(q\)</span>에는 학습 가능한 parameter가 없어 <strong><span class="math notranslate nohighlight">\(L_T\)</span>는 0이 되기 때문에 삭제</strong>할 수 있었음.</p>
-</section>
-<section id="reverse-process-and-l-1-t-1">
-<h2>3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span><a class="headerlink" href="#reverse-process-and-l-1-t-1" title="Permalink to this heading">#</a></h2>
-<div class="math notranslate nohighlight">
-\[
-L_{t-1}=D_{K L}\left(q\left(x_{t-1} \mid x_t, x_0\right) \| p_\theta\left(x_{t-1} \mid x_t\right)\right)
-\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(
-q\left(x_{t-1} \mid x_t, x_0\right)=N\left(x_{t-1} ; \tilde{\mu}\left(x_t, x_0\right), \tilde{\beta}_t \mathrm{I}\right)
-\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(
-p_\theta\left(x_{t-1} \mid x_t\right)=\mathcal{N}\left(x_{t-1} ; \mu_\theta\left(x_t, t\right), \Sigma_\theta\left(x_t, t\right)\right)
-\)</span></p></li>
-</ul>
-<p><span class="math notranslate nohighlight">\(L_{1:T-1}\)</span>는 forward process posterior를 예측하는 loss. <span class="math notranslate nohighlight">\(\mathbf{x}_{t-1}\)</span>에서 noise를 더해 <span class="math notranslate nohighlight">\(\mathbf{x}_{t}\)</span>를 만들었을때, 그 과정을 복원 <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span> 하는 과정을 학습.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img82.png"><img alt="DDPM_08" class="bg-primary mb-1" src="../../_images/img82.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 16 </span><span class="caption-text">Loss Simplification \ (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>: <span class="math notranslate nohighlight">\(\beta\)</span>를 상수로 가정했고 <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span>의 variance가 <span class="math notranslate nohighlight">\(\beta\)</span>에 영향을 받기 때문에 학습시키지 않아도 된다고 생각해 <strong>variance term을 제거</strong>함.</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img91.png"><img alt="DDPM_09" class="bg-primary mb-1" src="../../_images/img91.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 17 </span><span class="caption-text">Residual Estimation \ (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\mu_\theta\)</span>: DDPM에서는 <span class="math notranslate nohighlight">\(\mu_\theta\)</span>를 바로 구하지 않고 <strong>residual <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>만 구해 정확도를 높임</strong>.</p></li>
-</ul>
-</section>
-<section id="data-scaling-reverse-process-decoder-and-l-0">
-<h2>3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span><a class="headerlink" href="#data-scaling-reverse-process-decoder-and-l-0" title="Permalink to this heading">#</a></h2>
-<div class="math notranslate nohighlight">
-\[\begin{split}
-\begin{aligned}
-p_\theta\left(\mathbf{x}_0 \mid \mathbf{x}_1\right) &amp; =\prod_{i=1}^D \int_{\delta_{-}\left(x_0^i\right)}^{\delta_{+}\left(x_0^i\right)} \mathcal{N}\left(x ; \mu_\theta^i\left(\mathbf{x}_1, 1\right), \sigma_1^2\right) d x \\
-\delta_{+}(x) &amp; =\left\{\begin{array}{ll}
-\infty &amp; \text { if } x=1 \\
-x+\frac{1}{255} &amp; \text { if } x&lt;1
-\end{array} \quad \delta_{-}(x)= \begin{cases}-\infty &amp; \text { if } x=-1 \\
-x-\frac{1}{255} &amp; \text { if } x&gt;-1\end{cases} \right.
-\end{aligned}
-\end{split}\]</div>
-<p>[0, 255]의 image를 [-1,1] 사이로 linearly mapping. Sampling 마지막 단계에는 noise를 추가하지 않음.</p>
-<p><span class="math notranslate nohighlight">\(L_0\)</span>은 두 normal distribution 사이의 KL divergence를 나타냄.</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(D\)</span>: Data dimensionality</p></li>
-<li><p><span class="math notranslate nohighlight">\(i\)</span>: 좌표</p></li>
-</ul>
-</section>
-<section id="simplified-training-objective">
-<h2>3-4. Simplified training objective<a class="headerlink" href="#simplified-training-objective" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img101.png"><img alt="DDPM_10" class="bg-primary mb-1" src="../../_images/img101.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 18 </span><span class="caption-text">Simplified training objective \ (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img111.png"><img alt="DDPM_11" class="bg-primary mb-1" src="../../_images/img111.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 19 </span><span class="caption-text">Final Loss \ (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>최종 loss는 위와 같이 나타난다. Ground truth - estimated output간 MSE loss를 줄이는 과정이 denoising과 비슷해 DDPM이라는 이름이 붙음.</p>
-<p>Simplified objective을 통해 diffusion process를 학습하면 매우 작은 t 에서뿐만 아니라 <strong>큰 t에 대해서도 network 학습이 가능하기 때문에 매우 효과적</strong>.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img121.png"><img alt="DDPM_12" class="bg-primary mb-1" src="../../_images/img121.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 20 </span><span class="caption-text">Pseudo code of training process \ (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Algorithm 1: Training</p>
-<ul>
-<li><p>Noise를 더해나가는 과정, network(<span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>, <span class="math notranslate nohighlight">\(p_\theta\)</span>)가 t step에서 noise(<span class="math notranslate nohighlight">\(\epsilon\)</span>)가 얼마만큼 더해졌는지를 학습한다.</p></li>
-<li><p>학습 시에는 특정 step의 이미지가 얼마나 gaussian noise가 추가되었는지를 예측하도록 학습된다.</p></li>
-<li><p>코드에서는 랜덤 노이즈와 시간 단계 t로 노이즈가 추가된 이미지를 얻고 해당 이미지를 보고 모델이 노이즈를 예측</p></li>
-</ul>
-</li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">p_losses</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x_start</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x_start</span><span class="o">.</span><span class="n">shape</span>
-        <span class="n">noise</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">x_start</span><span class="p">))</span>
-
-        <span class="c1"># noise sample</span>
-
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">q_sample</span><span class="p">(</span><span class="n">x_start</span> <span class="o">=</span> <span class="n">x_start</span><span class="p">,</span> <span class="n">t</span> <span class="o">=</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span> <span class="o">=</span> <span class="n">noise</span><span class="p">)</span>
-
-        <span class="c1"># if doing self-conditioning, 50% of the time, predict x_start from current set of times</span>
-        <span class="c1"># and condition with unet with that</span>
-        <span class="c1"># this technique will slow down training by 25%, but seems to lower FID significantly</span>
-
-        <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span>
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span> <span class="ow">and</span> <span class="n">random</span><span class="p">()</span> <span class="o">&lt;</span> <span class="mf">0.5</span><span class="p">:</span>
-            <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
-                <span class="n">x_self_cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model_predictions</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span><span class="o">.</span><span class="n">pred_x_start</span>
-                <span class="n">x_self_cond</span><span class="o">.</span><span class="n">detach_</span><span class="p">()</span>
-
-        <span class="c1"># predict and take gradient step</span>
-
-        <span class="n">model_out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">x_self_cond</span><span class="p">)</span>
-
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_noise&#39;</span><span class="p">:</span>
-            <span class="n">target</span> <span class="o">=</span> <span class="n">noise</span>
-        <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_x0&#39;</span><span class="p">:</span>
-            <span class="n">target</span> <span class="o">=</span> <span class="n">x_start</span>
-        <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_v&#39;</span><span class="p">:</span>
-            <span class="n">v</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict_v</span><span class="p">(</span><span class="n">x_start</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span><span class="p">)</span>
-            <span class="n">target</span> <span class="o">=</span> <span class="n">v</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unknown objective </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">objective</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
-
-        <span class="n">loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_fn</span><span class="p">(</span><span class="n">model_out</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">reduction</span> <span class="o">=</span> <span class="s1">&#39;none&#39;</span><span class="p">)</span>
-        <span class="n">loss</span> <span class="o">=</span> <span class="n">reduce</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="s1">&#39;b ... -&gt; b (...)&#39;</span><span class="p">,</span> <span class="s1">&#39;mean&#39;</span><span class="p">)</span>
-
-        <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span> <span class="o">*</span> <span class="n">extract</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">loss_weight</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">loss</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>Algorithm 2: Sampling</p>
-<ul>
-<li><p>Network를 학습하고 나면, gaussian noise에서 시작해서 순차적으로 denoising 하는 것이 가능하다. (by parameterized markovian chain)</p></li>
-<li><p>코드에서는 noise 제거 후 소량의 noise를 다시 추가하고 있음</p></li>
-</ul>
-</li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
-<span class="k">def</span> <span class="nf">p_sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-    <span class="n">b</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span><span class="p">,</span> <span class="n">device</span> <span class="o">=</span> <span class="o">*</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span>
-    <span class="n">batched_times</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">full</span><span class="p">((</span><span class="n">b</span><span class="p">,),</span> <span class="n">t</span><span class="p">,</span> <span class="n">device</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">long</span><span class="p">)</span>
-    <span class="n">model_mean</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">model_log_variance</span><span class="p">,</span> <span class="n">x_start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">p_mean_variance</span><span class="p">(</span><span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="p">,</span> <span class="n">t</span> <span class="o">=</span> <span class="n">batched_times</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="n">x_self_cond</span><span class="p">,</span> <span class="n">clip_denoised</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
-    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="n">t</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="k">else</span> <span class="mf">0.</span> <span class="c1"># no noise if t == 0</span>
-    <span class="n">pred_img</span> <span class="o">=</span> <span class="n">model_mean</span> <span class="o">+</span> <span class="p">(</span><span class="mf">0.5</span> <span class="o">*</span> <span class="n">model_log_variance</span><span class="p">)</span><span class="o">.</span><span class="n">exp</span><span class="p">()</span> <span class="o">*</span> <span class="n">noise</span>
-    <span class="k">return</span> <span class="n">pred_img</span><span class="p">,</span> <span class="n">x_start</span>
-</pre></div>
-</div>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="experiments">
-<h1>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>T: 1000</p></li>
-<li><p>backbone: U-Net<br />
-각 down/upsampling 단계는 ResNet/ConvNext 블록 2개 + (groupnorm + attention + residual) + down/upsampling으로 구성됨</p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">block_klass</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="n">ResnetBlock</span><span class="p">,</span> <span class="n">groups</span> <span class="o">=</span> <span class="n">resnet_block_groups</span><span class="p">)</span>
-
-<span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_in</span><span class="p">))),</span>
-                <span class="n">Downsample</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-            <span class="p">]))</span>
-            
- <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_out</span><span class="p">))),</span>
-                <span class="n">Upsample</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span>  <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-            <span class="p">]))</span>
-
-</pre></div>
-</div>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Unet</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-        <span class="bp">self</span><span class="p">,</span>
-        <span class="n">dim</span><span class="p">,</span>
-        <span class="n">init_dim</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-        <span class="n">out_dim</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
-        <span class="n">dim_mults</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">),</span>
-        <span class="n">channels</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
-        <span class="n">self_condition</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-        <span class="n">resnet_block_groups</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
-        <span class="n">learned_variance</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-        <span class="n">learned_sinusoidal_cond</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-        <span class="n">random_fourier_features</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-        <span class="n">learned_sinusoidal_dim</span> <span class="o">=</span> <span class="mi">16</span>
-    <span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="c1"># determine dimensions</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">channels</span> <span class="o">=</span> <span class="n">channels</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span> <span class="o">=</span> <span class="n">self_condition</span>
-        <span class="n">input_channels</span> <span class="o">=</span> <span class="n">channels</span> <span class="o">*</span> <span class="p">(</span><span class="mi">2</span> <span class="k">if</span> <span class="n">self_condition</span> <span class="k">else</span> <span class="mi">1</span><span class="p">)</span>
-
-        <span class="n">init_dim</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">init_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">init_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">input_channels</span><span class="p">,</span> <span class="n">init_dim</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
-
-        <span class="n">dims</span> <span class="o">=</span> <span class="p">[</span><span class="n">init_dim</span><span class="p">,</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">m</span><span class="p">:</span> <span class="n">dim</span> <span class="o">*</span> <span class="n">m</span><span class="p">,</span> <span class="n">dim_mults</span><span class="p">)]</span>
-        <span class="n">in_out</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">dims</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">dims</span><span class="p">[</span><span class="mi">1</span><span class="p">:]))</span>
-
-        <span class="n">block_klass</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="n">ResnetBlock</span><span class="p">,</span> <span class="n">groups</span> <span class="o">=</span> <span class="n">resnet_block_groups</span><span class="p">)</span>
-
-        <span class="c1"># time embeddings</span>
-
-        <span class="n">time_dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">*</span> <span class="mi">4</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">random_or_learned_sinusoidal_cond</span> <span class="o">=</span> <span class="n">learned_sinusoidal_cond</span> <span class="ow">or</span> <span class="n">random_fourier_features</span>
-
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_or_learned_sinusoidal_cond</span><span class="p">:</span>
-            <span class="n">sinu_pos_emb</span> <span class="o">=</span> <span class="n">RandomOrLearnedSinusoidalPosEmb</span><span class="p">(</span><span class="n">learned_sinusoidal_dim</span><span class="p">,</span> <span class="n">random_fourier_features</span><span class="p">)</span>
-            <span class="n">fourier_dim</span> <span class="o">=</span> <span class="n">learned_sinusoidal_dim</span> <span class="o">+</span> <span class="mi">1</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="n">sinu_pos_emb</span> <span class="o">=</span> <span class="n">SinusoidalPosEmb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span>
-            <span class="n">fourier_dim</span> <span class="o">=</span> <span class="n">dim</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">time_mlp</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
-            <span class="n">sinu_pos_emb</span><span class="p">,</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">fourier_dim</span><span class="p">,</span> <span class="n">time_dim</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">GELU</span><span class="p">(),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">time_dim</span><span class="p">,</span> <span class="n">time_dim</span><span class="p">)</span>
-        <span class="p">)</span>
-
-        <span class="c1"># layers</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">downs</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([])</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">ups</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([])</span>
-        <span class="n">num_resolutions</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">in_out</span><span class="p">)</span>
-
-        <span class="k">for</span> <span class="n">ind</span><span class="p">,</span> <span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">in_out</span><span class="p">):</span>
-            <span class="n">is_last</span> <span class="o">=</span> <span class="n">ind</span> <span class="o">&gt;=</span> <span class="p">(</span><span class="n">num_resolutions</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
-
-            <span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_in</span><span class="p">))),</span>
-                <span class="n">Downsample</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-            <span class="p">]))</span>
-
-        <span class="n">mid_dim</span> <span class="o">=</span> <span class="n">dims</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">mid_block1</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">mid_dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">mid_attn</span> <span class="o">=</span> <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">Attention</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">)))</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">mid_block2</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">mid_dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
-
-        <span class="k">for</span> <span class="n">ind</span><span class="p">,</span> <span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">reversed</span><span class="p">(</span><span class="n">in_out</span><span class="p">)):</span>
-            <span class="n">is_last</span> <span class="o">=</span> <span class="n">ind</span> <span class="o">==</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">in_out</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
-
-            <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
-                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_out</span><span class="p">))),</span>
-                <span class="n">Upsample</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span>  <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-            <span class="p">]))</span>
-
-        <span class="n">default_out_dim</span> <span class="o">=</span> <span class="n">channels</span> <span class="o">*</span> <span class="p">(</span><span class="mi">1</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">learned_variance</span> <span class="k">else</span> <span class="mi">2</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">out_dim</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">out_dim</span><span class="p">,</span> <span class="n">default_out_dim</span><span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">final_res_block</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">dim</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">final_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">out_dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
-        
-  <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">time</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
-          <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span><span class="p">:</span>
-              <span class="n">x_self_cond</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">x_self_cond</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x_self_cond</span><span class="p">,</span> <span class="n">x</span><span class="p">),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-
-          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">init_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-          <span class="n">r</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">clone</span><span class="p">()</span>
-
-          <span class="n">t</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_mlp</span><span class="p">(</span><span class="n">time</span><span class="p">)</span>
-
-          <span class="n">h</span> <span class="o">=</span> <span class="p">[]</span>
-
-          <span class="k">for</span> <span class="n">block1</span><span class="p">,</span> <span class="n">block2</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">downsample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="p">:</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-              <span class="n">h</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-              <span class="n">x</span> <span class="o">=</span> <span class="n">block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-              <span class="n">h</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-              <span class="n">x</span> <span class="o">=</span> <span class="n">downsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-
-          <span class="k">for</span> <span class="n">block1</span><span class="p">,</span> <span class="n">block2</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">upsample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="p">:</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">h</span><span class="o">.</span><span class="n">pop</span><span class="p">()),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-
-              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">h</span><span class="o">.</span><span class="n">pop</span><span class="p">()),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-              <span class="n">x</span> <span class="o">=</span> <span class="n">attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-              <span class="n">x</span> <span class="o">=</span> <span class="n">upsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-          <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">r</span><span class="p">),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-
-          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_res_block</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-          <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>16 x 16 feature map resolution에 self-attention. conv에서 차원을 3배로 늘리고 q,k,v로 분해.</p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Attention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">32</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">scale</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">**</span> <span class="o">-</span><span class="mf">0.5</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span>
-        <span class="n">hidden_dim</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">*</span> <span class="n">heads</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="n">hidden_dim</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bias</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
-        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
-        <span class="n">qkv</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-        <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">t</span><span class="p">:</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s1">&#39;b (h c) x y -&gt; b h c (x y)&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">),</span> <span class="n">qkv</span><span class="p">)</span>
-
-        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span>
-
-        <span class="n">sim</span> <span class="o">=</span> <span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d i, b h d j -&gt; b h i j&#39;</span><span class="p">,</span> <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span>
-        <span class="n">attn</span> <span class="o">=</span> <span class="n">sim</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
-        <span class="n">out</span> <span class="o">=</span> <span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h i j, b h d j -&gt; b h i d&#39;</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
-
-        <span class="n">out</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="s1">&#39;b h (x y) d -&gt; b (h d) x y&#39;</span><span class="p">,</span> <span class="n">x</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
-        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>Linear attention</p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">LinearAttention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">32</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">scale</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">**</span> <span class="o">-</span><span class="mf">0.5</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span>
-        <span class="n">hidden_dim</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">*</span> <span class="n">heads</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="n">hidden_dim</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bias</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
-            <span class="n">LayerNorm</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span>
-        <span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
-        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
-        <span class="n">qkv</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
-        <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">t</span><span class="p">:</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s1">&#39;b (h c) x y -&gt; b h c (x y)&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">),</span> <span class="n">qkv</span><span class="p">)</span>
-
-        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span><span class="p">)</span>
-        <span class="n">k</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
-
-        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span>
-        <span class="n">v</span> <span class="o">=</span> <span class="n">v</span> <span class="o">/</span> <span class="p">(</span><span class="n">h</span> <span class="o">*</span> <span class="n">w</span><span class="p">)</span>
-
-        <span class="n">context</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d n, b h e n -&gt; b h d e&#39;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
-
-        <span class="n">out</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d e, b h d n -&gt; b h e n&#39;</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="n">q</span><span class="p">)</span>
-        <span class="n">out</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="s1">&#39;b h c (x y) -&gt; b (h c) x y&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">,</span> <span class="n">x</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
-        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>Diffusion time <span class="math notranslate nohighlight">\(T\)</span>는 각 residual block에 transformer sinusoidal positional embedding이 추가돼서 구분됨</p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">SinusoidalPosEmb</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
-        <span class="n">device</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span>
-        <span class="n">half_dim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span>
-        <span class="n">emb</span> <span class="o">=</span> <span class="n">math</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">half_dim</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
-        <span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">half_dim</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span> <span class="o">*</span> <span class="o">-</span><span class="n">emb</span><span class="p">)</span>
-        <span class="n">emb</span> <span class="o">=</span> <span class="n">x</span><span class="p">[:,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">emb</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="p">:]</span>
-        <span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">emb</span><span class="o">.</span><span class="n">sin</span><span class="p">(),</span> <span class="n">emb</span><span class="o">.</span><span class="n">cos</span><span class="p">()),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">emb</span>
-</pre></div>
-</div>
-<section id="sample-quality">
-<h2>4-1. Sample quality<a class="headerlink" href="#sample-quality" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img13.png"><img alt="DDPM_13" class="bg-primary mb-1" src="../../_images/img13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 21 </span><span class="caption-text">Train score of DDPM \ (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>FID, IS로 metric 계산. Unconditional model인데도 conditional model보다 우월. Codelength에서 차이가 없기 때문에 overfitting의 가능성도 적음.</p>
-<blockquote>
-<div><ul class="simple">
-<li><p><strong>FID score</strong>: 사전 학습된 Inception V3으로 생성된 이미지 집합과 실제 생성하고자 하는 집합 간 클래스 이미지의 분포의 거리를 계산한 metric. 낮을 수록 실제와 유사한 이미지를 생성.</p></li>
-<li><p><strong>IS(Inception Score)</strong>: 사전 학습된 Inception V3으로 생성한 이미지에 대해 정확하게 클래스가 나누어지는지, 다양한 클래스를 생성하는지 평가함. 높을 수록 다양한 클래스를 생성.</p></li>
-<li><p><strong>Unconditional model</strong>: 한번 dataset에 학습되면 추가적인 context 없이 image를 생성</p></li>
-<li><p><strong>Conditional model</strong>: Class, label 등의 추가 정보를 받아 image를 생성</p></li>
-</ul>
-</div></blockquote>
-<p><span class="math notranslate nohighlight">\(\mu\)</span>보다 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 계산하는 것이 성적이 좋고, fixed variance를 사용했을 때에도 성능이 감소하지 않음.</p>
-</section>
-<section id="reverse-process-parameterization-and-training-objective-ablation">
-<h2>4-2. Reverse process parameterization and training objective ablation<a class="headerlink" href="#reverse-process-parameterization-and-training-objective-ablation" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img14.png"><img alt="DDPM_14" class="bg-primary mb-1" src="../../_images/img14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 22 </span><span class="caption-text">Comparison between <span class="math notranslate nohighlight">\(\mu\)</span> and <span class="math notranslate nohighlight">\(\epsilon\)</span> \ (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\mu\)</span>의 실험 결과를 보면 분산을 고정했을 때 FID에서 성능 차이가 나는 것을 볼 수 있음. 또한 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 사용하면서 Simplified objective를 사용하면 FID, IS가 더 개선됨</p>
-</section>
-<section id="id1">
-<h2>4-3. Reverse process parameterization and training objective ablation<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="gan.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">GAN</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DDIM.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DDIM</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DDPM</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t">2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-function-l">2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span></a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-denoising-encoders">3. Diffusion models and denoising encoders</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-process-and-l-t">3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-and-l-1-t-1">3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-scaling-reverse-process-decoder-and-l-0">3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#simplified-training-objective">3-4. Simplified training objective</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality">4-1. Sample quality</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-parameterization-and-training-objective-ablation">4-2. Reverse process parameterization and training objective ablation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4-3. Reverse process parameterization and training objective ablation</a></li>
-</ul>
-</li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DDPM &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DDPM';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DDIM" href="DDIM.html" />
+    <link rel="prev" title="GAN" href="gan.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DDPM.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DDPM.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DDPM</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DDPM</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t">2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-function-l">2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span></a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-denoising-encoders">3. Diffusion models and denoising encoders</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-process-and-l-t">3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-and-l-1-t-1">3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-scaling-reverse-process-decoder-and-l-0">3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#simplified-training-objective">3-4. Simplified training objective</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality">4-1. Sample quality</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-parameterization-and-training-objective-ablation">4-2. Reverse process parameterization and training objective ablation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4-3. Reverse process parameterization and training objective ablation</a></li>
+</ul>
+</li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Denoising Diffusion Probabilistic Models (NeurIPS 2020)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/denoising-diffusion-pytorch">PyTorch implementation</a></p></li>
+<li><p>Review: <a class="reference external" href="https://www.youtube.com/watch?v=1j0W_lu55nc">PR-409: Denoising Diffusion Probabilistic Models</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Beomsoo Park</p></li>
+<li><p><strong>Last updated on Apr. 19, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="ddpm">
+<h1>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img12.png"><img alt="DDPM_01" class="bg-primary mb-1" src="../../_images/img12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 13 </span><span class="caption-text">DDPM samples (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img22.png"><img alt="DDPM_02" class="bg-primary mb-1" src="../../_images/img22.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 14 </span><span class="caption-text">Diffusion models (source: <a class="reference external" href="https://velog.io/&#64;yetsyl0705/What-are-Diffusion-Models">https://velog.io/&#64;yetsyl0705/What-are-Diffusion-Models</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Diffusion model</strong>은 <strong>variational inference로 학습시켜 데이터를 생성하는 parameterized Markov chain</strong>. Diffusion model은 Markov chain을 따라 데이터가 normal distribution의 형태를 띨 때까지 <strong>noise를 더해가는 diffusion process</strong>와 <strong>이를 역으로 거치며 학습하는 reverse process</strong>로 구성됨.</p>
+<p>Diffusion model은 정의하기 쉽고 학습시키는 것도 편리함. 또한 높은 품질의 sample(output)도 생성이 가능.</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><strong>Variational inference(변분추론)</strong>: 사후확률(posterior) 분포 <span class="math notranslate nohighlight">\(p(z
+|x)\)</span>를 다루기 쉬운 확률분포 <span class="math notranslate nohighlight">\(q(z)\)</span>로 근사(approximation)하는 것</p></li>
+<li><p><strong>Parameterize</strong>: 하나의 표현식에 대해 다른 parameter를 사용하여 다시 표현하는 과정. 이 과정에서 보통 parameter의 개수를 표현 식의 차수보다 적은 수로 선택(ex. 3차 표현식 –&gt; 2개 parameter 사용)하므로, 낮은 차수로의 mapping 함수(ex. 3D –&gt; 2D)가 생성</p></li>
+<li><p><strong>Markov chain</strong>: 어떤 상태에서 다른 상태로 넘어갈 때, 바로 전 단계의 상태에만 영향을 받는 확률 과정</p></li>
+</ul>
+</div></blockquote>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img32.png"><img alt="DDPM_03" class="bg-primary mb-1" src="../../_images/img32.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 15 </span><span class="caption-text">Graphical model of DDPM (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">
+<h2>2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span><a class="headerlink" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1" title="Permalink to this heading">#</a></h2>
+<div class="math notranslate nohighlight">
+\[
+q\left(\mathbf{x}_{1: T} \mid \mathbf{x}_0\right):=\prod_{t=1}^T q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right), \quad q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right):=\mathcal{N}\left(\mathbf{x}_t ; \sqrt{1-\beta_t} \mathbf{x}_{t-1}, \beta_t \mathbf{I}\right)
+\]</div>
+<p>Markov chain으로 <strong>data에 noise를 추가</strong>하는 과정. Noise를 추가할 때 <strong>variance schedule <span class="math notranslate nohighlight">\(\beta_1, \dots, \beta_T\)</span>로 scaling</strong>을 한 후 더해준다.</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\beta_t = 1\)</span>이면 mean인 <span class="math notranslate nohighlight">\(\sqrt{1-\beta_t}\mathbf{x}_{t-1} = 0\)</span>. 이전 정보를 갖지 못하고 노이즈가 증가함</p></li>
+<li><p>단순히 noise만을 더해주는게 아니라 <span class="math notranslate nohighlight">\(\sqrt{1-\beta_t}\)</span>로 scaling하는 이유는 variance가 발산하는 것을 막기 위함</p></li>
+<li><p><span class="math notranslate nohighlight">\(q(x_1|x_0)\)</span>: <span class="math notranslate nohighlight">\(x_0\)</span>에 noise를 추가해 <span class="math notranslate nohighlight">\(x_1\)</span>을 만드는 과정</p></li>
+<li><p><span class="math notranslate nohighlight">\(x_T\)</span>는 완전 destroy된 noise 상태 ~ <span class="math notranslate nohighlight">\(N(x_T;0, I)\)</span></p></li>
+</ul>
+</section>
+<section id="reverse-process-p-mathbf-x-t-1-mathbf-x-t">
+<h2>2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span><a class="headerlink" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t" title="Permalink to this heading">#</a></h2>
+<div class="math notranslate nohighlight">
+\[
+p_\theta\left(\mathbf{x}_{0: T}\right):=p\left(\mathbf{x}_T\right) \prod_{t=1}^T p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right), \quad p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right):=\mathcal{N}\left(\mathbf{x}_{t-1} ; \boldsymbol{\mu}_\theta\left(\mathbf{x}_t, t\right), \boldsymbol{\Sigma}_\theta\left(\mathbf{x}_t, t\right)\right)
+\]</div>
+<p>Reverse process로 가우시안 노이즈를 사용하는 이유는 1994년 논문에 forward process가 가우시안이면 reverse process도 가우시안으로 쓰면 된다라는 증명이 있다고 함.</p>
+<p>여기서 우리가 해야 할 것은 <strong><span class="math notranslate nohighlight">\(\mathbf{x}_t\)</span>를 보고 <span class="math notranslate nohighlight">\(\mathbf{x}_{t-1}\)</span>의 평균 <span class="math notranslate nohighlight">\(\mu_\theta\)</span>과 분산 <span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>을 예측해내는 것</strong>.</p>
+<ul class="simple">
+<li><p>Hierarchical VAE에서의 decoding 과정과 비슷함</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mu_\theta\)</span>과 분산 <span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>는 학습 가능한 parameter</p></li>
+</ul>
+</section>
+<section id="loss-function-l">
+<h2>2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span><a class="headerlink" href="#loss-function-l" title="Permalink to this heading">#</a></h2>
+<p>Diffusion model의 목적은 <strong>noise를 어떻게 제거할 것인가?</strong>이다. <span class="math notranslate nohighlight">\(x_t\)</span>가 들어왔을 때 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>을 예측할 수 있다면 <span class="math notranslate nohighlight">\(x_0\)</span> 또한 예측이 가능해짐.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbb{E}\left[-\log p_\theta\left(\mathbf{x}_0\right)\right] \leq \mathbb{E}_q\left[-\log \frac{p_\theta\left(\mathbf{x}_{0: T}\right)}{q\left(\mathbf{x}_{1: T} \mid \mathbf{x}_0\right)}\right]=\mathbb{E}_q\left[-\log p\left(\mathbf{x}_T\right)-\sum_{t \geq 1} \log \frac{p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right)}{q\left(\mathbf{x}_t \mid \mathbf{x}_{t-1}\right)}\right]=: L
+\]</div>
+<p>본 논문에서는 <strong>negative log likelihood를 최소화</strong>하는 방향으로 진행. 위 수식을 <strong>ELBO</strong>(Evidence Lower BOund)로 우항과 같이 정리하고 이를 풀어내면</p>
+<blockquote>
+<div><p>ELBO의 역할은 우리가 관찰한 P(z|x)가 다루기 힘든 분포를 이루고 있을 때 이를 조금 더 다루기 쉬운 분포인 Q(z)로 대신 표현하려 하는 과정에서 <strong>두 분포 (P(z|x)와 Q(z))의 차이 (KL Divergence)를 최소화</strong> 하기 위해 사용된다.</p>
+</div></blockquote>
+<div class="math notranslate nohighlight">
+\[
+\mathbb{E}_q[\underbrace{D_{\mathrm{KL}}\left(q\left(\mathbf{x}_T \mid \mathbf{x}_0\right) \| p\left(\mathbf{x}_T\right)\right)}_{L_T}+\sum_{t&gt;1} \underbrace{D_{\mathrm{KL}}\left(q\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t, \mathbf{x}_0\right) \| p_\theta\left(\mathbf{x}_{t-1} \mid \mathbf{x}_t\right)\right)}_{L_{t-1}} \underbrace{-\log p_\theta\left(\mathbf{x}_0 \mid \mathbf{x}_1\right)}_{L_0}]
+\]</div>
+<p>와 같은 결과가 나온다.</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(L_T\)</span>: Regularization term으로 <span class="math notranslate nohighlight">\(\beta_t\)</span>를 학습시킴</p></li>
+<li><p><span class="math notranslate nohighlight">\(L_{t-1}\)</span>: Reconstruction term으로 매 단계에서 noise를 지움</p></li>
+<li><p><span class="math notranslate nohighlight">\(L_0\)</span>: Reconstruction term으로 최종 단계에서 image를 생성</p></li>
+</ul>
+</section>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-and-denoising-encoders">
+<h1>3. Diffusion models and denoising autoencoders<a class="headerlink" href="#diffusion-models-and-denoising-encoders" title="Permalink to this heading">#</a></h1>
+<p>DDPM에서는 <strong>inductive bias를 늘려</strong> 모델을 더 stable하고 성능도 개선할 수 있었음.</p>
+<blockquote>
+<div><p>Inductive bias: 학습 모델이 지금까지 만나보지 못했던 상황에서 정확한 예측을 하기 위해 사용하는 <strong>추가적인 가정</strong>, 즉 우리가 풀려는 문제에 대한 정보를 모델에 적용하는 것</p>
+</div></blockquote>
+<section id="forward-process-and-l-t">
+<h2>3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span><a class="headerlink" href="#forward-process-and-l-t" title="Permalink to this heading">#</a></h2>
+<p><strong><span class="math notranslate nohighlight">\(\beta_t\)</span>를 고정</strong>했더니 학습이 잘됨. <span class="math notranslate nohighlight">\(10^{-4}\)</span> ~ 0.02로 linear하게 image에 가까울수록 noise를 적게 주는 방식으로 설정.</p>
+<p>따라서 <span class="math notranslate nohighlight">\(q\)</span>에는 학습 가능한 parameter가 없어 <strong><span class="math notranslate nohighlight">\(L_T\)</span>는 학습 중 상수가 되기 때문에 삭제</strong>할 수 있었음.</p>
+</section>
+<section id="reverse-process-and-l-1-t-1">
+<h2>3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span><a class="headerlink" href="#reverse-process-and-l-1-t-1" title="Permalink to this heading">#</a></h2>
+<div class="math notranslate nohighlight">
+\[
+L_{t-1}=D_{K L}\left(q\left(x_{t-1} \mid x_t, x_0\right) \| p_\theta\left(x_{t-1} \mid x_t\right)\right)
+\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(
+q\left(x_{t-1} \mid x_t, x_0\right)=N\left(x_{t-1} ; \tilde{\mu}\left(x_t, x_0\right), \tilde{\beta}_t \mathrm{I}\right)
+\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(
+p_\theta\left(x_{t-1} \mid x_t\right)=\mathcal{N}\left(x_{t-1} ; \mu_\theta\left(x_t, t\right), \Sigma_\theta\left(x_t, t\right)\right)
+\)</span></p></li>
+</ul>
+<p><span class="math notranslate nohighlight">\(L_{1:T-1}\)</span>는 forward process posterior를 예측하는 loss. <span class="math notranslate nohighlight">\(\mathbf{x}_{t-1}\)</span>에서 noise를 더해 <span class="math notranslate nohighlight">\(\mathbf{x}_{t}\)</span>를 만들었을때, 그 과정을 복원 <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span> 하는 과정을 학습.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img82.png"><img alt="DDPM_08" class="bg-primary mb-1" src="../../_images/img82.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 16 </span><span class="caption-text">Loss Simplification (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\Sigma_\theta\)</span>: <span class="math notranslate nohighlight">\(\beta\)</span>를 상수로 가정했고 <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span>의 variance가 <span class="math notranslate nohighlight">\(\beta\)</span>에 영향을 받기 때문에 학습시키지 않아도 된다고 생각해 <strong>variance term을 제거</strong>함.</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img91.png"><img alt="DDPM_09" class="bg-primary mb-1" src="../../_images/img91.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 17 </span><span class="caption-text">Residual Estimation (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\mu_\theta\)</span>: DDPM에서는 <span class="math notranslate nohighlight">\(\mu_\theta\)</span>를 바로 구하지 않고 <strong>residual <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>만 구해 정확도를 높임</strong>.</p></li>
+</ul>
+</section>
+<section id="data-scaling-reverse-process-decoder-and-l-0">
+<h2>3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span><a class="headerlink" href="#data-scaling-reverse-process-decoder-and-l-0" title="Permalink to this heading">#</a></h2>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+\begin{aligned}
+p_\theta\left(\mathbf{x}_0 \mid \mathbf{x}_1\right) &amp; =\prod_{i=1}^D \int_{\delta_{-}\left(x_0^i\right)}^{\delta_{+}\left(x_0^i\right)} \mathcal{N}\left(x ; \mu_\theta^i\left(\mathbf{x}_1, 1\right), \sigma_1^2\right) d x \\
+\delta_{+}(x) &amp; =\left\{\begin{array}{ll}
+\infty &amp; \text { if } x=1 \\
+x+\frac{1}{255} &amp; \text { if } x&lt;1
+\end{array} \quad \delta_{-}(x)= \begin{cases}-\infty &amp; \text { if } x=-1 \\
+x-\frac{1}{255} &amp; \text { if } x&gt;-1\end{cases} \right.
+\end{aligned}
+\end{split}\]</div>
+<p>[0, 255]의 image를 [-1,1] 사이로 linearly mapping. Sampling 마지막 단계에는 noise를 추가하지 않음.</p>
+<p><span class="math notranslate nohighlight">\(L_0\)</span>은 두 normal distribution 사이의 KL divergence를 나타냄.</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(D\)</span>: Data dimensionality</p></li>
+<li><p><span class="math notranslate nohighlight">\(i\)</span>: 좌표</p></li>
+</ul>
+</section>
+<section id="simplified-training-objective">
+<h2>3-4. Simplified training objective<a class="headerlink" href="#simplified-training-objective" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img101.png"><img alt="DDPM_10" class="bg-primary mb-1" src="../../_images/img101.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 18 </span><span class="caption-text">Simplified training objective (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img111.png"><img alt="DDPM_11" class="bg-primary mb-1" src="../../_images/img111.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 19 </span><span class="caption-text">Final Loss (source: <a class="reference external" href="https://velog.io/&#64;sjina0722/%EB%85%BC%EB%AC%B8-%EB%A6%AC%EB%B7%B0-Denoising-Diffusion-Probabilistic-Models">https://velog.io/&#64;sjina0722/논문-리뷰-Denoising-Diffusion-Probabilistic-Models</a>)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>최종 loss는 위와 같이 나타난다. Ground truth - estimated output간 MSE loss를 줄이는 과정이 denoising과 비슷해 DDPM이라는 이름이 붙음.</p>
+<p>Simplified objective을 통해 diffusion process를 학습하면 매우 작은 t 에서뿐만 아니라 <strong>큰 t에 대해서도 network 학습이 가능하기 때문에 매우 효과적</strong>.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img121.png"><img alt="DDPM_12" class="bg-primary mb-1" src="../../_images/img121.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 20 </span><span class="caption-text">Pseudo code of training process (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Algorithm 1: Training</p>
+<ul>
+<li><p>Noise를 더해나가는 과정, network(<span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>, <span class="math notranslate nohighlight">\(p_\theta\)</span>)가 t step에서 noise(<span class="math notranslate nohighlight">\(\epsilon\)</span>)가 얼마만큼 더해졌는지를 학습한다.</p></li>
+<li><p>학습 시에는 특정 step의 이미지에 gaussian noise가 얼마나 추가되었는지를 예측하도록 학습된다.</p></li>
+<li><p>코드에서는 랜덤 노이즈와 시간 단계 t로 노이즈가 추가된 이미지를 얻고 해당 이미지를 보고 모델이 노이즈를 예측</p></li>
+</ul>
+</li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">p_losses</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x_start</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x_start</span><span class="o">.</span><span class="n">shape</span>
+        <span class="n">noise</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">x_start</span><span class="p">))</span>
+
+        <span class="c1"># noise sample</span>
+
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">q_sample</span><span class="p">(</span><span class="n">x_start</span> <span class="o">=</span> <span class="n">x_start</span><span class="p">,</span> <span class="n">t</span> <span class="o">=</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span> <span class="o">=</span> <span class="n">noise</span><span class="p">)</span>
+
+        <span class="c1"># if doing self-conditioning, 50% of the time, predict x_start from current set of times</span>
+        <span class="c1"># and condition with unet with that</span>
+        <span class="c1"># this technique will slow down training by 25%, but seems to lower FID significantly</span>
+
+        <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span> <span class="ow">and</span> <span class="n">random</span><span class="p">()</span> <span class="o">&lt;</span> <span class="mf">0.5</span><span class="p">:</span>
+            <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
+                <span class="n">x_self_cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model_predictions</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span><span class="o">.</span><span class="n">pred_x_start</span>
+                <span class="n">x_self_cond</span><span class="o">.</span><span class="n">detach_</span><span class="p">()</span>
+
+        <span class="c1"># predict and take gradient step</span>
+
+        <span class="n">model_out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">x_self_cond</span><span class="p">)</span>
+
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_noise&#39;</span><span class="p">:</span>
+            <span class="n">target</span> <span class="o">=</span> <span class="n">noise</span>
+        <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_x0&#39;</span><span class="p">:</span>
+            <span class="n">target</span> <span class="o">=</span> <span class="n">x_start</span>
+        <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">objective</span> <span class="o">==</span> <span class="s1">&#39;pred_v&#39;</span><span class="p">:</span>
+            <span class="n">v</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict_v</span><span class="p">(</span><span class="n">x_start</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">noise</span><span class="p">)</span>
+            <span class="n">target</span> <span class="o">=</span> <span class="n">v</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;unknown objective </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">objective</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
+
+        <span class="n">loss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_fn</span><span class="p">(</span><span class="n">model_out</span><span class="p">,</span> <span class="n">target</span><span class="p">,</span> <span class="n">reduction</span> <span class="o">=</span> <span class="s1">&#39;none&#39;</span><span class="p">)</span>
+        <span class="n">loss</span> <span class="o">=</span> <span class="n">reduce</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="s1">&#39;b ... -&gt; b (...)&#39;</span><span class="p">,</span> <span class="s1">&#39;mean&#39;</span><span class="p">)</span>
+
+        <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span> <span class="o">*</span> <span class="n">extract</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">loss_weight</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">loss</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>Algorithm 2: Sampling</p>
+<ul>
+<li><p>Network를 학습하고 나면, gaussian noise에서 시작해서 순차적으로 denoising 하는 것이 가능하다. (by parameterized markovian chain)</p></li>
+<li><p>코드에서는 noise 제거 후 소량의 noise를 다시 추가하고 있음</p></li>
+</ul>
+</li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
+<span class="k">def</span><span class="w"> </span><span class="nf">p_sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+    <span class="n">b</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span><span class="p">,</span> <span class="n">device</span> <span class="o">=</span> <span class="o">*</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span>
+    <span class="n">batched_times</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">full</span><span class="p">((</span><span class="n">b</span><span class="p">,),</span> <span class="n">t</span><span class="p">,</span> <span class="n">device</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span><span class="p">,</span> <span class="n">dtype</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">long</span><span class="p">)</span>
+    <span class="n">model_mean</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">model_log_variance</span><span class="p">,</span> <span class="n">x_start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">p_mean_variance</span><span class="p">(</span><span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="p">,</span> <span class="n">t</span> <span class="o">=</span> <span class="n">batched_times</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="n">x_self_cond</span><span class="p">,</span> <span class="n">clip_denoised</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span>
+    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="n">t</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="k">else</span> <span class="mf">0.</span> <span class="c1"># no noise if t == 0</span>
+    <span class="n">pred_img</span> <span class="o">=</span> <span class="n">model_mean</span> <span class="o">+</span> <span class="p">(</span><span class="mf">0.5</span> <span class="o">*</span> <span class="n">model_log_variance</span><span class="p">)</span><span class="o">.</span><span class="n">exp</span><span class="p">()</span> <span class="o">*</span> <span class="n">noise</span>
+    <span class="k">return</span> <span class="n">pred_img</span><span class="p">,</span> <span class="n">x_start</span>
+</pre></div>
+</div>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>T: 1000</p></li>
+<li><p>backbone: U-Net<br />
+각 down/upsampling 단계는 ResNet/ConvNext 블록 2개 + (groupnorm + attention + residual) + down/upsampling으로 구성됨</p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">block_klass</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="n">ResnetBlock</span><span class="p">,</span> <span class="n">groups</span> <span class="o">=</span> <span class="n">resnet_block_groups</span><span class="p">)</span>
+
+<span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_in</span><span class="p">))),</span>
+                <span class="n">Downsample</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+            <span class="p">]))</span>
+            
+ <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_out</span><span class="p">))),</span>
+                <span class="n">Upsample</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span>  <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+            <span class="p">]))</span>
+
+</pre></div>
+</div>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">Unet</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
+        <span class="bp">self</span><span class="p">,</span>
+        <span class="n">dim</span><span class="p">,</span>
+        <span class="n">init_dim</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+        <span class="n">out_dim</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
+        <span class="n">dim_mults</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">),</span>
+        <span class="n">channels</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
+        <span class="n">self_condition</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+        <span class="n">resnet_block_groups</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
+        <span class="n">learned_variance</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+        <span class="n">learned_sinusoidal_cond</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+        <span class="n">random_fourier_features</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+        <span class="n">learned_sinusoidal_dim</span> <span class="o">=</span> <span class="mi">16</span>
+    <span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="c1"># determine dimensions</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">channels</span> <span class="o">=</span> <span class="n">channels</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span> <span class="o">=</span> <span class="n">self_condition</span>
+        <span class="n">input_channels</span> <span class="o">=</span> <span class="n">channels</span> <span class="o">*</span> <span class="p">(</span><span class="mi">2</span> <span class="k">if</span> <span class="n">self_condition</span> <span class="k">else</span> <span class="mi">1</span><span class="p">)</span>
+
+        <span class="n">init_dim</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">init_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">init_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">input_channels</span><span class="p">,</span> <span class="n">init_dim</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">3</span><span class="p">)</span>
+
+        <span class="n">dims</span> <span class="o">=</span> <span class="p">[</span><span class="n">init_dim</span><span class="p">,</span> <span class="o">*</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">m</span><span class="p">:</span> <span class="n">dim</span> <span class="o">*</span> <span class="n">m</span><span class="p">,</span> <span class="n">dim_mults</span><span class="p">)]</span>
+        <span class="n">in_out</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">dims</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">dims</span><span class="p">[</span><span class="mi">1</span><span class="p">:]))</span>
+
+        <span class="n">block_klass</span> <span class="o">=</span> <span class="n">partial</span><span class="p">(</span><span class="n">ResnetBlock</span><span class="p">,</span> <span class="n">groups</span> <span class="o">=</span> <span class="n">resnet_block_groups</span><span class="p">)</span>
+
+        <span class="c1"># time embeddings</span>
+
+        <span class="n">time_dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">*</span> <span class="mi">4</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">random_or_learned_sinusoidal_cond</span> <span class="o">=</span> <span class="n">learned_sinusoidal_cond</span> <span class="ow">or</span> <span class="n">random_fourier_features</span>
+
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_or_learned_sinusoidal_cond</span><span class="p">:</span>
+            <span class="n">sinu_pos_emb</span> <span class="o">=</span> <span class="n">RandomOrLearnedSinusoidalPosEmb</span><span class="p">(</span><span class="n">learned_sinusoidal_dim</span><span class="p">,</span> <span class="n">random_fourier_features</span><span class="p">)</span>
+            <span class="n">fourier_dim</span> <span class="o">=</span> <span class="n">learned_sinusoidal_dim</span> <span class="o">+</span> <span class="mi">1</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">sinu_pos_emb</span> <span class="o">=</span> <span class="n">SinusoidalPosEmb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span>
+            <span class="n">fourier_dim</span> <span class="o">=</span> <span class="n">dim</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">time_mlp</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
+            <span class="n">sinu_pos_emb</span><span class="p">,</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">fourier_dim</span><span class="p">,</span> <span class="n">time_dim</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">GELU</span><span class="p">(),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">time_dim</span><span class="p">,</span> <span class="n">time_dim</span><span class="p">)</span>
+        <span class="p">)</span>
+
+        <span class="c1"># layers</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">downs</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([])</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">ups</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([])</span>
+        <span class="n">num_resolutions</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">in_out</span><span class="p">)</span>
+
+        <span class="k">for</span> <span class="n">ind</span><span class="p">,</span> <span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">in_out</span><span class="p">):</span>
+            <span class="n">is_last</span> <span class="o">=</span> <span class="n">ind</span> <span class="o">&gt;=</span> <span class="p">(</span><span class="n">num_resolutions</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+
+            <span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_in</span><span class="p">))),</span>
+                <span class="n">Downsample</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+            <span class="p">]))</span>
+
+        <span class="n">mid_dim</span> <span class="o">=</span> <span class="n">dims</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">mid_block1</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">mid_dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">mid_attn</span> <span class="o">=</span> <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">Attention</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">)))</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">mid_block2</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">mid_dim</span><span class="p">,</span> <span class="n">mid_dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
+
+        <span class="k">for</span> <span class="n">ind</span><span class="p">,</span> <span class="p">(</span><span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">reversed</span><span class="p">(</span><span class="n">in_out</span><span class="p">)):</span>
+            <span class="n">is_last</span> <span class="o">=</span> <span class="n">ind</span> <span class="o">==</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">in_out</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+
+            <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">([</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">block_klass</span><span class="p">(</span><span class="n">dim_out</span> <span class="o">+</span> <span class="n">dim_in</span><span class="p">,</span> <span class="n">dim_out</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">),</span>
+                <span class="n">Residual</span><span class="p">(</span><span class="n">PreNorm</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">LinearAttention</span><span class="p">(</span><span class="n">dim_out</span><span class="p">))),</span>
+                <span class="n">Upsample</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">is_last</span> <span class="k">else</span>  <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim_out</span><span class="p">,</span> <span class="n">dim_in</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">padding</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+            <span class="p">]))</span>
+
+        <span class="n">default_out_dim</span> <span class="o">=</span> <span class="n">channels</span> <span class="o">*</span> <span class="p">(</span><span class="mi">1</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">learned_variance</span> <span class="k">else</span> <span class="mi">2</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">out_dim</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">out_dim</span><span class="p">,</span> <span class="n">default_out_dim</span><span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">final_res_block</span> <span class="o">=</span> <span class="n">block_klass</span><span class="p">(</span><span class="n">dim</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">time_emb_dim</span> <span class="o">=</span> <span class="n">time_dim</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">final_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">out_dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+        
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">time</span><span class="p">,</span> <span class="n">x_self_cond</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
+          <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_condition</span><span class="p">:</span>
+              <span class="n">x_self_cond</span> <span class="o">=</span> <span class="n">default</span><span class="p">(</span><span class="n">x_self_cond</span><span class="p">,</span> <span class="k">lambda</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x_self_cond</span><span class="p">,</span> <span class="n">x</span><span class="p">),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+
+          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">init_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+          <span class="n">r</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">clone</span><span class="p">()</span>
+
+          <span class="n">t</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_mlp</span><span class="p">(</span><span class="n">time</span><span class="p">)</span>
+
+          <span class="n">h</span> <span class="o">=</span> <span class="p">[]</span>
+
+          <span class="k">for</span> <span class="n">block1</span><span class="p">,</span> <span class="n">block2</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">downsample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">downs</span><span class="p">:</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+              <span class="n">h</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+              <span class="n">x</span> <span class="o">=</span> <span class="n">block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+              <span class="n">h</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+              <span class="n">x</span> <span class="o">=</span> <span class="n">downsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mid_block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+
+          <span class="k">for</span> <span class="n">block1</span><span class="p">,</span> <span class="n">block2</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">upsample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ups</span><span class="p">:</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">h</span><span class="o">.</span><span class="n">pop</span><span class="p">()),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">block1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+
+              <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">h</span><span class="o">.</span><span class="n">pop</span><span class="p">()),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">block2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+              <span class="n">x</span> <span class="o">=</span> <span class="n">attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+              <span class="n">x</span> <span class="o">=</span> <span class="n">upsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+          <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">x</span><span class="p">,</span> <span class="n">r</span><span class="p">),</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+
+          <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_res_block</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+          <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>16 x 16 feature map resolution에서 self-attention을 적용. 1x1 conv로 채널 수를 hidden_dim의 3배로 늘린 뒤 chunk하여 q, k, v로 분해.</p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">Attention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">32</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">scale</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">**</span> <span class="o">-</span><span class="mf">0.5</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span>
+        <span class="n">hidden_dim</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">*</span> <span class="n">heads</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="n">hidden_dim</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bias</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
+        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
+        <span class="n">qkv</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+        <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">t</span><span class="p">:</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s1">&#39;b (h c) x y -&gt; b h c (x y)&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">),</span> <span class="n">qkv</span><span class="p">)</span>
+
+        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span>
+
+        <span class="n">sim</span> <span class="o">=</span> <span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d i, b h d j -&gt; b h i j&#39;</span><span class="p">,</span> <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span>
+        <span class="n">attn</span> <span class="o">=</span> <span class="n">sim</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
+        <span class="n">out</span> <span class="o">=</span> <span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h i j, b h d j -&gt; b h i d&#39;</span><span class="p">,</span> <span class="n">attn</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
+
+        <span class="n">out</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="s1">&#39;b h (x y) d -&gt; b (h d) x y&#39;</span><span class="p">,</span> <span class="n">x</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
+        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>Linear attention</p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">LinearAttention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">32</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">scale</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">**</span> <span class="o">-</span><span class="mf">0.5</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span>
+        <span class="n">hidden_dim</span> <span class="o">=</span> <span class="n">dim_head</span> <span class="o">*</span> <span class="n">heads</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">dim</span><span class="p">,</span> <span class="n">hidden_dim</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">bias</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">hidden_dim</span><span class="p">,</span> <span class="n">dim</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
+            <span class="n">LayerNorm</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span>
+        <span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
+        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
+        <span class="n">qkv</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_qkv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
+        <span class="n">q</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">t</span><span class="p">:</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="s1">&#39;b (h c) x y -&gt; b h c (x y)&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">),</span> <span class="n">qkv</span><span class="p">)</span>
+
+        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span><span class="p">)</span>
+        <span class="n">k</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
+
+        <span class="n">q</span> <span class="o">=</span> <span class="n">q</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale</span>
+        <span class="n">v</span> <span class="o">=</span> <span class="n">v</span> <span class="o">/</span> <span class="p">(</span><span class="n">h</span> <span class="o">*</span> <span class="n">w</span><span class="p">)</span>
+
+        <span class="n">context</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d n, b h e n -&gt; b h d e&#39;</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span><span class="p">)</span>
+
+        <span class="n">out</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">einsum</span><span class="p">(</span><span class="s1">&#39;b h d e, b h d n -&gt; b h e n&#39;</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="n">q</span><span class="p">)</span>
+        <span class="n">out</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="s1">&#39;b h c (x y) -&gt; b (h c) x y&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">heads</span><span class="p">,</span> <span class="n">x</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
+        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_out</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>Diffusion time <span class="math notranslate nohighlight">\(t\)</span>는 각 residual block에 transformer sinusoidal positional embedding이 추가돼서 구분됨</p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">SinusoidalPosEmb</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dim</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
+        <span class="n">device</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">device</span>
+        <span class="n">half_dim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span>
+        <span class="n">emb</span> <span class="o">=</span> <span class="n">math</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">half_dim</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+        <span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">half_dim</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span> <span class="o">*</span> <span class="o">-</span><span class="n">emb</span><span class="p">)</span>
+        <span class="n">emb</span> <span class="o">=</span> <span class="n">x</span><span class="p">[:,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">emb</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="p">:]</span>
+        <span class="n">emb</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">emb</span><span class="o">.</span><span class="n">sin</span><span class="p">(),</span> <span class="n">emb</span><span class="o">.</span><span class="n">cos</span><span class="p">()),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">emb</span>
+</pre></div>
+</div>
+<section id="sample-quality">
+<h2>4-1. Sample quality<a class="headerlink" href="#sample-quality" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img13.png"><img alt="DDPM_13" class="bg-primary mb-1" src="../../_images/img13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 21 </span><span class="caption-text">Train score of DDPM (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>FID, IS로 metric 계산. Unconditional model인데도 conditional model보다 우월. Codelength에서 차이가 없기 때문에 overfitting의 가능성도 적음.</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><strong>FID score</strong>: 사전 학습된 Inception V3으로 생성된 이미지 집합과 실제 생성하고자 하는 집합 간 클래스 이미지의 분포의 거리를 계산한 metric. 낮을 수록 실제와 유사한 이미지를 생성.</p></li>
+<li><p><strong>IS(Inception Score)</strong>: 사전 학습된 Inception V3으로 생성한 이미지에 대해 정확하게 클래스가 나누어지는지, 다양한 클래스를 생성하는지 평가함. 높을 수록 다양한 클래스를 생성.</p></li>
+<li><p><strong>Unconditional model</strong>: 한번 dataset에 학습되면 추가적인 context 없이 image를 생성</p></li>
+<li><p><strong>Conditional model</strong>: Class, label 등의 추가 정보를 받아 image를 생성</p></li>
+</ul>
+</div></blockquote>
+<p><span class="math notranslate nohighlight">\(\mu\)</span>보다 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 계산하는 것이 성적이 좋고, fixed variance를 사용했을 때에도 성능이 감소하지 않음.</p>
+</section>
+<section id="reverse-process-parameterization-and-training-objective-ablation">
+<h2>4-2. Reverse process parameterization and training objective ablation<a class="headerlink" href="#reverse-process-parameterization-and-training-objective-ablation" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img14.png"><img alt="DDPM_14" class="bg-primary mb-1" src="../../_images/img14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 22 </span><span class="caption-text">Comparison between <span class="math notranslate nohighlight">\(\mu\)</span> and <span class="math notranslate nohighlight">\(\epsilon\)</span> (source: <a class="reference external" href="https://arxiv.org/abs/2006.11239">https://arxiv.org/abs/2006.11239</a>)</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\mu\)</span>의 실험 결과를 보면 분산을 고정했을 때 FID에서 성능 차이가 나는 것을 볼 수 있음. 또한 <span class="math notranslate nohighlight">\(\epsilon\)</span>을 사용하면서 Simplified objective를 사용하면 FID, IS가 더 개선됨</p>
+</section>
+<section id="id1">
+<h2>4-3. Progressive coding<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="gan.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">GAN</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DDIM.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DDIM</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DDPM</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-diffusion-process-q-mathbf-x-t-mathbf-x-t-1">2-1. Forward(diffusion) process <span class="math notranslate nohighlight">\(q(\mathbf{x}_t|\mathbf{x}_{t-1})\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-p-mathbf-x-t-1-mathbf-x-t">2-2. Reverse process <span class="math notranslate nohighlight">\(p(\mathbf{x}_{t-1}|\mathbf{x}_t)\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#loss-function-l">2-3. Loss Function <span class="math notranslate nohighlight">\(L\)</span></a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-denoising-encoders">3. Diffusion models and denoising encoders</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#forward-process-and-l-t">3-1. Forward process and <span class="math notranslate nohighlight">\(L_T\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-and-l-1-t-1">3-2. Reverse process and <span class="math notranslate nohighlight">\(L_{1:T-1}\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-scaling-reverse-process-decoder-and-l-0">3-3. Data scaling, reverse process decoder and <span class="math notranslate nohighlight">\(L_0\)</span></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#simplified-training-objective">3-4. Simplified training objective</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality">4-1. Sample quality</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reverse-process-parameterization-and-training-objective-ablation">4-2. Reverse process parameterization and training objective ablation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4-3. Progressive coding</a></li>
+</ul>
+</li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DiT.html b/docs/review/DiT.html
old mode 100644
new mode 100755
index 2d91273d..a2775e25
--- a/docs/review/DiT.html
+++ b/docs/review/DiT.html
@@ -1,1044 +1,1064 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DiT &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DiT';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="One-Step Image Translation with Text-to-Image Models" href="one-step-image-translation.html" />
-    <link rel="prev" title="LLM Grounded Diffusion" href="LLM_grounded_Diffusion.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DiT.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DiT.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DiT</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DiT</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-complexity">Architecture complexity</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-models">Latent diffusion models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformers">3. Diffusion Transformers</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformer-design-space">3.2. Diffusion Transformer Design Space</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#patchify">Patchify</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-block-design">DiT block design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-decoder">Transformer decoder</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">4. Experimental Setup</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-setting">Training setting</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#class-conditional-image-genertation-model">Class-conditional image generation model 비교</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">DiT block design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size-and-patch-size">Scaling model size and patch size</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-gflops-are-critical-to-improving-performance"><strong>DiT Gflops are critical to improving performance</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#larger-dit-models-are-more-compute-efficient"><strong>Larger DiT models are more compute-efficient</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#visualizing-scaling"><strong>Visualizing scaling</strong></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Scalable Diffusion Models with Transformers (ICCV 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2212.09748">https://arxiv.org/abs/2212.09748</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/facebookresearch/DiT">facebookresearch/DiT</a></p></li>
-<li><p>Project Page : <a class="reference external" href="https://www.wpeebles.com/DiT">https://www.wpeebles.com/DiT</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Junhyoung Lee</p></li>
-<li><p><strong>Last updated on July 20, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dit">
-<h1>DiT<a class="headerlink" href="#dit" title="Permalink to this heading">#</a></h1>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="abstract">
-<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>LDM 의 U-Net 백본을 transformer 로 바꾸어 학습을 진행한 diffusion model</p></li>
-<li><p>Gflops 측정: depth/width 증가 or 입력 토큰 수 증가로 Gflops 가 높게 나타났음 (FID 낮게 유지한 상태)</p></li>
-<li><p>우수한 scalability 특성을 보유하며, <span class="math notranslate nohighlight">\(\text{DiT-XL/2}\)</span> 모델은 class conditional ImageNet 벤치마크에서 이전의 생성 모델에 비해 성능이 뛰어났음 (FID 2.27)</p></li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>Diffusion 모델이 이미지 생성 모델의 트렌드를 유지하고 있지만, convolution 네트워크인 U-Net 모델 사용</p>
-<ul>
-<li><p>transformers 는 autoregressive 모델에서 사용되고 있었음</p></li>
-</ul>
-</li>
-<li><p>초기에는 픽셀 레벨의 autoregressive model과 conditional GAN에서 U-Net 이 성공을 이끌었음</p></li>
-<li><p>DDPM 에서는 ResNet 블럭이 주요하게 구성됨</p>
-<ul>
-<li><p>반면, transformer 에서는 spatial self-attention 블럭이 구성되어 있고, 저해상도에 포함됨</p></li>
-</ul>
-</li>
-<li><p>ADM</p>
-<ul>
-<li><p>classifier-guidance diffusion model</p></li>
-<li><p>adaptive normalization layer 와 같은 U-Net 에서 선택적으로 제거</p></li>
-</ul>
-</li>
-<li><p>저자들은 diffusion 모델의 아키텍처 구성의 중요성을 밝혔음</p>
-<ul>
-<li><p>U-Net 의 inductive bias 가 diffusion 모델의 성능에 영향을 끼치지 않고, transformer 로 쉽게 대체 가능함</p></li>
-<li><p>transformer 를 기반으로 새로운 종류의 diffusion model 을 제안 → Diffusion Transformer (DiT)</p></li>
-</ul>
-</li>
-<li><p>DiT는 ViT를 준수하며, 기존 convolution 네트워크보다 시각적 인식(visual recognition)을 위해 더 효과적으로 확장되는 것으로 나타남</p></li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-works">
-<h1>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h1>
-<section id="architecture-complexity">
-<h2>Architecture complexity<a class="headerlink" href="#architecture-complexity" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>아키텍처의 복잡성(complexity)을 측정할 때, 보통 파라미터 수를 측정함</p>
-<ul>
-<li><p>하지만, 이미지 해상도 등을 고려하지 않아 모델의 복잡성을 제대로 측정할 수 없다고 주장</p></li>
-<li><p>저자들은 Gflops 를 통해 모델의 복잡성을 분석함</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="latent-diffusion-models">
-<h2>Latent diffusion models<a class="headerlink" href="#latent-diffusion-models" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>픽셀 공간에서 diffusion 모델을 학습하는 것은 연산적으로 어려움</p></li>
-<li><p>LDM</p>
-<ol class="arabic simple">
-<li><p>learned encoder <span class="math notranslate nohighlight">\(E\)</span> 로 이미지를 작은 해상도로 압축하는 autoencoder 를 학습</p></li>
-<li><p>픽셀 공간의 이미지 <span class="math notranslate nohighlight">\(x\)</span>의 diffusion 모델 대신, 압축된  <span class="math notranslate nohighlight">\(z = E(x)\)</span> 의 diffusion 모델 학습 (<span class="math notranslate nohighlight">\(E\)</span>는 고정)</p></li>
-</ol>
-<ul class="simple">
-<li><p>→ 이후 diffusion 모델에서 <span class="math notranslate nohighlight">\(z\)</span> 를 샘플링하고 학습된 디코더 <span class="math notranslate nohighlight">\(x = D(z)\)</span> 를 사용하여 이미지로 디코딩하여 새로운 이미지를 생성함</p></li>
-<li><p>LDM은 ADM과 같은 픽셀 공간 diffusion 모델보다 훨씬 적은 Gflops를 사용하면서도 우수한 성능을 달성</p></li>
-</ul>
-</li>
-<li><p>DiT</p>
-<ul class="simple">
-<li><p>저자들은 latent 공간에서 DiT를 적용했고, 픽셀 공간에서도 수정 없이 적용할 수 있음</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="diffusion-transformers">
-<h1>3. Diffusion Transformers<a class="headerlink" href="#diffusion-transformers" title="Permalink to this heading">#</a></h1>
-<section id="diffusion-transformer-design-space">
-<h2>3.2. Diffusion Transformer Design Space<a class="headerlink" href="#diffusion-transformer-design-space" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>scaling 속성을 유지하기 위해 standard transformer 구조를 따르도록 설계</p></li>
-<li><p>이미지(spatial representations)의 DDPM을 학습하기 때문에,  패치를 연산하는 ViT 구조로 설계됨</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architecture.png"><img alt="architecture" class="bg-primary mb-1" src="../../_images/architecture.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 528 </span><span class="caption-text">architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="patchify">
-<h3>Patchify<a class="headerlink" href="#patchify" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig4.png"><img alt="fig4" class="bg-primary mb-1" src="../../_images/fig4.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 529 </span><span class="caption-text">fig4</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DiT 에 spatial representation <span class="math notranslate nohighlight">\(z\)</span> (256x256x3 이미지에서는 <span class="math notranslate nohighlight">\(z\)</span> 는 32x32x4 로 표현) 가 입력됨</p></li>
-<li><p>“patchify” : <span class="math notranslate nohighlight">\(z\)</span> → <span class="math notranslate nohighlight">\(d\)</span> 차원의 <span class="math notranslate nohighlight">\(T\)</span> tokens 로 변환 + positional embedding</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(p\)</span> 에 따라 토큰 수가 달라짐 (<span class="math notranslate nohighlight">\(p\)</span> = 2, 4, 8)</p></li>
-<li><p><span class="math notranslate nohighlight">\(p\)</span> 를 반으로 줄이면 <span class="math notranslate nohighlight">\(T\)</span> 가 4배가 되므로 총 transformer Gflops 는 최소 4배가 되어 상당한 영향을 끼침</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="dit-block-design">
-<h3>DiT block design<a class="headerlink" href="#dit-block-design" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Diffusion 모델은 보통 noised image 입력과 noise timestep <span class="math notranslate nohighlight">\(t\)</span>, class label <span class="math notranslate nohighlight">\(c\)</span> 의 벡터 임베딩 값을 conditional 정보로 입력 시퀀스에 더해줌</p></li>
-<li><p>다른 conditional inputs 으로 처리하는 4개의 transformer block을 진행함</p></li>
-</ul>
-<ol class="arabic simple">
-<li><p><em><strong>In-context conditioning</strong></em></p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(t\)</span>, <span class="math notranslate nohighlight">\(c\)</span> 를 추가의 토큰으로 더하는 방식 → 이미지 토큰과 동일하게 처리함</p>
-<ul>
-<li><p>ViT 의 <span class="math notranslate nohighlight">\(\text {cls}\)</span> 토큰과 유사한 방식</p></li>
-</ul>
-</li>
-<li><p>마지막 블럭에서는 conditioning 토큰을 제거함</p></li>
-</ul>
-</li>
-<li><p><em><strong>Cross-attention block</strong></em></p>
-<ul class="simple">
-<li><p>이미지 토큰과 별도로 <span class="math notranslate nohighlight">\(t\)</span>, <span class="math notranslate nohighlight">\(c\)</span> 를 concat 함</p></li>
-<li><p>self-attention 다음에 cross attention 을 포함하도록 구조를 수정함</p></li>
-<li><p>cross-attention 연산이 15% 오버헤드로 모델에 가장 많은 Gflops를 추가함</p></li>
-</ul>
-</li>
-<li><p><em><strong>Adaptive layer norm (adaLN) block</strong></em></p>
-<ul class="simple">
-<li><p>Transformer 블럭의 standard layer norm layer 를 adaptive layer norm (adaLN) 으로 교체함</p></li>
-<li><p>직접 scale <span class="math notranslate nohighlight">\(\gamma\)</span>, shift <span class="math notranslate nohighlight">\(\beta\)</span> 파라미터를 학습하는 것보다, <span class="math notranslate nohighlight">\(t\)</span> 와 <span class="math notranslate nohighlight">\(c\)</span> 벡터 임베딩 값의 합으로부터 회귀하도록 설계함</p></li>
-<li><p>최소한의 Gflops 를 추가하므로 연산에 효율적임</p></li>
-<li><p>모든 토큰에 대해 같은 function을 적용함으로써 conditioning 하는 방법</p></li>
-</ul>
-</li>
-<li><p><em><strong>adaLN-Zero block</strong></em></p>
-<ul class="simple">
-<li><p>ResNet: 각 residual block 을 identity function 으로 초기화하는 것이 효과적이라는 것을 보임</p>
-<ul>
-<li><p>각 블럭의 마지막 batch norm scale factor <span class="math notranslate nohighlight">\(\gamma\)</span> 를 0으로 초기화 하는 것이 large-scale 학습에 좋다는 것을 발견함</p></li>
-</ul>
-</li>
-<li><p>Diffusion U-Net 모델은 비슷한 초기화 전략을 사용하는데, residual connection 전에 각 블럭의 최종 convolutional layer 를 0으로 초기화함</p></li>
-<li><p><span class="math notranslate nohighlight">\(\gamma, \beta\)</span> 를 회귀하는 것 외에도 DiT 블럭 내의 residual connection 전에 적용되는 dimension 별 scaling <span class="math notranslate nohighlight">\(\alpha\)</span> 를 회귀함</p></li>
-<li><p>모든 <span class="math notranslate nohighlight">\(\alpha\)</span> 에 대해 영벡터를 출력하도록 MLP를 초기화함 → 전체 DiT 블록을 identity function 으로 초기화하게됨</p></li>
-<li><p>adaLN 블록과 비슷하게 adaLN-Zero 는 Gflops 에 거의 영향을 끼치지 않음</p></li>
-</ul>
-</li>
-</ol>
-</section>
-<section id="transformer-decoder">
-<h3>Transformer decoder<a class="headerlink" href="#transformer-decoder" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architecture2.png"><img alt="architecture2" class="bg-primary mb-1" src="../../_images/architecture2.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 530 </span><span class="caption-text">architecture2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>최종 DiT 블록을 통과한 후, 이미지 토큰 시퀀스를 예측된 noise 값과 covariance 값으로 디코딩해야 함</p>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(\text{LayerNorm}\)</span> (adaLN을 사용하는 경우 adaLN)</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{Linear, Reshape}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{VAE}\)</span> decoder → “output shape = input image shape”</p></li>
-</ol>
-</li>
-<li><p>Q) covariance 값은 왜 예측하는지?</p>
-<ul>
-<li><p><a class="reference external" href="https://kyujinpy.tistory.com/132#:~:text=%EA%B7%B8%20%EC%9D%B4%ED%9B%84%2C%20VAE%20decoder%EC%97%90%20noise%20%EA%B0%92%EC%9D%84%20%EB%84%A3%EC%96%B4%EC%84%9C%20%EC%8B%A4%EC%A0%9C%20%EC%9D%B4%EB%AF%B8%EC%A7%80%EB%A5%BC%20%EC%83%9D%EC%84%B1%ED%95%9C%EB%8B%A4.">참고 링크</a></p></li>
-<li><p>ADM 학습과 연관됨</p>
-<ul>
-<li><p>Noise 차이 loss 이외의 분산도 학습을 진행했음 → vlb_loss 활용</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="experimental-setup">
-<h1>4. Experimental Setup<a class="headerlink" href="#experimental-setup" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>DiT models</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table11.png"><img alt="table1" class="bg-primary mb-1" src="../../_images/table11.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 531 </span><span class="caption-text">table1</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="training-setting">
-<h2>Training setting<a class="headerlink" href="#training-setting" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\text {DiT-XL/2}\)</span>:  <span class="math notranslate nohighlight">\(\text{XLarge}\)</span> config and  <span class="math notranslate nohighlight">\(p = 2\)</span>.</p></li>
-<li><p>class-conditional latent DiT models</p></li>
-<li><p>해상도: 256x256, 512x512</p></li>
-<li><p>데이터 셋: ImageNet</p></li>
-<li><p>마지막 linear layer 는 0으로 초기화, 나머지는 standard weight 초기화</p></li>
-<li><p>Optimizer: AdamW</p></li>
-<li><p>Learning rate: <span class="math notranslate nohighlight">\(1 \times 10^{-4}\)</span></p></li>
-<li><p>Batch size: 256</p></li>
-<li><p>Augmentation: horizontal flip</p></li>
-<li><p>exponential moving average (EMA): 0.9999</p></li>
-<li><p>Diffusion: Stable Diffusion 의 VAE 사용</p></li>
-</ul>
-</section>
-<section id="evaluation">
-<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
-<section id="class-conditional-image-genertation-model">
-<h3>Class-conditional image generation model 비교<a class="headerlink" href="#class-conditional-image-genertation-model" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table11.png"><img alt="table2" class="bg-primary mb-1" src="../../_images/table11.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 532 </span><span class="caption-text">table2</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table3.png"><img alt="table3" class="bg-primary mb-1" src="../../_images/table3.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 533 </span><span class="caption-text">table3</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>ADM, LDM 보다 DiT-XL 모델이 FID, IS 점수가 좋음</p></li>
-</ul>
-</section>
-<section id="id1">
-<h3>DiT block design<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig5.png"><img alt="fig5" class="bg-primary mb-1" src="../../_images/fig5.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 534 </span><span class="caption-text">fig5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>adaLN-Zero 구조가 적은 학습에도 FID 점수가 좋음</p></li>
-</ul>
-</section>
-<section id="scaling-model-size-and-patch-size">
-<h3>Scaling model size and patch size<a class="headerlink" href="#scaling-model-size-and-patch-size" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig6.png"><img alt="fig6" class="bg-primary mb-1" src="../../_images/fig6.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 535 </span><span class="caption-text">fig6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>모델이 클수록 FID 점수가 좋음</p></li>
-</ul>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig7.png"><img alt="fig7" class="bg-primary mb-1" src="../../_images/fig7.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 536 </span><span class="caption-text">fig7</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="dit-gflops-are-critical-to-improving-performance">
-<h3><strong>DiT Gflops are critical to improving performance</strong><a class="headerlink" href="#dit-gflops-are-critical-to-improving-performance" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig8.png"><img alt="fig8" class="bg-primary mb-1" src="../../_images/fig8.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 537 </span><span class="caption-text">fig8</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Transformer Gflops 가 클수록 FID 점수가 좋음</p></li>
-</ul>
-</section>
-<section id="larger-dit-models-are-more-compute-efficient">
-<h3><strong>Larger DiT models are more compute-efficient</strong><a class="headerlink" href="#larger-dit-models-are-more-compute-efficient" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig9.png"><img alt="fig9" class="bg-primary mb-1" src="../../_images/fig9.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 538 </span><span class="caption-text">fig9</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>학습할 때의 Gflops 가 높을 수록 FID 점수가 좋음</p></li>
-</ul>
-</section>
-<section id="visualizing-scaling">
-<h3><strong>Visualizing scaling</strong><a class="headerlink" href="#visualizing-scaling" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/visual_result.png"><img alt="visual_result" class="bg-primary mb-1" src="../../_images/visual_result.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 539 </span><span class="caption-text">visual_result</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>패치 사이즈와 transformer 크기에 따른 생성 결과 확인</p>
-<ul>
-<li><p>패치가 작을수록, 모델 크기가 클수록 생성 결과가 좋음</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>Diffusion Transformers 는 간단한 transformer 기반 diffusion 모델</p>
-<ul>
-<li><p>이전 U-Net 모델보다 성능이 뛰어나고, transformer 모델의 scaling 특성을 우수하게 적용함</p></li>
-</ul>
-</li>
-<li><p>adaLN 을 통해 연산의 효율성도 가져옴</p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="LLM_grounded_Diffusion.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">LLM Grounded Diffusion</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="one-step-image-translation.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">One-Step Image Translation with Text-to-Image Models</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DiT</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-complexity">Architecture complexity</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-models">Latent diffusion models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformers">3. Diffusion Transformers</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformer-design-space">3.2. Diffusion Transformer Design Space</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#patchify">Patchify</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-block-design">DiT block design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-decoder">Transformer decoder</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">4. Experimental Setup</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-setting">Training setting</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#class-conditional-image-genertation-model">Class-conditional image generation model 비교</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">DiT block design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size-and-patch-size">Scaling model size and patch size</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-gflops-are-critical-to-improving-performance"><strong>DiT Gflops are critical to improving performance</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#larger-dit-models-are-more-compute-efficient"><strong>Larger DiT models are more compute-efficient</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#visualizing-scaling"><strong>Visualizing scaling</strong></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DiT &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DiT';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="One-Step Image Translation with Text-to-Image Models" href="one-step-image-translation.html" />
+    <link rel="prev" title="LLM Grounded Diffusion" href="LLM_grounded_Diffusion.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DiT.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DiT.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DiT</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DiT</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-complexity">Architecture complexity</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-models">Latent diffusion models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformers">3. Diffusion Transformers</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformer-design-space">3.2. Diffusion Transformer Design Space</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#patchify">Patchify</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-block-design">DiT block design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-decoder">Transformer decoder</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">4. Experimental Setup</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-setting">Training setting</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#class-conditional-image-genertation-model">Class-conditional image generation model 비교</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">DiT block design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size-and-patch-size">Scaling model size and patch size</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-gflops-are-critical-to-improving-performance"><strong>DiT Gflops are critical to improving performance</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#larger-dit-models-are-more-compute-efficient"><strong>Larger DiT models are more compute-efficient</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#visualizing-scaling"><strong>Visualizing scaling</strong></a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Scalable Diffusion Models with Transformers (ICCV 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2212.09748">https://arxiv.org/abs/2212.09748</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/facebookresearch/DiT">facebookresearch/DiT</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://www.wpeebles.com/DiT">https://www.wpeebles.com/DiT</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Junhyoung Lee</p></li>
+<li><p><strong>Last updated on July 20, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dit">
+<h1>DiT<a class="headerlink" href="#dit" title="Permalink to this heading">#</a></h1>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>LDM 의 U-Net 백본을 transformer 로 바꾸어 학습을 진행한 diffusion model</p></li>
+<li><p>Gflops 측정: depth/width 증가 or 입력 토큰 수 증가로 Gflops 가 높게 나타났음 (FID 낮게 유지한 상태)</p></li>
+<li><p>우수한 scalability 특성을 보유하며, <span class="math notranslate nohighlight">\(\text{DiT-XL/2}\)</span> 모델은 class conditional ImageNet 벤치마크에서 이전의 생성 모델에 비해 성능이 뛰어났음 (FID 2.27)</p></li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Diffusion 모델이 이미지 생성 모델의 트렌드를 유지하고 있지만, convolution 네트워크인 U-Net 모델 사용</p>
+<ul>
+<li><p>transformers 는 autoregressive 모델에서 사용되고 있었음</p></li>
+</ul>
+</li>
+<li><p>초기에는 픽셀 레벨의 autoregressive model과 conditional GAN에서 U-Net 이 성공을 이끌었음</p></li>
+<li><p>DDPM 에서는 ResNet 블럭이 주요하게 구성됨</p>
+<ul>
+<li><p>반면, transformer 에서는 spatial self-attention 블럭이 구성되어 있고, 저해상도에 포함됨</p></li>
+</ul>
+</li>
+<li><p>ADM</p>
+<ul>
+<li><p>classifier-guidance diffusion model</p></li>
+<li><p>adaptive normalization layer 와 같은 U-Net 의 아키텍처 구성 요소를 선택적으로 제거(ablation)하며 분석</p></li>
+</ul>
+</li>
+<li><p>저자들은 diffusion 모델의 아키텍처 구성의 중요성을 밝혔음</p>
+<ul>
+<li><p>U-Net 의 inductive bias 가 diffusion 모델의 성능에 영향을 끼치지 않고, transformer 로 쉽게 대체 가능함</p></li>
+<li><p>transformer 를 기반으로 새로운 종류의 diffusion model 을 제안 → Diffusion Transformer (DiT)</p></li>
+</ul>
+</li>
+<li><p>DiT는 ViT를 준수하며, 기존 convolution 네트워크보다 시각적 인식(visual recognition)을 위해 더 효과적으로 확장되는 것으로 나타남</p></li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-works">
+<h1>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h1>
+<section id="architecture-complexity">
+<h2>Architecture complexity<a class="headerlink" href="#architecture-complexity" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>아키텍처의 복잡성(complexity)을 측정할 때, 보통 파라미터 수를 측정함</p>
+<ul>
+<li><p>하지만, 이미지 해상도 등을 고려하지 않아 모델의 복잡성을 제대로 측정할 수 없다고 주장</p></li>
+<li><p>저자들은 Gflops 를 통해 모델의 복잡성을 분석함</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="latent-diffusion-models">
+<h2>Latent diffusion models<a class="headerlink" href="#latent-diffusion-models" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>픽셀 공간에서 diffusion 모델을 학습하는 것은 연산적으로 어려움</p></li>
+<li><p>LDM</p>
+<ol class="arabic simple">
+<li><p>learned encoder <span class="math notranslate nohighlight">\(E\)</span> 로 이미지를 작은 해상도로 압축하는 autoencoder 를 학습</p></li>
+<li><p>픽셀 공간의 이미지 <span class="math notranslate nohighlight">\(x\)</span>의 diffusion 모델 대신, 압축된  <span class="math notranslate nohighlight">\(z = E(x)\)</span> 의 diffusion 모델 학습 (<span class="math notranslate nohighlight">\(E\)</span>는 고정)</p></li>
+</ol>
+<ul class="simple">
+<li><p>→ 이후 diffusion 모델에서 <span class="math notranslate nohighlight">\(z\)</span> 를 샘플링하고 학습된 디코더 <span class="math notranslate nohighlight">\(x = D(z)\)</span> 를 사용하여 이미지로 디코딩하여 새로운 이미지를 생성함</p></li>
+<li><p>LDM은 ADM과 같은 픽셀 공간 diffusion 모델보다 훨씬 적은 Gflops를 사용하면서도 우수한 성능을 달성</p></li>
+</ul>
+</li>
+<li><p>DiT</p>
+<ul class="simple">
+<li><p>저자들은 latent 공간에서 DiT를 적용했고, 픽셀 공간에서도 수정 없이 적용할 수 있음</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="diffusion-transformers">
+<h1>3. Diffusion Transformers<a class="headerlink" href="#diffusion-transformers" title="Permalink to this heading">#</a></h1>
+<section id="diffusion-transformer-design-space">
+<h2>3.2. Diffusion Transformer Design Space<a class="headerlink" href="#diffusion-transformer-design-space" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>scaling 속성을 유지하기 위해 standard transformer 구조를 따르도록 설계</p></li>
+<li><p>이미지(spatial representations)의 DDPM을 학습하기 때문에,  패치를 연산하는 ViT 구조로 설계됨</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architecture.png"><img alt="architecture" class="bg-primary mb-1" src="../../_images/architecture.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 528 </span><span class="caption-text">architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="patchify">
+<h3>Patchify<a class="headerlink" href="#patchify" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig4.png"><img alt="fig4" class="bg-primary mb-1" src="../../_images/fig4.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 529 </span><span class="caption-text">fig4</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DiT 에 spatial representation <span class="math notranslate nohighlight">\(z\)</span> (256x256x3 이미지에서는 <span class="math notranslate nohighlight">\(z\)</span> 는 32x32x4 로 표현) 가 입력됨</p></li>
+<li><p>“patchify” : <span class="math notranslate nohighlight">\(z\)</span> → <span class="math notranslate nohighlight">\(d\)</span> 차원의 <span class="math notranslate nohighlight">\(T\)</span> tokens 로 변환 + positional embedding</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(p\)</span> 에 따라 토큰 수가 달라짐 (<span class="math notranslate nohighlight">\(p\)</span> = 2, 4, 8)</p></li>
+<li><p><span class="math notranslate nohighlight">\(p\)</span> 를 반으로 줄이면 <span class="math notranslate nohighlight">\(T\)</span> 가 4배가 되므로 총 transformer Gflops 는 최소 4배가 되어 상당한 영향을 끼침</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="dit-block-design">
+<h3>DiT block design<a class="headerlink" href="#dit-block-design" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Diffusion 모델은 보통 noised image 입력과 noise timestep <span class="math notranslate nohighlight">\(t\)</span>, class label <span class="math notranslate nohighlight">\(c\)</span> 의 벡터 임베딩 값을 conditional 정보로 입력 시퀀스에 더해줌</p></li>
+<li><p>conditional input 을 서로 다른 방식으로 처리하는 4가지 transformer block 변형을 실험함</p></li>
+</ul>
+<ol class="arabic simple">
+<li><p><em><strong>In-context conditioning</strong></em></p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(t\)</span>, <span class="math notranslate nohighlight">\(c\)</span> 를 추가의 토큰으로 더하는 방식 → 이미지 토큰과 동일하게 처리함</p>
+<ul>
+<li><p>ViT 의 <span class="math notranslate nohighlight">\(\text {cls}\)</span> 토큰과 유사한 방식</p></li>
+</ul>
+</li>
+<li><p>마지막 블럭에서는 conditioning 토큰을 제거함</p></li>
+</ul>
+</li>
+<li><p><em><strong>Cross-attention block</strong></em></p>
+<ul class="simple">
+<li><p>이미지 토큰과 별도로 <span class="math notranslate nohighlight">\(t\)</span>, <span class="math notranslate nohighlight">\(c\)</span> 를 concat 함</p></li>
+<li><p>self-attention 다음에 cross attention 을 포함하도록 구조를 수정함</p></li>
+<li><p>cross-attention 연산이 15% 오버헤드로 모델에 가장 많은 Gflops를 추가함</p></li>
+</ul>
+</li>
+<li><p><em><strong>Adaptive layer norm (adaLN) block</strong></em></p>
+<ul class="simple">
+<li><p>Transformer 블럭의 standard layer norm layer 를 adaptive layer norm (adaLN) 으로 교체함</p></li>
+<li><p>직접 scale <span class="math notranslate nohighlight">\(\gamma\)</span>, shift <span class="math notranslate nohighlight">\(\beta\)</span> 파라미터를 학습하는 것보다, <span class="math notranslate nohighlight">\(t\)</span> 와 <span class="math notranslate nohighlight">\(c\)</span> 벡터 임베딩 값의 합으로부터 회귀하도록 설계함</p></li>
+<li><p>최소한의 Gflops 를 추가하므로 연산에 효율적임</p></li>
+<li><p>모든 토큰에 대해 같은 function을 적용함으로써 conditioning 하는 방법</p></li>
+</ul>
+</li>
+<li><p><em><strong>adaLN-Zero block</strong></em></p>
+<ul class="simple">
+<li><p>ResNet 선행 연구: 각 residual block 을 identity function 으로 초기화하는 것이 효과적이라는 것을 보임</p>
+<ul>
+<li><p>각 블럭의 마지막 batch norm scale factor <span class="math notranslate nohighlight">\(\gamma\)</span> 를 0으로 초기화 하는 것이 large-scale 학습에 좋다는 것을 발견함</p></li>
+</ul>
+</li>
+<li><p>Diffusion U-Net 모델은 비슷한 초기화 전략을 사용하는데, residual connection 전에 각 블럭의 최종 convolutional layer 를 0으로 초기화함</p></li>
+<li><p><span class="math notranslate nohighlight">\(\gamma, \beta\)</span> 를 회귀하는 것 외에도 DiT 블럭 내의 residual connection 전에 적용되는 dimension 별 scaling <span class="math notranslate nohighlight">\(\alpha\)</span> 를 회귀함</p></li>
+<li><p>모든 <span class="math notranslate nohighlight">\(\alpha\)</span> 에 대해 영벡터를 출력하도록 MLP를 초기화함 → 전체 DiT 블록을 identity function 으로 초기화하게됨</p></li>
+<li><p>adaLN 블록과 비슷하게 adaLN-Zero 는 Gflops 에 영향을 끼치지 않음</p></li>
+</ul>
+</li>
+</ol>
+</section>
+<section id="transformer-decoder">
+<h3>Transformer decoder<a class="headerlink" href="#transformer-decoder" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architecture2.png"><img alt="architecture2" class="bg-primary mb-1" src="../../_images/architecture2.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 530 </span><span class="caption-text">architecture2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>최종 DiT 블록을 통과한 후, 이미지 토큰 시퀀스를 예측된 noise 값과 covariance 값으로 디코딩해야함</p>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(\text{LayerNorm}\)</span> (adaLN을 사용하는 경우 adaLN)</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{Linear, Reshape}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{VAE}\)</span> decoder → “output shape = input image shape”</p></li>
+</ol>
+</li>
+<li><p>Q) covariance 값은 왜 예측하는지?</p>
+<ul>
+<li><p><a class="reference external" href="https://kyujinpy.tistory.com/132#:~:text=%EA%B7%B8%20%EC%9D%B4%ED%9B%84%2C%20VAE%20decoder%EC%97%90%20noise%20%EA%B0%92%EC%9D%84%20%EB%84%A3%EC%96%B4%EC%84%9C%20%EC%8B%A4%EC%A0%9C%20%EC%9D%B4%EB%AF%B8%EC%A7%80%EB%A5%BC%20%EC%83%9D%EC%84%B1%ED%95%9C%EB%8B%A4.">참고 링크</a></p></li>
+<li><p>ADM 학습과 연관됨</p>
+<ul>
+<li><p>Noise 차이 loss 이외에 분산도 학습을 진행했음 → vlb_loss 활용</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experimental-setup">
+<h1>4. Experimental Setup<a class="headerlink" href="#experimental-setup" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>DiT models</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table11.png"><img alt="table1" class="bg-primary mb-1" src="../../_images/table11.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 531 </span><span class="caption-text">table1</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="training-setting">
+<h2>Training setting<a class="headerlink" href="#training-setting" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\text {DiT-XL/2}\)</span>:  <span class="math notranslate nohighlight">\(\text{XLarge}\)</span> config and  <span class="math notranslate nohighlight">\(p = 2\)</span>.</p></li>
+<li><p>class-conditional latent DiT models</p></li>
+<li><p>해상도: 256x256, 512x512</p></li>
+<li><p>데이터 셋: ImageNet</p></li>
+<li><p>마지막 linear layer 는 0으로 초기화, 나머지는 standard weight 초기화</p></li>
+<li><p>Optimizer: AdamW</p></li>
+<li><p>Learning rate: <span class="math notranslate nohighlight">\(1 \times 10^{-4}\)</span></p></li>
+<li><p>Batch size: 256</p></li>
+<li><p>Augmentation: horizontal flip</p></li>
+<li><p>exponential moving average (EMA): 0.9999</p></li>
+<li><p>Diffusion: Stable Diffusion 의 VAE 사용</p></li>
+</ul>
+</section>
+<section id="evaluation">
+<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
+<section id="class-conditional-image-genertation-model">
+<h3>Class-conditional image generation model 비교<a class="headerlink" href="#class-conditional-image-genertation-model" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table11.png"><img alt="table2" class="bg-primary mb-1" src="../../_images/table11.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 532 </span><span class="caption-text">table2</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table3.png"><img alt="table3" class="bg-primary mb-1" src="../../_images/table3.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 533 </span><span class="caption-text">table3</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>ADM, LDM 보다 DiT-XL 모델이 FID, IS 점수가 좋음</p></li>
+</ul>
+</section>
+<section id="id1">
+<h3>DiT block design<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig5.png"><img alt="fig5" class="bg-primary mb-1" src="../../_images/fig5.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 534 </span><span class="caption-text">fig5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>adaLN-Zero 구조가 적은 학습에도 FID 점수가 좋음</p></li>
+</ul>
+</section>
+<section id="scaling-model-size-and-patch-size">
+<h3>Scaling model size and patch size<a class="headerlink" href="#scaling-model-size-and-patch-size" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig6.png"><img alt="fig6" class="bg-primary mb-1" src="../../_images/fig6.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 535 </span><span class="caption-text">fig6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>모델이 클수록 FID 점수가 좋음</p></li>
+</ul>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig7.png"><img alt="fig7" class="bg-primary mb-1" src="../../_images/fig7.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 536 </span><span class="caption-text">fig7</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="dit-gflops-are-critical-to-improving-performance">
+<h3><strong>DiT Gflops are critical to improving performance</strong><a class="headerlink" href="#dit-gflops-are-critical-to-improving-performance" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig8.png"><img alt="fig8" class="bg-primary mb-1" src="../../_images/fig8.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 537 </span><span class="caption-text">fig8</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Transformer Gflops 가 클수록 FID 점수가 좋음</p></li>
+</ul>
+</section>
+<section id="larger-dit-models-are-more-compute-efficient">
+<h3><strong>Larger DiT models are more compute-efficient</strong><a class="headerlink" href="#larger-dit-models-are-more-compute-efficient" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig9.png"><img alt="fig9" class="bg-primary mb-1" src="../../_images/fig9.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 538 </span><span class="caption-text">fig9</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>학습할 때의 Gflops 가 높을수록 FID 점수가 좋음</p></li>
+</ul>
+</section>
+<section id="visualizing-scaling">
+<h3><strong>Visualizing scaling</strong><a class="headerlink" href="#visualizing-scaling" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/visual_result.png"><img alt="visual_result" class="bg-primary mb-1" src="../../_images/visual_result.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 539 </span><span class="caption-text">visual_result</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>패치 사이즈와 transformer 크기에 따른 생성 결과 확인</p>
+<ul>
+<li><p>패치가 작을수록, 모델 크기가 클수록 생성 결과가 좋음</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Diffusion Transformers 는 간단한 transformer 기반 diffusion 모델</p>
+<ul>
+<li><p>이전 U-Net 모델보다 성능이 뛰어나고, transformer 모델의 scaling 특성을 우수하게 적용함</p></li>
+</ul>
+</li>
+<li><p>adaLN 을 통해 연산의 효율성도 가져옴</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="LLM_grounded_Diffusion.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">LLM Grounded Diffusion</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="one-step-image-translation.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">One-Step Image Translation with Text-to-Image Models</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DiT</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-complexity">Architecture complexity</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-models">Latent diffusion models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformers">3. Diffusion Transformers</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-transformer-design-space">3.2. Diffusion Transformer Design Space</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#patchify">Patchify</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-block-design">DiT block design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-decoder">Transformer decoder</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">4. Experimental Setup</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-setting">Training setting</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#class-conditional-image-genertation-model">Class-conditional image generation model 비교</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">DiT block design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size-and-patch-size">Scaling model size and patch size</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dit-gflops-are-critical-to-improving-performance"><strong>DiT Gflops are critical to improving performance</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#larger-dit-models-are-more-compute-efficient"><strong>Larger DiT models are more compute-efficient</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#visualizing-scaling"><strong>Visualizing scaling</strong></a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html b/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html
old mode 100644
new mode 100755
index fc82bdea..dc64ba6a
--- a/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html
+++ b/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html
@@ -1,1031 +1,1051 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Diffusion Models already have a Semantic Latent Space &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Muse" href="Muse.html" />
-    <link rel="prev" title="ConceptLab" href="ConceptLab.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Diffusion Models already have a Semantic Latent Space</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probability-model-ddpm">2.1 Denoising Diffusion Probability Model(DDPM)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-implicit-model-ddim">2.2 Denoising Diffusion Implicit Model(DDIM)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-manipulation-with-clip">2.3 Image Manipulation with CLIP</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discovering-semantic-latent-space-in-diffusion-models">3. Discovering Semantic Latent Space In Diffusion Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#problem">3.1 Problem</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#asymmetric-reverse-process-asyrp">3.2 Asymmetric Reverse Process(Asyrp)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#h-space">3.3 h-space</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implicit-neural-directions">3.4 Implicit Neural Directions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-process-design">4. Generative Process Design</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#editing-process-with-asyrp">4.1 Editing Process With Asyrp</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quality-boosting-with-stochastic-noise-injection">4.2 Quality Boosting With Stochastic Noise Injection</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-process-of-image-editing">4.3 Overall Process of Image Editing</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#versatility-of-h-space-with-asyrp">5.1 Versatility of h-space with Asyrp</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitive-comparison">5.2 Quantitive Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#analysis-on-h-space">5.3 Analysis on h-space</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Diffusion Models already have a Semantic Latent Space (ICLR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2210.10960">https://arxiv.org/abs/2210.10960</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sehwan Park</p></li>
-<li><p><strong>Last updated on Nov. 18, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-already-have-a-semantic-latent-space">
-<h1>Diffusion Models already have a Semantic Latent Space<a class="headerlink" href="#diffusion-models-already-have-a-semantic-latent-space" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<p>Diffusion model은 많은 domain에서 좋은 성능을 보이지만 generative process를 control하는 semantic latent space가 부족하다. 논문에서는 diffusion model속에서 semantic latent space를 발견하기 위한 asymmetric reverse process(asyrp)를 제안하고 h-space라고 명칭한 semantic latent space의 좋은 특성(homogeneity, linearity, robustness, consistency across timesteps)들을 보여준다. 추가적으로 editing strength와 quality deficiency를 기준으로 삼고 더 좋은 image-image translation을 위한 Generative Process Design을 소개한다.</p>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure1.1.png"><img alt="Asyrp_1" class="bg-primary mb-1" src="../../_images/figure1.1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 431 </span><span class="caption-text">Manipulation approaches for diffusion models</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>(a) Image guidance는 unconditional한 latent variable에 guiding image의 latent variable을 합치는 방식을 사용한다. 그러나 latent variable을 둘 다 이용하면서 명확하게 control하기가 쉽지 않다.</p>
-<p>(b) Classifier guidance는 diffusion model에 classifier를 추가하여 generative process를 거치는 동안 latent variable이 어떤 class인지 분류하고 target class에 가까워지도록 score를 부여하는 방식으로 작동한다. 그러나 latent variable들에 대해 classify를 실행해야 하기에 pretrained model을 사용하기가 힘들어 직접 학습을 시켜야 하기에 시간적으로, 비용적으로 부담이 된다.</p>
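-<p>(c) DiffusionCLIP은 CLIP loss를 이용하여 diffusion model 전체를 fine-tuning하는 방식으로 image를 edit한다. 그러나 attribute마다 model 전체를 fine-tuning해야 하기에 시간적으로, 비용적으로 부담이 된다.</p>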
-<p>(d) Diffusion Models already have a Semantic Latent Space는 original image의 특성을 edit하기 위한 아주 좋은 특성을 가지고 있는 semantic latent space를 frozen diffusion model에서 발견하였고 이를 h-space라고 칭한다. h-space에는 다양한 좋은 특성들이 존재한다. versatile editing과 quality boosting을 위해 새로운 generative process를 design하여 제안한다. h-space는 frozen pretrained diffusion model에서 semantic latent space로서의 첫 발견사례이다.</p>
-</section>
-<section id="background">
-<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<section id="denoising-diffusion-probability-model-ddpm">
-<h3>2.1 Denoising Diffusion Probability Model(DDPM)<a class="headerlink" href="#denoising-diffusion-probability-model-ddpm" title="Permalink to this heading">#</a></h3>
-<p>DDPM에서는 임의의 time step t로 부터 noise가 껴있는 image <span class="math notranslate nohighlight">\(x_t\)</span>의 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>가 얼만큼인지 예측한다. 예측한 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>를 이용하여 noise가 일부 제거된 이전 step의 mean(<span class="math notranslate nohighlight">\(\mu_{\theta}(x_t)\)</span>)을 구할 수 있고 variance(<span class="math notranslate nohighlight">\(\Sigma_{\theta}(x_t)\)</span>)는 constant한 값으로 고정시킨다. DDPM에서 제시한 forward process와 reverse process는 다음과 같다. DDPM에서의 <span class="math notranslate nohighlight">\(\sigma_t^2 = \beta_t\)</span>이다.</p>
-<div class="math notranslate nohighlight">
-\[
-q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}x_{t-1}, \beta_t I)
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-p_{\theta}(x_{t-1}|x_t) := \mathcal{N}(\mu_{\theta}(x_t), \Sigma_{\theta}(x_t))
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \frac{1}{\sqrt{1-\beta_t}}\bigg(x_t - \frac{\beta_t}{\sqrt{1-\alpha_t}}\epsilon_t^\theta(x_t)\bigg) + \sigma_t\mathcal{z_t}
-\]</div>
-</section>
-<section id="denoising-diffusion-implicit-model-ddim">
-<h3>2.2 Denoising Diffusion Implicit Model(DDIM)<a class="headerlink" href="#denoising-diffusion-implicit-model-ddim" title="Permalink to this heading">#</a></h3>
-<p>DDIM에서는  non-Markovian process를 이용해 또 다른 관점의 reverse process를 제시하였고, DDPM과 DDIM 모두 general하게 적용되는 Diffusion process에 대한 식을 보여주었다. <span class="math notranslate nohighlight">\(\sigma_t = \eta\sqrt{(1-\alpha_{t-1}) / (1-\alpha_t)} \sqrt{1-\alpha_t/\alpha_{t-1}}\)</span>이다.</p>
-<p><span class="math notranslate nohighlight">\(\eta\)</span>=1인 경우 DDPM이 되고 stochastic해지며,  <span class="math notranslate nohighlight">\(\eta\)</span>=0인 경우 DDIM이 되고 deterministic해진다.</p>
-<div class="math notranslate nohighlight">
-\[
-q_{\sigma}(x_{t-1}|x_t,x_0) = \mathcal{N}(\sqrt{\alpha_{t-1}}x_0 + \sqrt{1-\alpha_{t-1}-\sigma_t^2} \cdot \cfrac{x_t - \sqrt{\alpha_t}x_0}{\sqrt{1-\alpha_t}}, \sigma_t^2I)
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\underbrace{\bigg(\frac{x_t - \sqrt{1-\alpha_t}\epsilon_t^\theta(x_t)}{\sqrt{\alpha_t}}\bigg)}_{\textrm{predicted } x_0} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot \epsilon_t^\theta(x_t) }_{\textrm{direction pointing to }x_t} + \sigma_t\mathcal{z_t}
-\]</div>
-</section>
-<section id="image-manipulation-with-clip">
-<h3>2.3 Image Manipulation with CLIP<a class="headerlink" href="#image-manipulation-with-clip" title="Permalink to this heading">#</a></h3>
-<p>CLIP은 Image Encoder와 Text Encoder를 이용하여 image와 text간의 embedding을 학습한다. 편집된 이미지와 대상 설명 간의 cosine distance를 직접 최소화하는 대신 cosine distance를 사용한 directional loss를 사용하여 mode collapse없이 균일한 editing을 가능하게 했다고 한다.</p>
-<p><span class="math notranslate nohighlight">\(\Delta T = \mathrm{E}_T(y^{target}) - \mathrm{E}_T(y^{source}) \)</span><br/><span class="math notranslate nohighlight">\(\Delta I = \mathrm{E}_I(x^{edit}) - \mathrm{E}_I(x^{source})\)</span></p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}_{direction} (x^{edit}, y^{target};x^{source},y^{source}) := 1 - \cfrac{\Delta I \cdot \Delta T}{\parallel\Delta I\parallel \parallel\Delta T\parallel}
-\]</div>
-</section>
-</section>
-<section id="discovering-semantic-latent-space-in-diffusion-models">
-<h2>3. Discovering Semantic Latent Space In Diffusion Models<a class="headerlink" href="#discovering-semantic-latent-space-in-diffusion-models" title="Permalink to this heading">#</a></h2>
-<p>Editing을 하는 과정에서 naive approach를 통해서는 editing이 잘 이루어지지 않는다. 이 chapter에서는 왜 잘 이루어지지 않는지에 대한 설명을 하고 이를 해결하는 새로운 controllable한 reverse process인 Asymmetric Reverse Process(Asyrp)를 제안한다.</p>
-<p>DDIM에서 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>에 대한 수식을 설명하였는데 이 chapter부터는 “predicted <span class="math notranslate nohighlight">\(x_0\)</span>”부분을 <span class="math notranslate nohighlight">\(\mathrm{P}_t(\epsilon_t^{\theta}(x_t))\)</span> 즉 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>라고 설정하고, “direction pointing to <span class="math notranslate nohighlight">\(x_t\)</span>”부분을 <span class="math notranslate nohighlight">\(\mathrm{D}_t(\epsilon_t^{\theta}(x_t))\)</span> 즉 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>라고 설정하였다.</p>
-<p><span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>는 latent variable로 부터 <span class="math notranslate nohighlight">\(x_0\)</span>를 예측하는 reverse process와 같은 역할을 담당하고 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>는 다시 noise를 추가해 latent variable로 돌아가기에 forward process와 같은 역할을 담당한다.</p>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\underbrace{\bigg(\frac{x_t - \sqrt{1-\alpha_t}\epsilon_t^\theta(x_t)}{\sqrt{\alpha_t}}\bigg)}_{\mathrm{P}_t(\epsilon_t^{\theta}(x_t))} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot \epsilon_t^\theta(x_t) }_{\mathrm{D}_t(\epsilon_t^{\theta}(x_t))} + \sigma_t\mathcal{z_t}
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}(x_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t)) + \sigma_t\mathcal{z_t}
-\]</div>
-<section id="problem">
-<h3>3.1 Problem<a class="headerlink" href="#problem" title="Permalink to this heading">#</a></h3>
-<p><span class="math notranslate nohighlight">\(x_T\)</span>로 부터 생성된 image <span class="math notranslate nohighlight">\(x_0\)</span>를 given text prompts에 맞게 manipulate시키는 가장 간단한 방법은 2.3에서 소개한 <span class="math notranslate nohighlight">\(\mathcal{L}_{direction}\)</span>을 optimize하도록 <span class="math notranslate nohighlight">\(x_T\)</span>를 update하는 것이다. 하지만 이 방법은  distorted images를 생성하거나 부정확한 manipulation을 한다고 한다.</p>
-<p>이에 대한 대안으로, 모든 sampling step에서 원하는 방향으로 manipulate하도록 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>를 shift해주는 방법이 제시되었다. 하지만 이 방법은 <span class="math notranslate nohighlight">\(x_0\)</span>를 완전히 manipulate하지 못한다. 왜냐하면 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>와 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>에서 둘다 shifted된 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}\)</span>를 사용하기에 cancel out되어 결국 latent variable에서는 기존과 다름이 없다는 것이다. 자세한 증명은 Proof of Theorem을 보면 된다.</p>
-<details>
-  <summary>Proof of Theorem</summary>
-<p>Define <span class="math notranslate nohighlight">\(\alpha_t = \prod_{s=1}^t(1 - \beta_s)\)</span>, <span class="math notranslate nohighlight">\(\tilde{x}_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \mathrm{D}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \sigma_t\mathcal{z_t}\)</span></p>
-<p>= <span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\underbrace{\bigg(\cfrac{x_t - \sqrt{1-\alpha_t}(\epsilon_t^\theta(x_t) + \Delta \epsilon_t)}{\sqrt{\alpha_t}}\bigg)}_{\mathrm{P}_t(\tilde{\epsilon}_t^{\theta})} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot (\epsilon_t^\theta(x_t) + \Delta \epsilon_t) }_{\mathrm{D}_t(\tilde{\epsilon}_t^{\theta})} + \sigma_t\mathcal{z_t}\)</span></p>
-<p>= <span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^\theta(x_t)) + \mathrm{D}_t(\epsilon_t^\theta(x_t)) - \cfrac{\sqrt{\alpha_{t-1}}\sqrt{1-\alpha_t}}{\sqrt{\alpha_t}} \cdot \Delta \epsilon_t + \sqrt{1-\alpha_{t-1}} \cdot \Delta \epsilon_t\)</span></p>
-<p><span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^\theta(x_t)) + \mathrm{D}_t(\epsilon_t^\theta(x_t))\)</span>는 기존 DDIM에서의 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>에 대한 식이고 위 식의 <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>항만 따로 묶어서 표현하면 아래와 같다.</p>
-<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( -\cfrac{\sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} + \sqrt{1-\alpha_{t-1}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
-<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( -\cfrac{\sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} + \cfrac{\sqrt{1-\prod_{s=1}^{t-1}(1-\beta_s)}\sqrt{1-\beta_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
-<p><span class="math notranslate nohighlight">\({\sqrt{1-\prod_{s=1}^{t-1}(1-\beta_s)}\sqrt{1-\beta_t}}\)</span>를 root를 묶어서 내부를 계산하면 <span class="math notranslate nohighlight">\(\sqrt{1-\alpha_t-\beta_t}\)</span>이므로 정리하면 아래와 같다.</p>
-<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( \cfrac{\sqrt{1-\alpha_t-\beta_t} - \sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
-<p><span class="math notranslate nohighlight">\(\therefore \Delta x_t = \tilde{x}_{t-1} - x_{t-1} = \bigg( \cfrac{\sqrt{1-\alpha_t-\beta_t} - \sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t\)</span></p>
-<p>shifted epsilon을 사용한 결과이다. 분자를 보면  <span class="math notranslate nohighlight">\(\beta_t\)</span>는 매우 작기에 거의 0에 수렴하기에 결국 차이가 거의 없음을 보인다. <br/> 즉 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 manipulation 효과는 매우 좋지 않음을 알 수 있다.</p>
-</details>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.3.png"><img alt="Asyrp_2" class="bg-primary mb-1" src="../../_images/figure3.3.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 432 </span><span class="caption-text">No Manipulation Effect with shifted epsilon</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="asymmetric-reverse-process-asyrp">
-<h3>3.2 Asymmetric Reverse Process(Asyrp)<a class="headerlink" href="#asymmetric-reverse-process-asyrp" title="Permalink to this heading">#</a></h3>
-<p>chapter 3.1에서 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 문제를 해결하기 위해 저자들은 Asyrp를 제안한다. 이름 그대로 비대칭적인 방법을 사용한다는 것인데 <span class="math notranslate nohighlight">\(x_0\)</span>를 예측하는 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>에서는 shifted epsilon을 사용하고,  latent variable로 돌아가는 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>에서는 non-shifted epsilon을 사용해서 전체적인 변화를 준다는 것이다. 즉, <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>만 modify하고 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>는 유지한다. Asyrp를 식으로 표현하면 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t))
-\]</div>
-<p>Loss식 또한 chapter 2.3에서 제시한 <span class="math notranslate nohighlight">\(\mathcal{L}_{direction}\)</span>을 사용하여 재구성하였다. modify를 하지 않은 <span class="math notranslate nohighlight">\(\mathrm{P}_t^{source}\)</span>와 modify를 한 <span class="math notranslate nohighlight">\(\mathrm{P}_t^{edit}\)</span>을 사용한다. Loss식은 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}^{(t)} = \lambda_{CLIP}\mathcal{L}_{direction}(\mathrm{P}_t^{edit}, y^{ref};\mathrm{P}_t^{source},y^{source}) + \lambda_{recon}|\mathrm{P}_t^{edit} - \mathrm{P}_t^{source}|
-\]</div>
-<p>전체적인 reverse process는 다음과 같이 설계가 되었다. 이제 shifted epsilon인 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t)\)</span>를 어떤 방식으로 얻을 것인지에 대한 설계가 필요하다. 저자들은 기존의 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서 변화를 주는 것보다 훨씬 더 좋은 result를 보이고, nice properties를 가지는 h-space에서 변화를 주는 것을 제안한다.</p>
-</section>
-<section id="h-space">
-<h3>3.3 h-space<a class="headerlink" href="#h-space" title="Permalink to this heading">#</a></h3>
-<p><span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>는 diffusion models의 backbone인  U-Net에서 도출된다. 이 논문에서는 Image manipulation을 위해 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>를 control하는 space를 U-Net의 bottleneck 즉, 가장 깊은 feature map인 <span class="math notranslate nohighlight">\(h_t\)</span>로 정하였다. 이를 h-space라고 부른다. h-space는 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space보다 더 작은 spatial resolutions을 가지고 high-level semantic를 가진다. 또한 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서는 발견할 수 없는 매우 nice한 특성들을 가지고 있다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.9.png"><img alt="Asyrp_3" class="bg-primary mb-1" src="../../_images/figure3.9.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 433 </span><span class="caption-text">U-Net structure and h-space</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>h-space의 크기는 <span class="math notranslate nohighlight">\(8^2\times512\)</span>이고 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space의 크기는 <span class="math notranslate nohighlight">\(256^2\times3\)</span>으로 h-space에서의 control이 더 지배적이고 robust함을 추측할 수 있다(실제 실험적으로 증명을 함). h-space는 skip-connection의 영향을 받지 않으며 가장 압축된 정보를 가지고 있는 공간이며 image를 control하는데에 있어 매우 좋은 특성들을 가지고 있다. 실제 저자들은 h-space를 지정하기 위해 U-Net의 모든 feature map을 h-space로 설정해두고 실험을 해보았는데 위의 그림을 기준으로 8th layer이전의 feature map을 h-space로 지정한 경우에는 manipulation이 적게 이루어졌고, 8th layer 이후의 feature map을 h-space로 지정한 경우에는 너무 과한 manipulation이 이루어지거나 아예 distorted image가 생성되었다. h-space만의 특성은 chapter5에서 설명한다.</p>
-</section>
-<section id="implicit-neural-directions">
-<h3>3.4 Implicit Neural Directions<a class="headerlink" href="#implicit-neural-directions" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.10.png"><img alt="Asyrp_4" class="bg-primary mb-1" src="../../_images/figure3.10.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 434 </span><span class="caption-text">Illustration of <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\Delta h_t\)</span>가 image를 manipulating하는데 성공했음에도, 수많은 timestep에서 매번 optimizing하기란 쉽지 않다. 대신에 논문에서는 <span class="math notranslate nohighlight">\(h_t\)</span>를 입력받아 <span class="math notranslate nohighlight">\(\Delta h\)</span>를 출력해주는 작은 neural network인 <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span>를 추가하였다. <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span>는 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 매번 모든 timestep에서 optimizing해줘야 하는 방법에 비해 시간도 빠르고 setting값들에 대해 robust하다. 또한 주어진 timestep과 bottleneck feature인 <span class="math notranslate nohighlight">\(h_t\)</span>에 대해 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 출력하는 방법을 학습하기에 unseen timestep과 bottleneck feature에 대해서도 일반화할 수 있다고 한다. 이는 accelerated한 과정에서도 큰 효과를 본다. training scheme이 어떻든 간에 결국 부여하는 <span class="math notranslate nohighlight">\(\sum\Delta\mathrm{h_t}\)</span>만 보존된다면, 어떠한 length를 설계해도 비슷한 manipulation효과를 볼 수 있다.</p>
-<p>h-space에서 epsilon을 control해서 asyrp 이용하는 식은 다음과 같다. 이해를 위해 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space와  h-space에서의 shifted epsilon <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t)\)</span>을 비교하였다.</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 shifted epsilon</p>
-<p><span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t) = \epsilon_t^{\theta}(x_t) + \Delta \epsilon_t\)</span></p>
-</li>
-<li><p>h-space에서의 shifted epsilon</p>
-<p><span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t) = \epsilon_t^{\theta}(x_t | \Delta h_t)\)</span></p>
-</li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}(x_t | \Delta h_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t))
-\]</div>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.8.png"><img alt="Asyrp_5" class="bg-primary mb-1" src="../../_images/figure3.8.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 435 </span><span class="caption-text">Asymmetric Reverse Process</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="generative-process-design">
-<h2>4. Generative Process Design<a class="headerlink" href="#generative-process-design" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.1.png"><img alt="Asyrp_6" class="bg-primary mb-1" src="../../_images/figure4.1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 436 </span><span class="caption-text">Intuition for choosing the intervals for editing and quality boosting</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Perception prioritized training of diffusion models(Choi et al)에서는 Diffusion model이 early stage에서는 high-level context를 generate하고, later stage에서는 imperceptible fine details를 generate한다고 제안한다. 본 논문에서는 early stage에서 editing을 진행하는 editing process와 later stage에서 imperceptible fine details를 진행하는 quality boosting을 위한 구간을 나눠서 새로운 Generative Process Design을 제시한다.</p>
-<section id="editing-process-with-asyrp">
-<h3>4.1 Editing Process With Asyrp<a class="headerlink" href="#editing-process-with-asyrp" title="Permalink to this heading">#</a></h3>
-<p>Editing Process에서는 high-level context가 generate되어야 하므로 전체 timestep[0,T]에서 Editing Process를 위한 editing interval을 [T, <span class="math notranslate nohighlight">\(t_{edit}\)</span>]으로 설정하였다. <span class="math notranslate nohighlight">\(t_{edit}\)</span>의 시점을 결정하기 위해 LPIPS 측정지표를 이용한다. LPIPS(<span class="math notranslate nohighlight">\(\mathrm{x}, \mathrm{P}_t\)</span>)는 t시점에서 예측한 <span class="math notranslate nohighlight">\(x_0\)</span>와 target이 되는 original image간의 perceptual distance를 계산한다. 따라서 LPIPS를 남은 reverse process을 통해 editing 해야 할 구성요소를 측정하는 지표라고 볼 수도 있다. 첫 step T의 LPIPS로 부터 <span class="math notranslate nohighlight">\(t_{edit}\)</span>시점에서의 LPIPS 차이는 Editing Process에서 얼만큼의 perceptual change를 주었는지를 나타낸다. 이 값을 editing strength(<span class="math notranslate nohighlight">\(\xi_t\)</span>)라고 정의한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\xi_t = \mathrm{LPIPS}(x, \mathrm{P}_T) - \mathrm{LPIPS}(x, \mathrm{P}_t)
-\]</div>
-<p>Editing interval이 작으면 <span class="math notranslate nohighlight">\(\xi_t\)</span>가 작아지며 변화가 많이 일어나지 않고 반면, Editing interval이 크면 <span class="math notranslate nohighlight">\(\xi_t\)</span>가 커지고 변화가 많이 일어난다. 따라서 충분한 변화를 줄 수 있는 한에서 가장 최소의 Editing interval을 찾는 것이 <span class="math notranslate nohighlight">\(t_{edit}\)</span>을 결정하는 최고의 방법이다. 저자들은 실험적인 결과를 통해 <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_t)\)</span> = 0.33인 t시점을 <span class="math notranslate nohighlight">\(t_{edit}\)</span>으로 결정하였다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.6.png"><img alt="Asyrp_7" class="bg-primary mb-1" src="../../_images/figure4.6.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 437 </span><span class="caption-text">Results based on various <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_{t_{edit}})\)</span></span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.7.png"><img alt="Asyrp_8" class="bg-primary mb-1" src="../../_images/figure4.7.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 438 </span><span class="caption-text">Importance of choosing proper <span class="math notranslate nohighlight">\(t_{edit}\)</span></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>몇몇 특성들은 다른 특성들에 비해 visual change를 많이 필요로 하는 경우도 있다. 예를 들어 source image에 대해 smile한 attribute를 추가하는 경우보다 pixar style의 attribute을 추가하는 경우가 더 많은 visual change를 필요로 한다. 이러한 경우에는 Editing interval을 더 길게 설정해야 한다. 이러한 경우에는 <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_t)\)</span> = 0.33 - <span class="math notranslate nohighlight">\(\delta\)</span>를 만족하는 t를 <span class="math notranslate nohighlight">\(t_{edit}\)</span>으로 설정한다. 이 때, <span class="math notranslate nohighlight">\(\delta = 0.33d(\mathrm{E}_T(y_{source}), \mathrm{E}_T(y_{target}))\)</span>이다. <span class="math notranslate nohighlight">\(\mathrm{E}_T\)</span>는 CLIP text embedding을 진행하는 Text Encoder를 의미하며, d는 cosine distance를 의미한다. 아래 그림을 통해 더 많은 visual change를 요구하는 attributes에 대해서는 <span class="math notranslate nohighlight">\(t_{edit}\)</span>이 더 작음(Editing Interval이 김)을 알 수 있다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.10.png"><img alt="Asyrp_9" class="bg-primary mb-1" src="../../_images/figure4.10.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 439 </span><span class="caption-text">Flexible <span class="math notranslate nohighlight">\(t_{edit}\)</span> based on the amount of visual changes.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="quality-boosting-with-stochastic-noise-injection">
-<h3>4.2 Quality Boosting With Stochastic Noise Injection<a class="headerlink" href="#quality-boosting-with-stochastic-noise-injection" title="Permalink to this heading">#</a></h3>
-<p>DDIM은 <span class="math notranslate nohighlight">\(\eta\)</span>=0으로 설정하며 stochasticity를 제거하여 거의 완벽한 inversion을 가능케 하였다. Elucidating the design space of diffusion-based generative models(Karras et al.)에서는 stochasticity가 image quality를 증가시킨다고 증명하였다. 이에 따라 본 논문에서는 Generative Process에 stochastic noise를 주입하는 quality boosting 단계를 설정하고 boosting interval은 [<span class="math notranslate nohighlight">\(t_{boost}\)</span>, 0]이다.</p>
-<p>Boosting Interval에 따라 image quality를 control할 수 있는데, Boosting Interval이 길게되면, Quality는 증가하지만 Interval동안 계속해서 stochastic noise를 주입해야 하기에 content가 변하는 문제가 발생할 수도 있다. 따라서 충분한 quality boosting을 달성하면서도 content에 최소한의 변화만을 줄 수 있도록  <span class="math notranslate nohighlight">\(t_{boost}\)</span>를 설정하는 것이 중요하다. 저자들은 image에 껴있는 noise를 quality boosting을 통해 해결해야 할 부분으로 보았으며 target이 되는 original image로 부터 t시점의 image <span class="math notranslate nohighlight">\(x_t\)</span>에 얼만큼의 noise가 껴있는지에 대한 지표로 quality deficiency <span class="math notranslate nohighlight">\(\gamma_t\)</span>를 이용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\gamma_t = \mathrm{LPIPS}(x, x_t)
-\]</div>
-<p>여기서는 editing strength와는 다르게 time step에 따라 예측한 <span class="math notranslate nohighlight">\(x_0\)</span>인 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>가 아닌 latent variable <span class="math notranslate nohighlight">\(x_t\)</span>를 이용한다. 저자들은 noise를 판단하는데에 있어서 semantics보다는 actual image를 고려했기에 위와 같이 설정하였다고 한다. 저자들은 실험적인 결과를 통해  <span class="math notranslate nohighlight">\(\gamma_t\)</span> = 1.2인 t시점을 <span class="math notranslate nohighlight">\(t_{boost}\)</span>로 설정하였다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.8.png"><img alt="Asyrp_10" class="bg-primary mb-1" src="../../_images/figure4.8.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 440 </span><span class="caption-text">Results based on various <span class="math notranslate nohighlight">\(\gamma_{t_{boost}}\)</span></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.9.png"><img alt="Asyrp_11" class="bg-primary mb-1" src="../../_images/figure4.9.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 441 </span><span class="caption-text">Quality comparison based on the presence of quality boosting</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="overall-process-of-image-editing">
-<h3>4.3 Overall Process of Image Editing<a class="headerlink" href="#overall-process-of-image-editing" title="Permalink to this heading">#</a></h3>
-<p>General한 Diffusion model에서의 Generative Process를 표현하면 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}) + \mathrm{D}_t(\epsilon_t^{\theta}) + \sigma_t z_t \quad \bigg(\text{where } \sigma_t = \eta\sqrt{(1-\alpha_{t-1}) / (1-\alpha_t)} \sqrt{1-\alpha_t/\alpha_{t-1}}\bigg)
-\]</div>
-<p><span class="math notranslate nohighlight">\(\eta\)</span> = 0인 경우에는 DDIM이 되며, stochastic noise를 더하는 부분이 사라져 deterministic해진다. <span class="math notranslate nohighlight">\(\eta\)</span> = 1인 경우에는 DDPM이 되며, stochastic한 특성이 있다. Asyrp(Asymmetric Reverse Process)에서는 기본적으로 DDIM을 사용하며 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>에서 h-space를 통해 control된 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}(x_t|f_t)\)</span>를 사용한다. Diffusion Models already have a Semantic Latent Space에서 제시한 Generative Process를 전체적으로 정리하면 다음과 같다.</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.11.png"><img alt="Asyrp_12" class="bg-primary mb-1" src="../../_images/figure4.11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 442 </span><span class="caption-text">Quality comparison based on the presence of quality boosting</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>처음부터 <span class="math notranslate nohighlight">\(t_{edit}\)</span>시점까지는 Asyrp를 이용해 Editing Process를 진행한다. 이 후 DDIM 방식을 통해 Denoising을 진행하다가 <span class="math notranslate nohighlight">\(t_{boost}\)</span>시점부터 끝날 때까지 stochastic noise를 주입하는 DDPM 방식을 이용해 Quality boosting을 진행한다.</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.12.png"><img alt="Asyrp_13" class="bg-primary mb-1" src="../../_images/figure4.12.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 443 </span><span class="caption-text">Overview of Generative Process</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="experiments">
-<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>CelebA-HQ (Karras et al., 2018) 및 LSUN-bedroom/-church (Yu et al., 2015) 데이터셋에서 DDPM++ (Song et al., 2020b) (Meng et al., 2021); AFHQ-dog (Choi et al., 2020) 데이터셋에서 iDDPM (Nichol &amp; Dhariwal, 2021); 그리고 METFACES (Karras et al., 2020) 데이터셋에서 ADM with P2-weighting (Dhariwal &amp; Nichol, 2021) (Choi et al., 2022)을 사용해 각각 학습시켰다고 한다. 모든 model들은 pretrained checkpoint를 활용했으며 frozen상태를 유지시켰다고 한다.</p>
-<section id="versatility-of-h-space-with-asyrp">
-<h3>5.1 Versatility of h-space with Asyrp<a class="headerlink" href="#versatility-of-h-space-with-asyrp" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.6.png"><img alt="Asyrp_14" class="bg-primary mb-1" src="../../_images/figure5.6.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 444 </span><span class="caption-text">Editing results of Asyrp on various datasets</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 그림을 보면, 논문에서는 다양한 attribute들의 특성을 잘 반영해서 image를 manipulate했다는 점을 알 수 있다. 심지어 {department, factory, temple} attribute은 training data에 포함이 되어있지 않았음에도 성능이 잘 나온 점을 확인할 수 있다. model을 fine tuning하지 않고 inference하는 과정에서 h-space를 통해 epsilon을 control하고 Asyrp를 이용해 성능을 냈다는 점이 가장 큰 장점이다.</p>
-</section>
-<section id="quantitive-comparison">
-<h3>5.2 Quantitative Comparison<a class="headerlink" href="#quantitive-comparison" title="Permalink to this heading">#</a></h3>
-<p>Asyrp model의 결과를 다른 model들과 비교하는 실험을 진행하였는데 diffusion model 전체를 fine-tuning하여 image를 editing하는 DiffusionCLIP model과 비교하였다. Asyrp의 성능이 더 좋음을 확인 할 수 있다.</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.7.png"><img alt="Asyrp_15" class="bg-primary mb-1" src="../../_images/figure5.7.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 445 </span><span class="caption-text">Asyrp vs DiffusionCLIP on both CelebA-HQ seen-domain attributes and unseen-domain attributes</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="analysis-on-h-space">
-<h3>5.3 Analysis on h-space<a class="headerlink" href="#analysis-on-h-space" title="Permalink to this heading">#</a></h3>
-<ol class="arabic">
-<li><p><strong>Homogeneity</strong></p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.1.png"><img alt="Asyrp_16" class="bg-primary mb-1" src="../../_images/figure5.1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 446 </span><span class="caption-text">Homogeneity of h-space</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 그림의 (a)는 Real image에 smiling attribute을 추가하기 위해 최적화된 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>와 <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>를 나타낸다. 같은 값을 다른 Real image에 적용시켰을 때의 결과를 (b)에 나타내었는데,  <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 적용한경우 smiling face로 잘 바뀌는 반면, <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>을 적용한 경우에는 image distortion이 발생함을 알 수 있다.</p>
-</li>
-<li><p><strong>Linearity</strong></p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.2.png"><img alt="Asyrp_17" class="bg-primary mb-1" src="../../_images/figure5.2.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 447 </span><span class="caption-text">Linearity of h-space - Linear Scaling</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\Delta_h\)</span>를 linearly scaling을 하는 것은 editing을 하는데에 있어 visual attribute change의 양에 반영된다. 즉, <span class="math notranslate nohighlight">\(\Delta_h\)</span>를 <span class="math notranslate nohighlight">\(\times\)</span>1, <span class="math notranslate nohighlight">\(\times\)</span>2, <span class="math notranslate nohighlight">\(\times\)</span>3배 <span class="math notranslate nohighlight">\(\dots\)</span> 함에 따라 result image에서 반영되는 attribute또한 이에 맞게 변화한다는 것이다. 위의 그림에서 표현되어 있듯이 negative scaling에 대해서는 training을 하지 않았음에도 잘 적용 된다는 점을 알 수 있다.</p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.3.png"><img alt="Asyrp_17" class="bg-primary mb-1" src="../../_images/figure5.3.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 448 </span><span class="caption-text">Linearity of h-space - Linear Combination</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>서로 다른 attributes에 대한 <span class="math notranslate nohighlight">\(\Delta_h\)</span>를 합쳐서 부여를 했을 경우에도 각각의 attribute들이 image에 잘 반영이 된다는 점을 알 수 있다.</p>
-</li>
-<li><p><strong>Robustness</strong></p>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.4.png"><img alt="Asyrp_17" class="bg-primary mb-1" src="../../_images/figure5.4.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 449 </span><span class="caption-text">Robustness of h-space</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 그림은 h-space와 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서 random noise를 주입했을 때의 결과를 비교한 것이다. h-space의 경우에는 random noise가 추가되었어도 image에 큰 변화가 없으며 많은 noise가 추가되었을 경우에도 image distortion은 거의 없고 semantic change만 발생한다. 그러나 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space의 경우에는 random noise가 추가된 경우 image distortion이 심하게 발생한다. 이를 통해 h-space가 얼마나 robust한지 알 수 있다.</p>
-</li>
-<li><p><strong>Consistency across time steps</strong></p>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.5.png"><img alt="Asyrp_17" class="bg-primary mb-1" src="../../_images/figure5.5.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 450 </span><span class="caption-text">Consistency across time steps of h-space</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>h-space의 homogeneous한 성질을 통해 같은 attribute에 대한 <span class="math notranslate nohighlight">\(\Delta h\)</span>를 다른 image에 적용시켰을 때에도 잘 반영이 됨을 확인하였다. 저자들은 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>들에 대한 평균인 <span class="math notranslate nohighlight">\(\Delta h_t^{mean}\)</span>을 적용시켰을 경우에도 result가 거의 비슷함을 보인다. Chapter4에서 제시한 Generative Process를 비추어 보았을 때, <span class="math notranslate nohighlight">\(\Delta h_t\)</span>는 Editing Process에서만 적용을 시킨다. 이 경우, 적용하는  <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 <span class="math notranslate nohighlight">\(\Delta h^{global}\)</span>이라고 칭하며, 적용하는 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>가 interval동안 같은 크기 만큼 적용된다고 가정했을 경우, <span class="math notranslate nohighlight">\(\Delta h^{global} = \cfrac{1}{\mathrm{T_e}}\sum_t\ \Delta h_t^{mean}\)</span>이라고 쓸 수 있다. 이 경우에도 결과는 비슷함을 보여준다. 결국 원하는 attribute에 대해 주입해야 할 <span class="math notranslate nohighlight">\(\Delta h\)</span>양만 같다면, 원하는 editing 효과를 얻을 수 있다. 비록 이 논문에서는 best quality manipulation을 위해 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 사용하였지만, <span class="math notranslate nohighlight">\(\Delta h_t^{mean}\)</span>과 <span class="math notranslate nohighlight">\(\Delta h^{global}\)</span>에 대해 더 연구를 해 볼 여지가 있다고 판단한다.</p>
-</li>
-</ol>
-</section>
-</section>
-<section id="conclusion">
-<h2>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서는 Pretrained Diffusion models에서 latent semantic space인  h-space를 발견했고 h-space에서의 Asyrp(Asymmetric Reverse Process)와 새롭게 제안한 Reverse Process 방법을 통해 성공적인 image editing을 가능케 하였다. Diffusion model에서의 semantic한 latent space에 대한 첫 제안을 한 논문이다. h-space는 GAN의 latent space와 유사한 특성을 갖추고 있다. 대표적인 h-space의 특성으로는 Homogeneity, Linearity, Robustness, Consistency across timesteps이 있다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="ConceptLab.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">ConceptLab</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Muse.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Muse</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probability-model-ddpm">2.1 Denoising Diffusion Probability Model(DDPM)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-implicit-model-ddim">2.2 Denoising Diffusion Implicit Model(DDIM)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-manipulation-with-clip">2.3 Image Manipulation with CLIP</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discovering-semantic-latent-space-in-diffusion-models">3. Discovering Semantic Latent Space In Diffusion Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#problem">3.1 Problem</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#asymmetric-reverse-process-asyrp">3.2 Asymmetric Reverse Process(Asyrp)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#h-space">3.3 h-space</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implicit-neural-directions">3.4 Implicit Neural Directions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-process-design">4. Generative Process Design</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#editing-process-with-asyrp">4.1 Editing Process With Asyrp</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quality-boosting-with-stochastic-noise-injection">4.2 Quality Boosting With Stochastic Noise Injection</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-process-of-image-editing">4.3 Overall Process of Image Editing</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#versatility-of-h-space-with-asyrp">5.1 Versatility of h-space with Asyrp</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitive-comparison">5.2 Quantitative Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#analysis-on-h-space">5.3 Analysis on h-space</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Diffusion Models already have a Semantic Latent Space &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Muse" href="Muse.html" />
+    <link rel="prev" title="ConceptLab" href="ConceptLab.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Diffusion Models already have a Semantic Latent Space</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probability-model-ddpm">2.1 Denoising Diffusion Probability Model(DDPM)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-implicit-model-ddim">2.2 Denoising Diffusion Implicit Model(DDIM)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-manipulation-with-clip">2.3 Image Manipulation with CLIP</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discovering-semantic-latent-space-in-diffusion-models">3. Discovering Semantic Latent Space In Diffusion Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#problem">3.1 Problem</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#asymmetric-reverse-process-asyrp">3.2 Asymmetric Reverse Process(Asyrp)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#h-space">3.3 h-space</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implicit-neural-directions">3.4 Implicit Neural Directions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-process-design">4. Generative Process Design</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#editing-process-with-asyrp">4.1 Editing Process With Asyrp</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quality-boosting-with-stochastic-noise-injection">4.2 Quality Boosting With Stochastic Noise Injection</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-process-of-image-editing">4.3 Overall Process of Image Editing</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#versatility-of-h-space-with-asyrp">5.1 Versatility of h-space with Asyrp</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitive-comparison">5.2 Quantitative Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#analysis-on-h-space">5.3 Analysis on h-space</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Diffusion Models already have a Semantic Latent Space (ICLR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2210.10960">https://arxiv.org/abs/2210.10960</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sehwan Park</p></li>
+<li><p><strong>Last updated on Nov. 18, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-already-have-a-semantic-latent-space">
+<h1>Diffusion Models already have a Semantic Latent Space<a class="headerlink" href="#diffusion-models-already-have-a-semantic-latent-space" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<p>Diffusion model은 많은 domain에서 좋은 성능을 보이지만 generative process를 control하는 semantic latent space가 부족하다. 논문에서는 diffusion model속에서 semantic latent space를 발견하기 위한 asymmetric reverse process(asyrp)를 제안하고 h-space라고 명칭한 semantic latent space의 좋은 특성(homogeneity, linearity, robustness, consistency across timesteps)들을 보여준다. 추가적으로 editing strength와 quality deficiency를 기준으로 삼고 더 좋은 image-image translation을 위한 Generative Process Design을 소개한다.</p>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure1.1.png"><img alt="Asyrp_1" class="bg-primary mb-1" src="../../_images/figure1.1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 431 </span><span class="caption-text">Manipulation approaches for diffusion models</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>(a) Image guidance는 unconditional한 latent variable에 guiding image의 latent variable을 합치는 방식을 사용한다. 그러나 latent variable을 둘 다 이용하면서 명확하게 control하기가 쉽지 않다.</p>
+<p>(b) Classifier guidance는 diffusion model에 classifier를 추가하여 generative process를 거치는 동안 latent variable이 어떤 class인지 분류하고 target class에 가까워지도록 score를 부여하는 방식으로 작동한다. 그러나 latent variable들에 대해 classify를 실행해야 하기에 pretrained model을 사용하기가 힘들어 직접 학습을 시켜야 하기에 시간적으로, 비용적으로 부담이 된다.</p>
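+<p>(c) DiffusionCLIP은 CLIP을 이용한 directional loss로 image를 edit하지만, 이를 위해 diffusion model 전체를 fine-tuning해야 하기에 비용이 많이 든다.</p>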
+<p>(d) Diffusion Models already have a Semantic Latent Space는 original image의 특성을 edit하기 위한 아주 좋은 특성을 가지고 있는 semantic latent space를 frozen diffusion model에서 발견하였고 이를 h-space라고 칭한다. h-space에는 다양한 좋은 특성들이 존재한다. versatile editing과 quality boosting을 위해 새로운 generative process를 design하여 제안한다. h-space는 frozen pretrained diffusion model에서 semantic latent space로써의 첫 발견사례이다.</p>
+</section>
+<section id="background">
+<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<section id="denoising-diffusion-probability-model-ddpm">
+<h3>2.1 Denoising Diffusion Probability Model(DDPM)<a class="headerlink" href="#denoising-diffusion-probability-model-ddpm" title="Permalink to this heading">#</a></h3>
+<p>DDPM에서는 임의의 time step t로 부터 noise가 껴있는 image <span class="math notranslate nohighlight">\(x_t\)</span>의 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>가 얼만큼인지 예측한다. 예측한 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>를 이용하여 noise가 일부 제거된 이전 step의 mean(<span class="math notranslate nohighlight">\(\mu_{\theta}(x_t)\)</span>)을 구할 수 있고 variance(<span class="math notranslate nohighlight">\(\Sigma_{\theta}(x_t)\)</span>)는 constant한 값으로 고정시킨다. DDPM에서 제시한 forward process와 reverse process는 다음과 같다. DDPM에서의 <span class="math notranslate nohighlight">\(\sigma_t^2 = \beta_t\)</span>이다.</p>
+<div class="math notranslate nohighlight">
+\[
+q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}x_{t-1}, \beta_t I)
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+p_{\theta}(x_{t-1}|x_t) := \mathcal{N}(\mu_{\theta}(x_t), \Sigma_{\theta}(x_t))
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \frac{1}{\sqrt{1-\beta_t}}\bigg(x_t - \frac{\beta_t}{\sqrt{1-\alpha_t}}\epsilon_t^\theta(x_t)\bigg) + \sigma_t\mathcal{z_t}
+\]</div>
+</section>
+<section id="denoising-diffusion-implicit-model-ddim">
+<h3>2.2 Denoising Diffusion Implicit Model(DDIM)<a class="headerlink" href="#denoising-diffusion-implicit-model-ddim" title="Permalink to this heading">#</a></h3>
+<p>DDIM에서는  non-Markovian process를 이용해 또 다른 관점의 reverse process를 제시하였고, DDPM과 DDIM 모두 general하게 적용되는 Diffusion process에 대한 식을 보여주었다. <span class="math notranslate nohighlight">\(\sigma_t = \eta\sqrt{(1-\alpha_{t-1}) / (1-\alpha_t)} \sqrt{1-\alpha_t/\alpha_{t-1}}\)</span>이다.</p>
+<p><span class="math notranslate nohighlight">\(\eta\)</span>=1인 경우 DDPM이 되고 stochastic해지며,  <span class="math notranslate nohighlight">\(\eta\)</span>=0인 경우 DDIM이 되고 deterministic해진다.</p>
+<div class="math notranslate nohighlight">
+\[
+q_{\sigma}(x_{t-1}|x_t,x_0) = \mathcal{N}(\sqrt{\alpha_{t-1}}x_0 + \sqrt{1-\alpha_{t-1}-\sigma_t^2} \cdot \cfrac{x_t - \sqrt{\alpha_t}x_0}{\sqrt{1-\alpha_t}}, \sigma_t^2I)
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\underbrace{\bigg(\frac{x_t - \sqrt{1-\alpha_t}\epsilon_t^\theta(x_t)}{\sqrt{\alpha_t}}\bigg)}_{\textrm{predicted } x_0} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot \epsilon_t^\theta(x_t) }_{\textrm{direction pointing to }x_t} + \sigma_t\mathcal{z_t}
+\]</div>
+</section>
+<section id="image-manipulation-with-clip">
+<h3>2.3 Image Manipulation with CLIP<a class="headerlink" href="#image-manipulation-with-clip" title="Permalink to this heading">#</a></h3>
+<p>CLIP은 Image Encoder와 Text Encoder를 이용하여 image와 text간의 embedding을 학습한다. 편집된 이미지와 대상 설명 간의 cosine distance를 직접 최소화하는 대신 cosine distance를 사용한 directional loss를 사용하여 mode collapse없이 균일한 editing을 가능하게 했다고 한다.</p>
+<p><span class="math notranslate nohighlight">\(\Delta T = \mathrm{E}_T(y^{target}) - \mathrm{E}_T(y^{source}) \)</span><br/><span class="math notranslate nohighlight">\(\Delta I = \mathrm{E}_I(x^{edit}) - \mathrm{E}_I(x^{source})\)</span></p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}_{direction} (x^{edit}, y^{target};x^{source},y^{source}) := 1 - \cfrac{\Delta I \cdot \Delta T}{\parallel\Delta I\parallel \parallel\Delta T\parallel}
+\]</div>
+</section>
+</section>
+<section id="discovering-semantic-latent-space-in-diffusion-models">
+<h2>3. Discovering Semantic Latent Space In Diffusion Models<a class="headerlink" href="#discovering-semantic-latent-space-in-diffusion-models" title="Permalink to this heading">#</a></h2>
+<p>Editing을 하는 과정에서 naive approach를 통해서는 editing이 잘 이루어지지 않는다. 이 chapter에서는 왜 잘 이루어지지 않는지에 대한 설명을 하고 이를 해결하는 새로운 controllable한 reverse process인 Asymmetric Reverse Process(Asyrp)를 제안한다.</p>
+<p>DDIM에서 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>에 대한 수식을 설명하였는데 이 chapter부터는 “predicted <span class="math notranslate nohighlight">\(x_0\)</span>”부분을 <span class="math notranslate nohighlight">\(\mathrm{P}_t(\epsilon_t^{\theta}(x_t))\)</span> 즉 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>라고 설정하고, “direction pointing to <span class="math notranslate nohighlight">\(x_t\)</span>”부분을 <span class="math notranslate nohighlight">\(\mathrm{D}_t(\epsilon_t^{\theta}(x_t))\)</span> 즉 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>라고 설정하였다.</p>
+<p><span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>는 latent variable로 부터 <span class="math notranslate nohighlight">\(x_0\)</span>를 예측하는 reverse process와 같은 역할을 담당하고 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>는 다시 noise를 추가해 latent variable로 돌아가기에 forward process와 같은 역할을 담당한다.</p>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\underbrace{\bigg(\frac{x_t - \sqrt{1-\alpha_t}\epsilon_t^\theta(x_t)}{\sqrt{\alpha_t}}\bigg)}_{\mathrm{P}_t(\epsilon_t^{\theta}(x_t))} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot \epsilon_t^\theta(x_t) }_{\mathrm{D}_t(\epsilon_t^{\theta}(x_t))} + \sigma_t\mathcal{z_t}
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}(x_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t)) + \sigma_t\mathcal{z_t}
+\]</div>
+<section id="problem">
+<h3>3.1 Problem<a class="headerlink" href="#problem" title="Permalink to this heading">#</a></h3>
+<p><span class="math notranslate nohighlight">\(x_T\)</span>로 부터 생성된 image <span class="math notranslate nohighlight">\(x_0\)</span>를 given text prompts에 맞게 manipulate시키는 가장 간단한 방법은 2.3에서 소개한 <span class="math notranslate nohighlight">\(\mathcal{L}_{direction}\)</span>을 optimize하도록 <span class="math notranslate nohighlight">\(x_T\)</span>를 update하는 것이다. 하지만 이 방법은  distorted images를 생성하거나 부정확한 manipulation을 한다고 한다.</p>
+<p>이에 대한 대안으로, 모든 sampling step에서 원하는 방향으로 manipulate하도록 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>를 shift해주는 방법이 제시되었다. 하지만 이 방법은 <span class="math notranslate nohighlight">\(x_0\)</span>를 완전히 manipulate하지 못한다. 왜냐하면 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>와 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>에서 둘 다 shifted된 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}\)</span>를 사용하기에 cancel out되어 결국 latent variable에서는 기존과 다름이 없다는 것이다. 자세한 증명은 Proof of Theorem을 보면 된다.</p>
+<details>
+  <summary>Proof of Theorem</summary>
+<p>Define <span class="math notranslate nohighlight">\(\alpha_t = \prod_{s=1}^t(1 - \beta_s)\)</span>, <span class="math notranslate nohighlight">\(\tilde{x}_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \mathrm{D}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \sigma_t\mathcal{z_t}\)</span></p>
+<p>= <span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\underbrace{\bigg(\cfrac{x_t - \sqrt{1-\alpha_t}(\epsilon_t^\theta(x_t) + \Delta \epsilon_t)}{\sqrt{\alpha_t}}\bigg)}_{\mathrm{P}_t(\tilde{\epsilon}_t^{\theta})} + \underbrace{\sqrt{1-\alpha_{t-1}-\sigma_t^2}\cdot (\epsilon_t^\theta(x_t) + \Delta \epsilon_t) }_{\mathrm{D}_t(\tilde{\epsilon}_t^{\theta})} + \sigma_t\mathcal{z_t}\)</span></p>
+<p>= <span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^\theta(x_t)) + \mathrm{D}_t(\epsilon_t^\theta(x_t)) - \cfrac{\sqrt{\alpha_{t-1}}\sqrt{1-\alpha_t}}{\sqrt{\alpha_t}} \cdot \Delta \epsilon_t + \sqrt{1-\alpha_{t-1}} \cdot \Delta \epsilon_t\)</span></p>
+<p><span class="math notranslate nohighlight">\(\sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^\theta(x_t)) + \mathrm{D}_t(\epsilon_t^\theta(x_t))\)</span>는 기존 DDIM에서의 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>에 대한 식이고 위 식의 <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>항만 따로 묶어서 표현하면 아래와 같다.</p>
+<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( -\cfrac{\sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} + \sqrt{1-\alpha_{t-1}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
+<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( -\cfrac{\sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} + \cfrac{\sqrt{1-\prod_{s=1}^{t-1}(1-\beta_s)}\sqrt{1-\beta_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
+<p><span class="math notranslate nohighlight">\({\sqrt{1-\prod_{s=1}^{t-1}(1-\beta_s)}\sqrt{1-\beta_t}}\)</span>를 root를 묶어서 내부를 계산하면 <span class="math notranslate nohighlight">\(\sqrt{1-\alpha_t-\beta_t}\)</span>이므로 정리하면 아래와 같다.</p>
+<p>= <span class="math notranslate nohighlight">\(x_{t-1} + \bigg( \cfrac{\sqrt{1-\alpha_t-\beta_t} - \sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t \)</span></p>
+<p><span class="math notranslate nohighlight">\(\therefore \Delta x_t = \tilde{x}_{t-1} - x_{t-1} = \bigg( \cfrac{\sqrt{1-\alpha_t-\beta_t} - \sqrt{1-\alpha_t}}{\sqrt{1-\beta_t}} \bigg) \cdot \Delta \epsilon_t\)</span></p>
+<p>shifted epsilon을 사용한 결과이다. 분자를 보면  <span class="math notranslate nohighlight">\(\beta_t\)</span>는 매우 작기에 거의 0에 수렴하기에 결국 차이가 거의 없음을 보인다. <br/> 즉 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 manipulation 효과는 매우 좋지 않음을 알 수 있다.</p>
+</details>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.3.png"><img alt="Asyrp_2" class="bg-primary mb-1" src="../../_images/figure3.3.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 432 </span><span class="caption-text">No Manipulation Effect with shifted epsilon</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="asymmetric-reverse-process-asyrp">
+<h3>3.2 Asymmetric Reverse Process(Asyrp)<a class="headerlink" href="#asymmetric-reverse-process-asyrp" title="Permalink to this heading">#</a></h3>
+<p>chapter 3.1에서 살펴본 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 문제를 해결하기 위해 저자들은 Asyrp를 제안한다. 이름 그대로 비대칭적인 방법을 사용한다는 것인데 <span class="math notranslate nohighlight">\(x_0\)</span>를 예측하는 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>에서는 shifted epsilon을 사용하고, latent variable로 돌아가는 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>에서는 non-shifted epsilon을 사용해서 전체적인 변화를 준다는 것이다. 즉, <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>만 modify하고 <span class="math notranslate nohighlight">\(\mathrm{D}_t\)</span>는 유지한다. Asyrp를 식으로 표현하면 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\tilde{\epsilon}_t^{\theta}(x_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t))
+\]</div>
+<p>Loss식 또한 chapter 2.3에서 제시한 <span class="math notranslate nohighlight">\(\mathcal{L}_{direction}\)</span>을 사용하여 재구성하였다. modify를 하지 않은 <span class="math notranslate nohighlight">\(\mathrm{P}_t^{source}\)</span>와 modify를 한 <span class="math notranslate nohighlight">\(\mathrm{P}_t^{edit}\)</span>을 사용한다. Loss식은 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}^{(t)} = \lambda_{CLIP}\mathcal{L}_{direction}(\mathrm{P}_t^{edit}, y^{ref};\mathrm{P}_t^{source},y^{source}) + \lambda_{recon}|\mathrm{P}_t^{edit} - \mathrm{P}_t^{source}|
+\]</div>
+<p>전체적인 reverse process는 다음과 같이 설계가 되었다. 이제 shifted epsilon인 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t)\)</span>를 어떤 방식으로 얻을 것인지에 대한 설계가 필요하다. 저자들은 기존의 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서 변화를 주는 것보다 훨씬 더 좋은 result를 보이고, nice properties를 가지는 h-space에서 변화를 주는 것을 제안한다.</p>
+</section>
+<section id="h-space">
+<h3>3.3 h-space<a class="headerlink" href="#h-space" title="Permalink to this heading">#</a></h3>
+<p><span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>는 diffusion models의 backbone인  U-Net에서 도출된다. 이 논문에서는 Image manipulation을 위해 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}\)</span>를 control하는 space를 U-Net의 bottleneck 즉, 가장 깊은 feature map인 <span class="math notranslate nohighlight">\(h_t\)</span>로 정하였다. 이를 h-space라고 부른다. h-space는 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space보다 더 작은 spatial resolutions을 가지고 high-level semantic를 가진다. 또한 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서는 발견할 수 없는 매우 nice한 특성들을 가지고 있다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.9.png"><img alt="Asyrp_3" class="bg-primary mb-1" src="../../_images/figure3.9.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 433 </span><span class="caption-text">U-Net structure and h-space</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>h-space의 크기는 <span class="math notranslate nohighlight">\(8^2\times512\)</span>이고 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space의 크기는 <span class="math notranslate nohighlight">\(256^2\times3\)</span>으로 h-space에서의 control이 더 지배적이고 robust함을 추측할 수 있다(실제 실험적으로 증명을 함). h-space는 skip-connection의 영향을 받지 않으며 가장 압축된 정보를 가지고 있는 공간이며 image를 control하는데에 있어 매우 좋은 특성들을 가지고 있다. 실제 저자들은 h-space를 지정하기 위해 U-Net의 모든 feature map을 h-space로 설정해두고 실험을 해보았는데 위의 그림을 기준으로 8th layer이전의 feature map을 h-space로 지정한 경우에는 manipulation이 적게 이루어졌고, 8th layer 이후의 feature map을 h-space로 지정한 경우에는 너무 과한 manipulation이 이루어지거나 아예 distorted image가 생성되었다. h-space만의 특성은 chapter5에서 설명한다.</p>
+</section>
+<section id="implicit-neural-directions">
+<h3>3.4 Implicit Neural Directions<a class="headerlink" href="#implicit-neural-directions" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.10.png"><img alt="Asyrp_4" class="bg-primary mb-1" src="../../_images/figure3.10.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 434 </span><span class="caption-text">Illustration of <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\Delta h_t\)</span>가 image를 manipulating하는데 성공했음에도, 수많은 timestep에서 매번 optimizing하기란 쉽지 않다. 대신에 논문에서는 <span class="math notranslate nohighlight">\(h_t\)</span>를 입력받아 <span class="math notranslate nohighlight">\(\Delta h\)</span>를 출력해주는 작은 neural network인 <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span>를 추가하였다. <span class="math notranslate nohighlight">\(\mathrm{f}(t)\)</span>는 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 매번 모든 timestep에서 optimizing해줘야 하는 방법에 비해 시간도 빠르고 setting값들에 대해 robust하다. 또한 주어진 timestep과 bottleneck feature인 <span class="math notranslate nohighlight">\(h_t\)</span>에 대해 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 출력하는 방법을 학습하기에 unseen timestep과 bottleneck feature에 대해서도 일반화할 수 있다고 한다. 이는 accelerated한 과정에서도 큰 효과를 본다. training scheme이 어떻든 간에 결국 부여하는 <span class="math notranslate nohighlight">\(\sum\Delta\mathrm{h_t}\)</span>만 보존된다면, 어떠한 length를 설계해도 비슷한 manipulation효과를 볼 수 있다.</p>
+<p>h-space에서 epsilon을 control해서 asyrp 이용하는 식은 다음과 같다. 이해를 위해 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space와  h-space에서의 shifted epsilon <span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t)\)</span>을 비교하였다.</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서의 shifted epsilon</p>
+<p><span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t) = \epsilon_t^{\theta}(x_t) + \Delta \epsilon_t\)</span></p>
+</li>
+<li><p>h-space에서의 shifted epsilon</p>
+<p><span class="math notranslate nohighlight">\(\tilde{\epsilon}_t^{\theta}(x_t) = \epsilon_t^{\theta}(x_t | \Delta h_t)\)</span></p>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}(x_t | \Delta h_t)) + \mathrm{D}_t(\epsilon_t^{\theta}(x_t))
+\]</div>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.8.png"><img alt="Asyrp_5" class="bg-primary mb-1" src="../../_images/figure3.8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 435 </span><span class="caption-text">Asymmetric Reverse Process</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="generative-process-design">
+<h2>4. Generative Process Design<a class="headerlink" href="#generative-process-design" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.1.png"><img alt="Asyrp_6" class="bg-primary mb-1" src="../../_images/figure4.1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 436 </span><span class="caption-text">Intuition for choosing the intervals for editing and quality boosting</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Perception prioritized training of diffusion models(Choi et al)에서는 Diffusion model이 early stage에서는 high-level context를 generate하고, later stage에서는 imperceptible fine details를 generate한다고 제안한다. 본 논문에서는 early stage에서 editing을 진행하는 editing process와 later stage에서 imperceptible fine details를 진행하는 quality boosting을 위한 구간을 나눠서 새로운 Generative Process Design을 제시한다.</p>
+<section id="editing-process-with-asyrp">
+<h3>4.1 Editing Process With Asyrp<a class="headerlink" href="#editing-process-with-asyrp" title="Permalink to this heading">#</a></h3>
+<p>Editing Process에서는 high-level context가 generate되어야 하므로 전체 timestep[0,T]에서 Editing Process를 위한 editing interval을 [T, <span class="math notranslate nohighlight">\(t_{edit}\)</span>]으로 설정하였다. <span class="math notranslate nohighlight">\(t_{edit}\)</span>의 시점을 결정하기 위해 LPIPS 측정지표를 이용한다. LPIPS(<span class="math notranslate nohighlight">\(\mathrm{x}, \mathrm{P}_t\)</span>)는 t시점에서 예측한 <span class="math notranslate nohighlight">\(x_0\)</span>와 target이 되는 original image간의 perceptual distance를 계산한다. 따라서 LPIPS를 남은 reverse process를 통해 editing 해야 할 구성요소를 측정하는 지표라고 볼 수도 있다. 첫 step T의 LPIPS로 부터 <span class="math notranslate nohighlight">\(t_{edit}\)</span>시점에서의 LPIPS 차이는 Editing Process에서 얼만큼의 perceptual change를 주었는지를 나타낸다. 이 값을 editing strength(<span class="math notranslate nohighlight">\(\xi_t\)</span>)라고 정의한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\xi_t = \mathrm{LPIPS}(x, \mathrm{P}_T) - \mathrm{LPIPS}(x, \mathrm{P}_t)
+\]</div>
+<p>Editing interval이 작으면 <span class="math notranslate nohighlight">\(\xi_t\)</span>가 작아지며 변화가 많이 일어나지 않고 반면, Editing interval이 크면 <span class="math notranslate nohighlight">\(\xi_t\)</span>가 커지고 변화가 많이 일어난다. 따라서 충분한 변화를 줄 수 있는 한에서 가장 최소의 Editing interval을 찾는 것이 <span class="math notranslate nohighlight">\(t_{edit}\)</span>을 결정하는 최고의 방법이다. 저자들은 실험적인 결과를 통해 <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_t)\)</span> = 0.33인 t시점을 <span class="math notranslate nohighlight">\(t_{edit}\)</span>으로 결정하였다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.6.png"><img alt="Asyrp_7" class="bg-primary mb-1" src="../../_images/figure4.6.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 437 </span><span class="caption-text">Results based on various <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_{t_{edit}})\)</span></span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.7.png"><img alt="Asyrp_8" class="bg-primary mb-1" src="../../_images/figure4.7.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 438 </span><span class="caption-text">Importance of choosing proper <span class="math notranslate nohighlight">\(t_{edit}\)</span></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>몇몇 특성들은 다른 특성들에 비해 visual change를 많이 필요로 하는 경우도 있다. 예를 들어 source image에 대해 smile한 attribute를 추가하는 경우보다 pixar style의 attribute을 추가하는 경우가 더 많은 visual change를 필요로 한다. 이러한 경우에는 Editing interval을 더 길게 설정해야 한다. 이러한 경우에는 <span class="math notranslate nohighlight">\(\mathrm{LPIPS}(x, \mathrm{P}_t)\)</span> = 0.33 - <span class="math notranslate nohighlight">\(\delta\)</span>를 만족하는 t를 <span class="math notranslate nohighlight">\(t_{edit}\)</span>으로 설정한다. 이 때, <span class="math notranslate nohighlight">\(\delta = 0.33d(\mathrm{E}_T(y_{source}), \mathrm{E}_T(y_{target}))\)</span>이다. <span class="math notranslate nohighlight">\(\mathrm{E}_T\)</span>는 CLIP text embedding을 진행하는 Text Encoder를 의미하며, d는 cosine distance를 의미한다. 아래 그림을 통해 더 많은 visual change를 요구하는 attributes에 대해서는 <span class="math notranslate nohighlight">\(t_{edit}\)</span>이 더 작음(Editing Interval이 김)을 알 수 있다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.10.png"><img alt="Asyrp_9" class="bg-primary mb-1" src="../../_images/figure4.10.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 439 </span><span class="caption-text">Flexible <span class="math notranslate nohighlight">\(t_{edit}\)</span> based on the amount of visual changes.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="quality-boosting-with-stochastic-noise-injection">
+<h3>4.2 Quality Boosting With Stochastic Noise Injection<a class="headerlink" href="#quality-boosting-with-stochastic-noise-injection" title="Permalink to this heading">#</a></h3>
+<p>DDIM은 <span class="math notranslate nohighlight">\(\eta\)</span>=0으로 설정하며 stochasticity를 제거하여 거의 완벽한 inversion을 가능케 하였다. Elucidating the design space of diffusion-based generative models(Karras et al.)에서는 stochasticity가 image quality를 증가시킨다고 증명하였다. 이에 따라 본 논문에서는 Generative Process에 stochastic noise를 주입하는 quality boosting 단계를 설정하고 boosting interval은 [<span class="math notranslate nohighlight">\(t_{boost}\)</span>, 0]이다.</p>
+<p>Boosting Interval에 따라 image quality를 control할 수 있는데, Boosting Interval이 길게되면, Quality는 증가하지만 Interval동안 계속해서 stochastic noise를 주입해야 하기에 content가 변하는 문제가 발생할 수도 있다. 따라서 충분한 quality boosting을 달성하면서도 content에 최소한의 변화만을 줄 수 있도록  <span class="math notranslate nohighlight">\(t_{boost}\)</span>를 설정하는 것이 중요하다. 저자들은 image에 껴있는 noise를 quality boosting을 통해 해결해야 할 부분으로 보았으며 target이 되는 original image로 부터 t시점의 image <span class="math notranslate nohighlight">\(x_t\)</span>에 얼만큼의 noise가 껴있는지에 대한 지표로 quality deficiency <span class="math notranslate nohighlight">\(\gamma_t\)</span>를 이용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\gamma_t = \mathrm{LPIPS}(x, x_t)
+\]</div>
+<p>여기서는 editing strength와는 다르게 time step에 따라 예측한 <span class="math notranslate nohighlight">\(x_0\)</span>인 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>가 아닌 latent variable <span class="math notranslate nohighlight">\(x_t\)</span>를 이용한다. 저자들은 noise를 판단하는데에 있어서 semantics보다는 actual image를 고려했기에 위와 같이 설정하였다고 한다. 저자들은 실험적인 결과를 통해  <span class="math notranslate nohighlight">\(\gamma_t\)</span> = 1.2인 t시점을 <span class="math notranslate nohighlight">\(t_{boost}\)</span>로 설정하였다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.8.png"><img alt="Asyrp_10" class="bg-primary mb-1" src="../../_images/figure4.8.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 440 </span><span class="caption-text">Results based on various <span class="math notranslate nohighlight">\(\gamma_{t_{boost}}\)</span></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.9.png"><img alt="Asyrp_11" class="bg-primary mb-1" src="../../_images/figure4.9.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 441 </span><span class="caption-text">Quality comparison based on the presence of quality boosting</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="overall-process-of-image-editing">
+<h3>4.3 Overall Process of Image Editing<a class="headerlink" href="#overall-process-of-image-editing" title="Permalink to this heading">#</a></h3>
+<p>General한 Diffusion model에서의 Generative Process를 표현하면 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+x_{t-1} = \sqrt{\alpha_{t-1}}\mathrm{P}_t(\epsilon_t^{\theta}) + \mathrm{D}_t(\epsilon_t^{\theta}) + \sigma_t z_t \quad \bigg(\text{where } \sigma_t = \eta\sqrt{(1-\alpha_{t-1}) / (1-\alpha_t)} \sqrt{1-\alpha_t/\alpha_{t-1}}\bigg)
+\]</div>
+<p><span class="math notranslate nohighlight">\(\eta\)</span> = 0인 경우에는 DDIM이 되며, stochastic noise를 더하는 부분이 사라져 deterministic해진다. <span class="math notranslate nohighlight">\(\eta\)</span> = 1인 경우에는 DDPM이 되며, stochastic한 특성이 있다. Asyrp(Asymmetric Reverse Process)에서는 기본적으로 DDIM을 사용하며 <span class="math notranslate nohighlight">\(\mathrm{P}_t\)</span>에서 h-space를 통해 control된 <span class="math notranslate nohighlight">\(\epsilon_t^{\theta}(x_t|f_t)\)</span>를 사용한다. Diffusion Models already have a Semantic Latent Space에서 제시한 Generative Process를 전체적으로 정리하면 다음과 같다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.11.png"><img alt="Asyrp_12" class="bg-primary mb-1" src="../../_images/figure4.11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 442 </span><span class="caption-text">Summary of the overall generative process</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>처음부터 <span class="math notranslate nohighlight">\(t_{edit}\)</span>시점까지는 Asyrp를 이용해 Editing Process를 진행한다. 이 후 DDIM 방식을 통해 Denoising을 진행하다가 <span class="math notranslate nohighlight">\(t_{boost}\)</span>시점부터 끝날 때까지 stochastic noise를 주입하는 DDPM 방식을 이용해 Quality boosting을 진행한다.</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure4.12.png"><img alt="Asyrp_13" class="bg-primary mb-1" src="../../_images/figure4.12.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 443 </span><span class="caption-text">Overview of Generative Process</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="experiments">
+<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>CelebA-HQ (Karras et al., 2018) 및 LSUN-bedroom/-church (Yu et al., 2015) 데이터셋에서 DDPM++ (Song et al., 2020b) (Meng et al., 2021); AFHQ-dog (Choi et al., 2020) 데이터셋에서 iDDPM (Nichol &amp; Dhariwal, 2021); 그리고 METFACES (Karras et al., 2020) 데이터셋에서 ADM with P2-weighting (Dhariwal &amp; Nichol, 2021) (Choi et al., 2022)을 사용해 각각 학습시켰다고 한다. 모든 model들은 pretrained checkpoint를 활용했으며 frozen상태를 유지시켰다고 한다.</p>
+<section id="versatility-of-h-space-with-asyrp">
+<h3>5.1 Versatility of h-space with Asyrp<a class="headerlink" href="#versatility-of-h-space-with-asyrp" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.6.png"><img alt="Asyrp_14" class="bg-primary mb-1" src="../../_images/figure5.6.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 444 </span><span class="caption-text">Editing results of Asyrp on various datasets</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 그림을 보면, 논문에서는 다양한 attribute들의 특성을 잘 반영해서 image를 manipulate했다는 점을 알 수 있다. 심지어 {department, factory, temple} attribute은 training data에 포함이 되어있지 않았음에도 성능이 잘 나온 점을 확인할 수 있다. model을 fine tuning하지 않고 inference하는 과정에서 h-space를 통해 epsilon을 control하고 Asyrp를 이용해 성능을 냈다는 점이 가장 큰 장점이다.</p>
+</section>
+<section id="quantitive-comparison">
+<h3>5.2 Quantitative Comparison<a class="headerlink" href="#quantitive-comparison" title="Permalink to this heading">#</a></h3>
+<p>Asyrp model의 결과를 다른 model들과 비교하는 실험을 진행하였는데 diffusion model 전체를 fine-tuning하여 image를 editing하는 DiffusionCLIP model과 비교하였다. Asyrp의 성능이 더 좋음을 확인 할 수 있다.</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.7.png"><img alt="Asyrp_15" class="bg-primary mb-1" src="../../_images/figure5.7.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 445 </span><span class="caption-text">Asyrp vs DiffusionCLIP on both CelebA-HQ seen-domain attributes and unseen-domain attributes</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="analysis-on-h-space">
+<h3>5.3 Analysis on h-space<a class="headerlink" href="#analysis-on-h-space" title="Permalink to this heading">#</a></h3>
+<ol class="arabic">
+<li><p><strong>Homogeneity</strong></p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.1.png"><img alt="Asyrp_16" class="bg-primary mb-1" src="../../_images/figure5.1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 446 </span><span class="caption-text">Homogeneity of h-space</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 그림의 (a)는 Real image에 smiling attribute을 추가하기 위해 최적화된 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>와 <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>를 나타낸다. 같은 값을 다른 Real image에 적용시켰을 때의 결과를 (b)에 나타내었는데,  <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 적용한경우 smiling face로 잘 바뀌는 반면, <span class="math notranslate nohighlight">\(\Delta \epsilon_t\)</span>을 적용한 경우에는 image distortion이 발생함을 알 수 있다.</p>
+</li>
+<li><p><strong>Linearity</strong></p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.2.png"><img alt="Asyrp_17" class="bg-primary mb-1" src="../../_images/figure5.2.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 447 </span><span class="caption-text">Linearity of h-space - Linear Scaling</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\Delta h\)</span>를 linearly scaling을 하는 것은 editing을 하는데에 있어 visual attribute change의 양에 반영된다. 즉, <span class="math notranslate nohighlight">\(\Delta h\)</span>를 <span class="math notranslate nohighlight">\(\times\)</span>1, <span class="math notranslate nohighlight">\(\times\)</span>2, <span class="math notranslate nohighlight">\(\times\)</span>3배 <span class="math notranslate nohighlight">\(\dots\)</span> 함에 따라 result image에서 반영되는 attribute또한 이에 맞게 변화한다는 것이다. 위의 그림에서 표현되어 있듯이 negative scaling에 대해서는 training을 하지 않았음에도 잘 적용 된다는 점을 알 수 있다.</p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.3.png"><img alt="Asyrp_18" class="bg-primary mb-1" src="../../_images/figure5.3.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 448 </span><span class="caption-text">Linearity of h-space - Linear Combination</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>서로 다른 attributes에 대한 <span class="math notranslate nohighlight">\(\Delta h\)</span>를 합쳐서 부여를 했을 경우에도 각각의 attribute들이 image에 잘 반영이 된다는 점을 알 수 있다.</p>
+</li>
+<li><p><strong>Robustness</strong></p>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.4.png"><img alt="Asyrp_19" class="bg-primary mb-1" src="../../_images/figure5.4.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 449 </span><span class="caption-text">Robustness of h-space</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 그림은 h-space와 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space에서 random noise를 주입했을 때의 결과를 비교한 것이다. h-space의 경우에는 random noise가 추가되었어도 image에 큰 변화가 없으며 많은 noise가 추가되었을 경우에도 image distortion은 거의 없고 semantic change만 발생한다. 그러나 <span class="math notranslate nohighlight">\(\epsilon\)</span>-space의 경우에는 random noise가 추가된 경우 image distortion이 심하게 발생한다. 이를 통해 h-space가 얼마나 robust한지 알 수 있다.</p>
+</li>
+<li><p><strong>Consistency across time steps</strong></p>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure5.5.png"><img alt="Asyrp_20" class="bg-primary mb-1" src="../../_images/figure5.5.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 450 </span><span class="caption-text">Consistency across time steps of h-space</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>h-space의 homogeneous한 성질을 통해 같은 attribute에 대한 <span class="math notranslate nohighlight">\(\Delta h\)</span>를 다른 image에 적용시켰을 때에도 잘 반영이 됨을 확인하였다. 저자들은 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>들에 대한 평균인 <span class="math notranslate nohighlight">\(\Delta h_t^{mean}\)</span>을 적용시켰을 경우에도 result가 거의 비슷함을 보인다. Chapter4에서 제시한 Generative Process를 비추어 보았을 때, <span class="math notranslate nohighlight">\(\Delta h_t\)</span>는 Editing Process에서만 적용을 시킨다. 이 경우, 적용하는  <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 <span class="math notranslate nohighlight">\(\Delta h^{global}\)</span>이라고 칭하며, 적용하는 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>가 interval동안 같은 크기 만큼 적용된다고 가정했을 경우, <span class="math notranslate nohighlight">\(\Delta h^{global} = \cfrac{1}{\mathrm{T_e}}\sum_t\ \Delta h_t^{mean}\)</span>이라고 쓸 수 있다. 이 경우에도 결과는 비슷함을 보여준다. 결국 원하는 attribute에 대해 주입해야 할 <span class="math notranslate nohighlight">\(\Delta h\)</span>양만 같다면, 원하는 editing 효과를 얻을 수 있다. 비록 이 논문에서는 best quality manipulation을 위해 <span class="math notranslate nohighlight">\(\Delta h_t\)</span>를 사용하였지만, <span class="math notranslate nohighlight">\(\Delta h_t^{mean}\)</span>과 <span class="math notranslate nohighlight">\(\Delta h^{global}\)</span>에 대해 더 연구를 해 볼 여지가 있다고 판단한다.</p>
+</li>
+</ol>
+</section>
+</section>
+<section id="conclusion">
+<h2>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서는 Pretrained Diffusion models에서 latent semantic space인  h-space를 발견했고 h-space에서의 Asyrp(Asymmetric Reverse Process)와 새롭게 제안한 Reverse Process 방법을 통해 성공적인 image editing을 가능케 하였다. Diffusion model에서의 semantic한 latent space에 대한 첫 제안을 한 논문이다. h-space는 GAN의 latent space와 유사한 특성을 갖추고 있다. 대표적인 h-space의 특성으로는 Homogeneity, Linearity, Robustness, Consistency across timesteps이 있다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="ConceptLab.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">ConceptLab</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Muse.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Muse</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probability-model-ddpm">2.1 Denoising Diffusion Probability Model(DDPM)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-implicit-model-ddim">2.2 Denoising Diffusion Implicit Model(DDIM)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-manipulation-with-clip">2.3 Image Manipulation with CLIP</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discovering-semantic-latent-space-in-diffusion-models">3. Discovering Semantic Latent Space In Diffusion Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#problem">3.1 Problem</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#asymmetric-reverse-process-asyrp">3.2 Asymmetric Reverse Process(Asyrp)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#h-space">3.3 h-space</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implicit-neural-directions">3.4 Implicit Neural Directions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-process-design">4. Generative Process Design</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#editing-process-with-asyrp">4.1 Editing Process With Asyrp</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quality-boosting-with-stochastic-noise-injection">4.2 Quality Boosting With Stochastic Noise Injection</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-process-of-image-editing">4.3 Overall Process of Image Editing</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#versatility-of-h-space-with-asyrp">5.1 Versatility of h-space with Asyrp</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitive-comparison">5.2 Quantitative Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#analysis-on-h-space">5.3 Analysis on h-space</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DreaMoving.html b/docs/review/DreaMoving.html
old mode 100644
new mode 100755
index 72f99ee6..b247ad56
--- a/docs/review/DreaMoving.html
+++ b/docs/review/DreaMoving.html
@@ -1,850 +1,870 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DreaMoving &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreaMoving';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion" href="DreamPose.html" />
-    <link rel="prev" title="Animate Anyone" href="Animate_Anyone.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreaMoving.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DreaMoving.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DreaMoving</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">2. Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-collection-and-preprocessing">2.1 Data Collection and Preprocessing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-block">2.2 Motion Block</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#content-guider">2.3 Content Guider</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-training">2.4 Model Training</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-inference">2.5 Model Inference</a></li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> DreaMoving: A Human Video Generation Framework based on Diffusion Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2312.05107">https://arxiv.org/abs/2311.17117</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/dreamoving/dreamoving-project">Official</a></p></li>
-<li><p>Project Page : <a class="reference external" href="https://dreamoving.github.io/dreamoving/">https://dreamoving.github.io/dreamoving/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Geonhak Song</p></li>
-<li><p><strong>Last updated on {March. 13, 2024}</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dreamoving">
-<h1>DreaMoving<a class="headerlink" href="#dreamoving" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>고품질 customized human video 생성을 위해 제어가능한 diffusion 기반 video generation framework인 DreaMoving 제안</p></li>
-<li><p>target identity와 posture sequence가 주어졌을 때, target identity moving이나 dancing video 생성이 가능하다.</p></li>
-<li><p>추가 제안 모듈 : motion-controlling을 위한 <strong>Video ControlNet</strong> &amp; identity preserving을 위한 <strong>Content Guider</strong></p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>T2V의 진전에도 인간 중심 기반 생성에는 어려움을 겪는 중.</p></li>
-<li><p>open-source human dance video dataset의 부족, text 묘사의 어려움으로 인해 frame간 일관성, 긴 길이, 다양성을 포함한 비디오 생성에 어려움을 겪는다.</p></li>
-<li><p>personalization과 controllability 의 어려움 또한 존재</p></li>
-<li><p>구조적 제어를 위한 ControlNet, appearance 제어를 위한 Dreambooth, LoRA</p></li>
-<li><p>그러나 이 기술들은 정확한 제어가 어렵고 hyperparameter tuning 요소가 존재 &amp; 추가 계산 부담</p></li>
-<li><p>이에 새로운 방법론인 DreaMoving 제안</p></li>
-</ul>
-</section>
-<section id="architecture">
-<h2>2. Architecture<a class="headerlink" href="#architecture" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_1.png"><img alt="figure_1" class="bg-primary mb-1" src="../../_images/figure_1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 628 </span><span class="caption-text">Figure 1. The overview of DreaMoving</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>LDM 기반 모델을 기반으로 3가지 주요 network로 구성</p>
-<ul>
-<li><p>U-Net, Video ControlNet, Content Guider</p></li>
-</ul>
-</li>
-<li><p>AnimateDiff에서 영감을 받아 U-Net 각 block 이후 motion block을 추가</p></li>
-<li><p>Plug-in : motion-controlling을 위한 <strong>Video ControlNet</strong> &amp; identity preserving을 위한 <strong>Content Guider</strong></p></li>
-</ul>
-<section id="data-collection-and-preprocessing">
-<h3>2.1 Data Collection and Preprocessing<a class="headerlink" href="#data-collection-and-preprocessing" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>인터넷에서 human dance video 1000의 고품질 영상으로 훈련</p></li>
-<li><p>temporal module 훈련은 변이나 특별한 효과 없는 연속적 frame이 필요하기 때문에 clip video로 split하여 6000개의 짧은 비디오를 획득한다.(8~10s)</p></li>
-<li><p>text description을 위해서 Minigpt-v2(<a class="reference external" href="https://minigpt-v2.github.io/">https://minigpt-v2.github.io/</a>)를 video-captioner로 사용</p>
-<ul>
-<li><p>“[grounding] describe this frame in a detailed manner”의 명령으로 획득</p></li>
-<li><p>subject와 background 내용에 대해 정확히 묘사</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="motion-block">
-<h3>2.2 Motion Block<a class="headerlink" href="#motion-block" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>temporal consistency와 motion fidelity 향상을 위해서 U-Net과 ControlNet를 motion block으로 통합.</p></li>
-<li><p>motion block은 AnimateDiff로 확장. temporal sequence length는 64로 확장</p></li>
-<li><p>초기화 : AnimateDiff (mm_sd_v15.ckpt)</p></li>
-<li><p>개인 인물 dance video로 finetuning</p></li>
-</ul>
-</section>
-<section id="content-guider">
-<h3>2.3 Content Guider<a class="headerlink" href="#content-guider" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Content Guider는 인물의 appearance와 배경을 포함한 생성된 video의 내용을 제어하기 위해 고안됨.</p></li>
-<li><p>가장 간단한 방법은 text prompt이지만, 개인화된 인물 외관 묘사가 어렵다.</p></li>
-<li><p>IP-Adapter에 영감을 받아 image prompt를 활용해 인물 외관에 대한 guidance를 주고 배경에 대해서는 text prompt 사용</p></li>
-<li><p>얼굴 이미지는 image encoder를 통해 encode</p></li>
-<li><p>text feature &amp; 인물 외관 feature는 마지막 content embedding에 concat된 후 cross-attention에 보냄</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/eq_11.png"><img alt="eq_1" class="bg-primary mb-1" src="../../_images/eq_11.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 629 </span><span class="caption-text">Equation 1 Content Guider cross attentino output given query, text, face, cloth features</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(Z\)</span> : query features</p></li>
-<li><p><span class="math notranslate nohighlight">\(c_t\)</span> : text features  / <span class="math notranslate nohighlight">\(c_f\)</span> : face features / <span class="math notranslate nohighlight">\(c_c\)</span> : cloth features</p></li>
-<li><p><span class="math notranslate nohighlight">\(Z^\prime\)</span> : cross-attention output</p></li>
-</ul>
-</section>
-</section>
-<section id="model-training">
-<h2>2.4 Model Training<a class="headerlink" href="#model-training" title="Permalink to this heading">#</a></h2>
-<p><strong>2.4.1 Content Guider Training</strong></p>
-<ul class="simple">
-<li><p>Base Model : SD v1.5 기반</p></li>
-<li><p>Image Encoder : OpenCLIP ViT-H14</p></li>
-<li><p>reference face identity 보존을 위해 Arcface를 통해 얼굴 상관 feature 추출.</p></li>
-<li><p>LAION-2B에서 human data 수집</p></li>
-<li><p>훈련 : 512x512 random crop &amp; resize</p></li>
-<li><p>GPU : 8 V100, 100k steps, 16 batch size/GPU 1장</p></li>
-<li><p>Optimizer : AdamW</p></li>
-<li><p>learning rate : 1e-4, decay 1e-2</p></li>
-</ul>
-<p><strong>2.4.2 Long-Frame Pretraining</strong></p>
-<ul class="simple">
-<li><p>WebVid-10M validation set (5k video clips)에서 motion module의 sequence length를 16에서 64로 확장하기 위한 training stage 수행</p>
-<ul>
-<li><p>WebVid-10M validation set (5k video clips) : 평균 18초, 총 13000 시간</p></li>
-</ul>
-</li>
-<li><p>U-Net motion module만 훈련하고 나머지는 freeze</p></li>
-<li><p>ControlNet이나 image guidance 사용 안 함.</p></li>
-<li><p>learning rate : 1e-4</p></li>
-<li><p>resolution : 256x256 resize &amp; center crop</p></li>
-<li><p>batch size 1, 10k steps 이후 훈련 종료</p></li>
-</ul>
-<p><strong>2.4.3 Video ControlNet Training</strong></p>
-<ul class="simple">
-<li><p>long-frame pretraining 이후, <strong>Video ControlNet</strong> 훈련 진행.</p></li>
-<li><p>U-Net 고정 &amp; **Video ControlNet의 (U-Net block과 motion block)**은 unfreeze</p></li>
-<li><p>수집한 6k human dance video data 훈련</p></li>
-<li><p>DWPose나 ZoeDepth를 통한 human pose 또는 depth를 추출.</p></li>
-<li><p>learning rate : 1e-4</p></li>
-<li><p>resolution : 352x352</p></li>
-<li><p>batch size 1, 25k steps 이후 훈련 종료</p></li>
-</ul>
-<p><strong>2.4.4 Expression Fine-Tuning</strong></p>
-<ul class="simple">
-<li><p>사람 표현을 더 낫게하기 위해 <strong>Video ControlNet</strong>을 포함한 <strong>U-Net의 motion block</strong> 구조에서 6k human dancing video data로 추가 fine-tuning</p></li>
-<li><p>U-Net motion block weight만 update</p></li>
-<li><p>learning rate : 5e-5</p></li>
-<li><p>resolution : 512x512</p></li>
-<li><p>batch size 1, 20k steps 이후 훈련 종료</p></li>
-</ul>
-<section id="model-inference">
-<h3>2.5 Model Inference<a class="headerlink" href="#model-inference" title="Permalink to this heading">#</a></h3>
-<p>입력 : text prompt, reference image, pose/depth sequence</p>
-<p>Video ControlNet control scale : 1 (pose/depth에서만)</p>
-<p>multi-controlnet을 통해 pose &amp; depth 동시 사용 가능</p>
-<p>Eq 1의 face/body guidance strength : <span class="math notranslate nohighlight">\(\alpha_f,\alpha_c\)</span>는 적응하도록</p>
-<p>text prompt만 사용할 때 <span class="math notranslate nohighlight">\(\alpha_f=\alpha_c=0\)</span></p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_21.png"><img alt="figure_2" class="bg-primary mb-1" src="../../_images/figure_21.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 630 </span><span class="caption-text">Figure 2. The results of DreaMoving with text prompt as input</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_31.png"><img alt="figure_3" class="bg-primary mb-1" src="../../_images/figure_31.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 631 </span><span class="caption-text">Figure 3. The results of DreaMoving with text prompt and face image as inputs</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_41.png"><img alt="figure_4" class="bg-primary mb-1" src="../../_images/figure_41.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 632 </span><span class="caption-text">Figure 4. The results of DreaMoving with face and cloth images as inputs</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_52.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_52.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 633 </span><span class="caption-text">Figure 5. The results of DreaMoving with stylized image as input</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Animate_Anyone.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Animate Anyone</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DreamPose.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">2. Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-collection-and-preprocessing">2.1 Data Collection and Preprocessing</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-block">2.2 Motion Block</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#content-guider">2.3 Content Guider</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-training">2.4 Model Training</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-inference">2.5 Model Inference</a></li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DreaMoving &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreaMoving';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion" href="DreamPose.html" />
+    <link rel="prev" title="Animate Anyone" href="Animate_Anyone.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreaMoving.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DreaMoving.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DreaMoving</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">2. Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-collection-and-preprocessing">2.1 Data Collection and Preprocessing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-block">2.2 Motion Block</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#content-guider">2.3 Content Guider</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-training">2.4 Model Training</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-inference">2.5 Model Inference</a></li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreaMoving: A Human Video Generation Framework based on Diffusion Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2312.05107">https://arxiv.org/abs/2312.05107</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/dreamoving/dreamoving-project">Official</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://dreamoving.github.io/dreamoving/">https://dreamoving.github.io/dreamoving/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Geonhak Song</p></li>
+<li><p><strong>Last updated on March 13, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dreamoving">
+<h1>DreaMoving<a class="headerlink" href="#dreamoving" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>고품질 customized human video 생성을 위해 제어가능한 diffusion 기반 video generation framework인 DreaMoving 제안</p></li>
+<li><p>target identity와 posture sequence가 주어졌을 때, target identity moving이나 dancing video 생성이 가능하다.</p></li>
+<li><p>추가 제안 모듈 : motion-controlling을 위한 <strong>Video ControlNet</strong> &amp; identity preserving을 위한 <strong>Content Guider</strong></p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>T2V의 진전에도 인간 중심 기반 생성에는 어려움을 겪는 중.</p></li>
+<li><p>open-source human dance video dataset의 부족, text 묘사의 어려움으로 인해 frame간 일관성, 긴 길이, 다양성을 포함한 비디오 생성에 어려움을 겪는다.</p></li>
+<li><p>personalization과 controllability 의 어려움 또한 존재</p></li>
+<li><p>구조적 제어를 위한 ControlNet, appearance 제어를 위한 Dreambooth, LoRA</p></li>
+<li><p>그러나 이 기술들은 정확한 제어가 어렵고 hyperparameter tuning 요소가 존재 &amp; 추가 계산 부담</p></li>
+<li><p>이에 새로운 방법론인 DreaMoving 제안</p></li>
+</ul>
+</section>
+<section id="architecture">
+<h2>2. Architecture<a class="headerlink" href="#architecture" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_1.png"><img alt="figure_1" class="bg-primary mb-1" src="../../_images/figure_1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 640 </span><span class="caption-text">Figure 1. The overview of DreaMoving</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>LDM 기반 모델을 기반으로 3가지 주요 network로 구성</p>
+<ul>
+<li><p>U-Net, Video ControlNet, Content Guider</p></li>
+</ul>
+</li>
+<li><p>AnimateDiff에서 영감을 받아 U-Net 각 block 이후 motion block을 추가</p></li>
+<li><p>Plug-in : motion-controlling을 위한 <strong>Video ControlNet</strong> &amp; identity preserving을 위한 <strong>Content Guider</strong></p></li>
+</ul>
+<section id="data-collection-and-preprocessing">
+<h3>2.1 Data Collection and Preprocessing<a class="headerlink" href="#data-collection-and-preprocessing" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>인터넷에서 수집한 고품질 human dance video 1000개로 훈련</p></li>
+<li><p>temporal module 훈련은 장면 전환(transition)이나 특수 효과가 없는 연속적 frame이 필요하기 때문에 clip video로 split하여 6000개의 짧은 비디오를 획득한다.(8~10s)</p></li>
+<li><p>text description을 위해서 Minigpt-v2(<a class="reference external" href="https://minigpt-v2.github.io/">https://minigpt-v2.github.io/</a>)를 video-captioner로 사용</p>
+<ul>
+<li><p>“[grounding] describe this frame in a detailed manner”의 명령으로 획득</p></li>
+<li><p>subject와 background 내용에 대해 정확히 묘사</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="motion-block">
+<h3>2.2 Motion Block<a class="headerlink" href="#motion-block" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>temporal consistency와 motion fidelity 향상을 위해서 U-Net과 ControlNet 모두에 motion block을 통합.</p></li>
+<li><p>motion block은 AnimateDiff의 motion module을 확장한 것. temporal sequence length는 64로 확장</p></li>
+<li><p>초기화 : AnimateDiff (mm_sd_v15.ckpt)</p></li>
+<li><p>비공개(private) human dance video 데이터로 finetuning</p></li>
+</ul>
+</section>
+<section id="content-guider">
+<h3>2.3 Content Guider<a class="headerlink" href="#content-guider" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Content Guider는 인물의 appearance와 배경을 포함한 생성된 video의 내용을 제어하기 위해 고안됨.</p></li>
+<li><p>가장 간단한 방법은 text prompt이지만, 개인화된 인물 외관 묘사가 어렵다.</p></li>
+<li><p>IP-Adapter에 영감을 받아 image prompt를 활용해 인물 외관에 대한 guidance를 주고 배경에 대해서는 text prompt 사용</p></li>
+<li><p>얼굴 이미지는 image encoder를 통해 encode</p></li>
+<li><p>text feature &amp; 인물 외관 feature는 마지막 content embedding에 concat된 후 cross-attention에 보냄</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/eq_11.png"><img alt="eq_1" class="bg-primary mb-1" src="../../_images/eq_11.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 641 </span><span class="caption-text">Equation 1 Content Guider cross-attention output given query, text, face, cloth features</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(Z\)</span> : query features</p></li>
+<li><p><span class="math notranslate nohighlight">\(c_t\)</span> : text features  / <span class="math notranslate nohighlight">\(c_f\)</span> : face features / <span class="math notranslate nohighlight">\(c_c\)</span> : cloth features</p></li>
+<li><p><span class="math notranslate nohighlight">\(Z^\prime\)</span> : cross-attention output</p></li>
+</ul>
+</section>
+</section>
+<section id="model-training">
+<h2>2.4 Model Training<a class="headerlink" href="#model-training" title="Permalink to this heading">#</a></h2>
+<p><strong>2.4.1 Content Guider Training</strong></p>
+<ul class="simple">
+<li><p>Base Model : SD v1.5 기반</p></li>
+<li><p>Image Encoder : OpenCLIP ViT-H14</p></li>
+<li><p>reference face identity 보존을 위해 Arcface를 통해 얼굴 상관 feature 추출.</p></li>
+<li><p>LAION-2B에서 human data 수집</p></li>
+<li><p>훈련 : 512x512 random crop &amp; resize</p></li>
+<li><p>GPU : 8 V100, 100k steps, 16 batch size/GPU 1장</p></li>
+<li><p>Optimizer : AdamW</p></li>
+<li><p>learning rate : 1e-4, decay 1e-2</p></li>
+</ul>
+<p><strong>2.4.2 Long-Frame Pretraining</strong></p>
+<ul class="simple">
+<li><p>WebVid-10M validation set (5k video clips)에서 motion module의 sequence length를 16에서 64로 확장하기 위한 training stage 수행</p>
+<ul>
+<li><p>WebVid-10M validation set (5k video clips) : 평균 18초, 총 13000 시간</p></li>
+</ul>
+</li>
+<li><p>U-Net motion module만 훈련하고 나머지는 freeze</p></li>
+<li><p>ControlNet이나 image guidance 사용 안 함.</p></li>
+<li><p>learning rate : 1e-4</p></li>
+<li><p>resolution : 256x256 resize &amp; center crop</p></li>
+<li><p>batch size 1, 10k steps 이후 훈련 종료</p></li>
+</ul>
+<p><strong>2.4.3 Video ControlNet Training</strong></p>
+<ul class="simple">
+<li><p>long-frame pretraining 이후, <strong>Video ControlNet</strong> 훈련 진행.</p></li>
+<li><p>U-Net 고정 &amp; <strong>Video ControlNet의 (U-Net block과 motion block)</strong>은 unfreeze</p></li>
+<li><p>수집한 6k human dance video data 훈련</p></li>
+<li><p>DWPose나 ZoeDepth를 통한 human pose 또는 depth를 추출.</p></li>
+<li><p>learning rate : 1e-4</p></li>
+<li><p>resolution : 352x352</p></li>
+<li><p>batch size 1, 25k steps 이후 훈련 종료</p></li>
+</ul>
+<p><strong>2.4.4 Expression Fine-Tuning</strong></p>
+<ul class="simple">
+<li><p>사람 표현을 더 낫게하기 위해 <strong>Video ControlNet</strong>을 포함한 <strong>U-Net의 motion block</strong> 구조에서 6k human dancing video data로 추가 fine-tuning</p></li>
+<li><p>U-Net motion block weight만 update</p></li>
+<li><p>learning rate : 5e-5</p></li>
+<li><p>resolution : 512x512</p></li>
+<li><p>batch size 1, 20k steps 이후 훈련 종료</p></li>
+</ul>
+<section id="model-inference">
+<h3>2.5 Model Inference<a class="headerlink" href="#model-inference" title="Permalink to this heading">#</a></h3>
+<p>입력 : text prompt, reference image, pose/depth sequence</p>
+<p>Video ControlNet control scale : 1 (pose/depth에서만)</p>
+<p>multi-controlnet을 통해 pose &amp; depth 동시 사용 가능</p>
+<p>Eq 1의 face/body guidance strength : <span class="math notranslate nohighlight">\(\alpha_f,\alpha_c\)</span>를 통해 조절 가능</p>
+<p>text prompt만 사용할 때 <span class="math notranslate nohighlight">\(\alpha_f=\alpha_c=0\)</span></p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_21.png"><img alt="figure_2" class="bg-primary mb-1" src="../../_images/figure_21.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 642 </span><span class="caption-text">Figure 2. The results of DreaMoving with text prompt as input</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_31.png"><img alt="figure_3" class="bg-primary mb-1" src="../../_images/figure_31.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 643 </span><span class="caption-text">Figure 3. The results of DreaMoving with text prompt and face image as inputs</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_41.png"><img alt="figure_4" class="bg-primary mb-1" src="../../_images/figure_41.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 644 </span><span class="caption-text">Figure 4. The results of DreaMoving with face and cloth images as inputs</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure_52.png"><img alt="figure_5" class="bg-primary mb-1" src="../../_images/figure_52.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 645 </span><span class="caption-text">Figure 5. The results of DreaMoving with stylized image as input</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Animate_Anyone.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Animate Anyone</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DreamPose.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">2. Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#data-collection-and-preprocessing">2.1 Data Collection and Preprocessing</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#motion-block">2.2 Motion Block</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#content-guider">2.3 Content Guider</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-training">2.4 Model Training</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-inference">2.5 Model Inference</a></li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DreamBooth3D.html b/docs/review/DreamBooth3D.html
old mode 100644
new mode 100755
index c5c8f5b6..6bfd090f
--- a/docs/review/DreamBooth3D.html
+++ b/docs/review/DreamBooth3D.html
@@ -1,1359 +1,1379 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Dream Booth 3D &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamBooth3D';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Synthetic Data with Stable Diffusion for Foliar Disease Classification" href="../experiments/js_exp.html" />
-    <link rel="prev" title="Magic3D" href="magic-3d.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamBooth3D.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DreamBooth3D.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Dream Booth 3D</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Dream Booth 3D</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#approach"><strong>3. Approach</strong></a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#goal-subject-identity-3d-assets">🌟 <strong>Goal</strong>
-텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">3.1. Preliminaries</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#t2i-diffusion-models">3.1.1 <strong>T2I diffusion models</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dream-booth-t2i-personalization">3.1.2 <strong>Dream Booth T2I Personalization.</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dreamfusion">3.1.3 <strong>DreamFusion</strong></a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#failure-of-naive-dreambooth-fusion">3.2 Failure of Naive Dreambooth+Fusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dreambooth3d-optimization">3.3. Dreambooth3D Optimization</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-3d-with-partial-dreambooth"><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-multi-view-data-generation"><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage3-final-nerf-with-multi-view-dreambooth"><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4.1. Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-applications">4.2. Sample Applications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">4.3. Limitations</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <section class="tex2jax_ignore mathjax_ignore" id="dream-booth-3d">
-<h1>Dream Booth 3D<a class="headerlink" href="#dream-booth-3d" title="Permalink to this heading">#</a></h1>
-<div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> DreamBooth3D: Subject-Driven Text-to-3D Generation</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2303.13508">https://arxiv.org/abs/2303.13508</a></p></li>
-<li><p>Project Page : <a class="reference external" href="https://dreambooth3d.github.io/">https://dreambooth3d.github.io/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jeongin Lee</p></li>
-<li><p><strong>Last updated on {Sep. 3, 2024}</strong></p></li>
-</ul>
-</div>
-<section id="abstract">
-<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><strong>DreamBooth3D : 피사체의 3-6개의 캐주얼한 촬영 이미지로부터 text-to-3D 생성 모델을 personalization (맞춤화)</strong></p></li>
-<li><p>DreamBooth + DreamFusion 의 결합</p>
-<ul>
-<li><p>DreamBooth : personalizing text-to-image models</p></li>
-<li><p>DreamFusion : text-to-3D generation</p></li>
-</ul>
-</li>
-<li><p>두 방법론을 나이브하게 결합시 subject의  input viewpoints 에 대해 오버피팅하는 개인화된 t2i 모델로 인해  Subject 에 대해 만족스럽지 못한 3D 결과물 생성</p></li>
-<li><p>t2i 모델의 개인화 기능과 함께 NERF의 3D 일관성을 공동으로 활용하는 3단계 최적화 전략 (3-stage optimization strategy)을 통해 이를 극복</p></li>
-<li><p>Subject 의 입력 이미지에서 볼 수 없는 새로운 포즈, 색상 등 에 대해 텍스트 중심 수정을 통해 고품질의 subject 중심의 3D 결과물 생성 가능</p></li>
-</ul>
-</section>
-<hr class="docutils" />
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p><strong>도입</strong></p>
-<ul class="simple">
-<li><p>3D asset생성은  VR, 영화, 게임 등 다양한 분야에 응용 가능하나, 텍스트 프롬프트만으로 생성된 3D asset 의 정체성, 기하학적 구조, 외관을 정확하게 제어하기 어려움.</p></li>
-<li><p>특히, 특정 subject 의 특성을 반영하는 3D assets 를 생성하는 능력에 대한 개발 필요</p></li>
-<li><p>T2I 모델 subject personalization (맞춤화, 개인화) 태스크에서 성공적인 결과를 보인 연구들은 많지만, 3D asset 생성이나 3D control 을  제공하지는 않음.</p></li>
-<li><p><strong>DreamBooth3D</strong>는 소수의 (3-6개) 캐주얼하게 촬영된 이미지로부터 subject 중심의 텍스트-3D 생성을 제안</p></li>
-</ul>
-<p>⇒ NeRF 와 T2I 모델을 함께 최적화하여 subject 중심의 3D 자산을 생성하자 !</p>
-</li>
-<li><p><strong>문제점</strong></p>
-<ul class="simple">
-<li><p>subject에 맞게 개인화된 T2I 모델 &amp; <strong>NeRF</strong> 를 최적화 하는 것은 여러 실패 사례가 발생</p></li>
-<li><p>주요 문제 : 개인화된 T2I 모델이 제한된 주제 이미지의 카메라 뷰포인트에 과적합</p></li>
-<li><p>연속적인 임의의 뷰포인트에서 일관된 3D <strong>NeRF</strong> 결과물을 최적화하는 데 충분하지 않음.</p></li>
-</ul>
-</li>
-<li><p><strong>해결책</strong></p>
-<ul class="simple">
-<li><p><strong>DreamBooth3D</strong>는 효과적인 3단계 최적화 방식을 제안</p></li>
-<li><p><strong>Dream Booth</strong> , <strong>Dream Fusion</strong> 사용</p></li>
-</ul>
-<hr class="docutils" />
-<p><strong>[STEP 1️⃣]</strong></p>
-<ul class="simple">
-<li><p><strong>DreamBooth</strong> 모델을 부분적으로 미세 조정</p></li>
-<li><p><strong>DreamFusion</strong>을 사용하여 <strong>NeRF</strong> 최적화</p></li>
-<li><p>부분적으로 미세 조정된 DreamBooth 모델은 주어진 대상 뷰에 과적합 되지 않으며 모든 subject별 세부 정보를 캡처하지 않음</p></li>
-<li><p>결과적으로 생성된 <strong>NeRF</strong> 자산은 3D 일관성이 있지만 subject 에 대한 특성을 완전히 반영하지 못함.</p></li>
-</ul>
-<p><strong>[STEP 2️⃣]</strong></p>
-<ul class="simple">
-<li><p><strong>DreamBooth</strong> 모델을 완전히 미세 조정하여 세부 사항을 캡처</p></li>
-<li><p>1단계에서 학습된 NeRF의 다중 뷰 렌더링을 완전히 학습된 <strong>DreamBooth</strong> 모델에 투입</p></li>
-<li><p>이를 통해 subject 별로 다중 뷰 가상 이미지 집합을 생성</p></li>
-</ul>
-<p><strong>[STEP 3️⃣]</strong></p>
-<ul class="simple">
-<li><p>1단계의 주어진 subject 이미지와 가상(pseudo) 다중 뷰 이미지를 사용하여 <strong>DreamBooth</strong> 모델을 추가로 최적화</p></li>
-<li><p>추가 최적화한 <strong>DreamBooth</strong> 로 NeRF 3D 볼륨을  최종 최적화</p></li>
-<li><p>최종 NeRF 최적화시 추가 규제항으로 pseudo 다중 뷰 데이터 세트에 대한 weak reconstruction loss를 사용</p></li>
-<li><p>3단계에 걸친 NeRF 및 T2I 모델의 합동 최적화는 <strong>DreamBooth</strong> 모델이 subject 의 특정 view point 에 과적합되는 것을 방지하는 동시에 결과 NeRF 모델이 대상의 정체성에 충실하도록 보장</p></li>
-</ul>
-</li>
-<hr class="docutils" />
-<li><p><strong>결과</strong></p>
-<ul class="simple">
-<li><p>실험 샘플 결과들을 통해 본 접근 방식이 입력 텍스트 프롬프트에 존재하는 컨텍스트를 존중하면서 주어진 대상과 유사성이 높은 현실적인 3D 자산을 생성할 수 있음을 입증</p></li>
-<li><p>여러 베이스라인과 비교할 때, 정량적 및 정성적 결과는 DreamBooth 3D 생성이 보다 3D 일관성이 있고 대상 세부 사항을 더 잘 포착한다는 것을 입증</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<hr class="docutils" />
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<ol class="arabic simple">
-<li><p><strong>Text-to-Image Generation.</strong></p></li>
-</ol>
-<ul class="simple">
-<li><p>텍스트 조건 반영 : 사용자가 제공한 자연어 텍스트 프롬프트에 정렬된 이미지를 생성하기 위해 사전 학습된 large language model (LLM) 을 활용</p></li>
-<li><p>T2I diffusion 모델의 성공에 힘입어, 많은 연구들이 텍스트 기반 이미지 조작과 같은 다양한 작업에 사전 학습된 T2I 모델을 활용</p></li>
-</ul>
-<ol class="arabic simple" start="2">
-<li><p><strong>3D Generation.</strong></p></li>
-</ol>
-<ul class="simple">
-<li><p>최근 사전 학습된 대규모 T2I diffusion 모델을 활용하여 텍스트 프롬프트에서 3D 자산을 생성할 수 있는 text-to-3D 방법이 제안</p></li>
-<li><p>기존 방법론</p>
-<ul>
-<li><p>텍스트를 통해 t2i 모델로 이미지를 직접 reconstruction</p></li>
-</ul>
-</li>
-<li><p>본 방법론</p>
-<ul>
-<li><p>입력 이미지를 직접 reconstruction 하지 않고  subject 개념을 제공하기 위한 입력 이미지 사용 →  Recontextualization 수행 가능 (sleeping, jumping, color…etc)</p></li>
-<li><p>입력 이미지를 동일한 배경, 조명, 카메라 등으로 촬영할 필요가 없음.</p></li>
-</ul>
-</li>
-</ul>
-<ol class="arabic simple" start="3">
-<li><p><strong>Subject-driven Generation.</strong></p></li>
-</ol>
-<ul>
-<li><p>Subject 중심 이미지 생성의 최근 발전을 통해 사용자는 특정 대상과 개념에 대해 이미지 생성을 개인화(맞춤화)</p></li>
-<li><p><strong>DreamBooth</strong><br />
-희귀 토큰, 모델 finetuning, 규제를 위한 prior preservation loss를 사용하여 모델의 언어 비전 사전을 확장하여 이를 달성</p></li>
-<li><p><strong>Textual Inversion</strong>
-입력 개념을 나타내는 사전 학습된 text-to-image 모델의 임베딩 공간에서 새로운 word 를 최적화함으로써 이를 달성</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png"><img alt="d4a530724fd8" class="bg-primary mb-1" src="../../_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 720 </span><span class="caption-text">Textual Inversion (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p><strong>→</strong> 이러한 방법론들은 3D asset 을 제공하지 않고 일관성 있는 3D 이미지를 생성할 수 없음.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="approach">
-<h1><strong>3. Approach</strong><a class="headerlink" href="#approach" title="Permalink to this heading">#</a></h1>
-<p><strong>Problem setup.</strong></p>
-<figure class="align-default" id="id2">
-<img alt="Untitled_1" class="bg-primary mb-1" src="../../_images/Untitled_1.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 721 </span><span class="caption-text">Input and Output</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Input : subject 이미지 집합, 텍스트 프롬프트</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\left\{I_i \in \mathbb{R}^{n \times 3}\right\}(i \in\{1, \ldots, k\})\)</span> : 각 n개의 픽셀, k 장의 subject 이미지들의 집합</p></li>
-<li><p>context(맥락) 부여, 의미 변화를 위한 텍스트 프롬프트 T (ex) sleeping, standing…etc.</p></li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<section id="goal-subject-identity-3d-assets">
-<h2>🌟 <strong>Goal</strong>
-텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성<a class="headerlink" href="#goal-subject-identity-3d-assets" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>3D volume 에서 radiance 필드를 인코딩하는 MLP 네트워크 <span class="math notranslate nohighlight">\(M\)</span> 으로 구성된 Neural Radiance Fields (NeRF) 를 기반으로 3D assets 를 최적화</p></li>
-<li><p>본 문제는 <strong>subject 이미지에 대한 반영이 필요</strong>하기 때문에, 일반적인 multi-view 이미지 캡처가 필요한 3D reconstruction 설정에 비해 상당히 제한적이고 어려운 문제</p></li>
-<li><p>T2I personalization 및 Text-to-3D 최적화의 최근 발전을 기반으로 기술을 구축</p>
-<p>⇒ DreamBooth personalization + DreamFusion text-to-3D로 최적화를 사용</p>
-</li>
-</ul>
-</section>
-<section id="preliminaries">
-<h2>3.1. Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
-<hr class="docutils" />
-<section id="t2i-diffusion-models">
-<h3>3.1.1 <strong>T2I diffusion models</strong><a class="headerlink" href="#t2i-diffusion-models" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>T2I diffusion models : Imagen, StableDiffusion and DALL-E 2 …etc..</p></li>
-<li><p>T2I diffusion model <span class="math notranslate nohighlight">\(\mathcal{D}_\theta(\epsilon, \mathbf{c})\)</span></p>
-<ul>
-<li><p>input  :초기 노이즈 <span class="math notranslate nohighlight">\(\epsilon\)</span> , 프롬프트 텍스트 임베딩 <span class="math notranslate nohighlight">\(\mathbf{c}\)</span></p>
-<ul>
-<li><p>an initial noise <span class="math notranslate nohighlight">\(\epsilon \sim \mathcal{N}(0,1)\)</span></p></li>
-<li><p>text embedding <span class="math notranslate nohighlight">\(\mathbf{c}=\Theta(T)\)</span>  (a given prompt <span class="math notranslate nohighlight">\(T\)</span> with a text encoder <span class="math notranslate nohighlight">\(\Theta\)</span>)</p></li>
-</ul>
-</li>
-<li><p>output : 프롬프트를 반영하여 생성한 이미지</p></li>
-</ul>
-</li>
-<li><p>T2I diffusion model 을 통해 생성된 이미지는 일반적으로 프롬프트와 일치하지만 생성된 이미지내에서 세부적인 제어가 어려움. → <strong>DreamBooth</strong> 를 통해 이를 해결</p></li>
-</ul>
-</section>
-<hr class="docutils" />
-<section id="dream-booth-t2i-personalization">
-<h3>3.1.2 <strong>Dream Booth T2I Personalization.</strong><a class="headerlink" href="#dream-booth-t2i-personalization" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<img alt="Untitled_4" class="bg-primary mb-1" src="../../_images/Untitled_4.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 722 </span><span class="caption-text">특정 피사체에 대한 소수의 이미지 집합 (3-5장) 을 통해 텍스트로 주어지는 Context 에 맞는 맞춤화 이미지 생성</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> 에서 네트워크를 파인튜닝하여 T2I diffusion 모델을 맞춤화, <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> : a small set of casual captures</p></li>
-<li><p>DreamBooth diffusion loss : T2I model 파인튜닝을 위해 사용</p>
-<div class="math notranslate nohighlight">
-\[
-    \mathcal{L}_d=\mathbb{E}_{\epsilon, t}\left[w_t\left\|\mathcal{D}_\theta\left(\alpha_t I_i+\sigma_t \epsilon, \mathbf{c}\right)-I_i\right\|^2\right],
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(t \sim \mathcal{U}[0,1]\)</span> : the time-step in the diffusion process</p></li>
-<li><p><span class="math notranslate nohighlight">\(w_t, \alpha_t, \sigma_t\)</span> : the corresponding scheduling parameters</p></li>
-</ul>
-</li>
-<li><p><strong>DreamBooth Class prior preserving loss</strong></p>
-<p>DreamBooth 는 <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span>  에 대한 <strong>over fitting</strong> 을 방지하여 다양성을 개선하고, <strong>language drift</strong> 현상을 피하기 위해 선택적으로 <strong>class prior preserving loss 를 사용</strong></p>
-</li>
-<li><p><strong>최종 loss : reconstruction loss + class prior preservation loss</strong></p></li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-\mathbb{E}_{x, c, \epsilon, \epsilon^{\prime}, t}\left[w_t\left\|\hat{x}_\theta \left(\alpha_t x+\sigma_t \epsilon, c\right)-x\right\|_2^2+\lambda w_{t^{\prime}}\left\|\hat{x}_\theta\left(\alpha_{t^{\prime}} x_{p r}+\sigma_{t^{\prime}}\epsilon^{\prime}, c_{pr}\right)-x_{pr}\right\|_2^2\right]
-\]</div>
-<ul class="simple">
-<li><p><strong>(example) over fitting</strong></p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_2.png"><img alt="Untitled_2" class="bg-primary mb-1" src="../../_images/Untitled_2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 723 </span><span class="caption-text">over fitting</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>(example) language-drift</strong></p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_3.png"><img alt="Untitled_3" class="bg-primary mb-1" src="../../_images/Untitled_3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 724 </span><span class="caption-text">language-drift</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<hr class="docutils" />
-<section id="dreamfusion">
-<h3>3.1.3 <strong>DreamFusion</strong><a class="headerlink" href="#dreamfusion" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id6">
-<img alt="Untitled_6" class="bg-primary mb-1" src="../../_images/Untitled_6.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 725 </span><span class="caption-text">DreamFusion process / (source : <a class="reference external" href="https://pseudo-lab.github.io/pseudodiffusers/docs/review/DreamFusion.html">DreamFusion</a>)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>T2I diffusion model을 사용하여 볼륨의 랜덤뷰가 프롬프트  <span class="math notranslate nohighlight">\(T\)</span> 에 상응하도록 NeRF <span class="math notranslate nohighlight">\(\mathcal{M}_\phi\)</span> (<span class="math notranslate nohighlight">\(\phi\)</span> : parameters) 를 통해 표현된 볼륨을 최적화</p></li>
-<li><p>normals : 밀도의 그래디언트로부터 계산된 normals은 Lambertian shading 으로 기하학적 사실성을 개선시키기 위해 모델을 랜덤으로 relight 하는데 사용됨.</p></li>
-<li><p><span class="math notranslate nohighlight">\(\mathcal{M}_\phi\)</span>  : mapping (camera, light (location) → albedo &amp;density)</p>
-<ul class="simple">
-<li><p>랜덤 뷰 <span class="math notranslate nohighlight">\(v\)</span>, 랜덤 조명(light) 방향이 주어지면 shaded(음영 처리된) 이미지 <span class="math notranslate nohighlight">\(\hat{I}_v\)</span> 로 볼륨 렌더링을 수행</p></li>
-</ul>
-</li>
-<li><p>이 때 볼륨 렌더링한 이미지가 텍스트 프롬프트 <span class="math notranslate nohighlight">\(T\)</span> 처럼 보이도록 NeRF <span class="math notranslate nohighlight">\(\phi\)</span> 의 매개변수를 최적화하기 위해 DreamFusion 은 score distillation sampling <strong>(SDS)</strong> 를 도입</p></li>
-<li><p><strong>score distillation sampling (SDS)</strong></p>
-<div class="math notranslate nohighlight">
-\[
-    \nabla_\phi \mathcal{L}_{SDS}=\mathbb{E}_{\epsilon, t}\left[w_t\left(\mathcal{D}_\theta\left(\alpha_t \hat{I}_v+\sigma_t \epsilon, \mathbf{c}\right)-\hat{I}_v\right) \frac{\partial \hat{I}_v}{\partial \phi}\right] .
-    \]</div>
-</li>
-<li><p>렌더링된 이미지의 노이즈가 처리된 버전들을 T2I diffusion model의 낮은 에너지 상태로 push</p></li>
-<li><p>다양한 views를 랜덤으로 선택하고, NeRF 를 통해 역전파 함으로써,
-rendering 결과들이 T2I model <span class="math notranslate nohighlight">\(\mathcal{D}_\theta\)</span> 로 주어진 프롬프트에 맞게 생성된 이미지처럼 보이도록 함.</p></li>
-<li><p><strong>DreamFusion</strong> 에서 사용된 실험 환경을 정확하게 동일하게 사용함.</p></li>
-</ul>
-</section>
-</section>
-<section id="failure-of-naive-dreambooth-fusion">
-<h2>3.2 Failure of Naive Dreambooth+Fusion<a class="headerlink" href="#failure-of-naive-dreambooth-fusion" title="Permalink to this heading">#</a></h2>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>피사체(subject) 중심 text-to-3D 생성을 위한 직관적인 접근 방식</strong></p>
-<ol class="arabic simple">
-<li><p>subject에 대해 <strong>T2I model 을 personalized(맞춤화)</strong></p></li>
-<li><p>맞춤화된  T2I model 을 <strong>text-to-3D optimization</strong> 을 위해 사용</p></li>
-</ol>
-</li>
-<li><p><strong>즉, DreamBooth 최적화(personalized) ⇒ DreamFusion 최적화</strong></p></li>
-<li><p><strong>BUT</strong>, <strong>Naive Dreambooth+Fusion 의 결합은 불만족스러운 결과를 초래</strong></p></li>
-</ul>
-<hr class="docutils" />
-<blockquote>
-<div><p>핵심 문제 (KEY Issue)</p>
-</div></blockquote>
-<ul class="simple">
-<li><p><strong>Dream Booth</strong>가 <strong>훈련된 뷰에 존재하는 subject 의 뷰에 과적합</strong> 되어
-<strong>이미지 생성에서 viewpoint 에 대한 다양성이 감소</strong>하는 경향을 보임.</p></li>
-<li><p>미세 조정 단계가 증가할수록, Subject 유사성 증가 (👍)
-<strong>BUT</strong>  input exemplar views에 유사하도록 viewpoints 생성 (👎)
-⇒ 즉, 다양한 시점에서 이미지를 생성하는 능력이 저하됨.</p></li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p>이런 DreamBooth 모델 기반의 NeRF SDS 손실은 일관된 3D NeRF 결과물을 얻기에 불충분</p></li>
-<li><p><strong>DreamBooth+Fusion NeRF</strong> 모델이 <strong>서로 다른 view</strong> 에 걸쳐 학습된 <strong>동일한 대상에 대한 뷰</strong>(예: face of a dog :  다양한 각도에서 본 동일한 dog face)를 가지고 있음.</p>
-<ul>
-<li><p><strong>“Janus problem”</strong> : 하나의 3D 객체에서 대상의 정면 모습(예: 얼굴)이 여러 시점에 걸쳐 반복해서 나타나는 문제 (multi-face problem)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="dreambooth3d-optimization">
-<h2>3.3. Dreambooth3D Optimization<a class="headerlink" href="#dreambooth3d-optimization" title="Permalink to this heading">#</a></h2>
-<hr class="docutils" />
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_8.png"><img alt="Untitled_8" class="bg-primary mb-1" src="../../_images/Untitled_8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 726 </span><span class="caption-text">DreamBooth3D Overview</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DreamBooth3D Overview</p></li>
-</ul>
-<p><strong>stage-1 (왼쪽)</strong>: 먼저 DreamBooth를 부분적으로 훈련시키고, 결과 모델을 사용하여 초기 NeRF를 최적화</p>
-<p><strong>stage-2 (가운데)</strong>: 초기 NeRF에서 랜덤 시점에 따라 다중 시점 이미지를 렌더링한 후, 완전히 훈련된 DreamBooth 모델을 사용하여 이를 가상 다중 시점 subject 이미지로 변환</p>
-<p><strong>stage-3 (오른쪽)</strong>: 다중 시점 이미지를 사용하여 부분적인 DreamBooth를 추가로 미세 조정한 다음, 결과적으로 얻어진 다중 시점 DreamBooth를 사용하여 최종 NeRF 3D 자산을 SDS 손실과 다중 시점 재구성 손실을 통해 최적화</p>
-<ul class="simple">
-<li><p>위의 문제를 해결하고 성공적인 subject 맞춤 text-to-3D 생성을 위해 효율적인 3단계 최적화 방식을 기반으로 한 Dream-Booth3D 제안</p></li>
-</ul>
-<hr class="docutils" />
-<section id="stage-1-3d-with-partial-dreambooth">
-<h3><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong><a class="headerlink" href="#stage-1-3d-with-partial-dreambooth" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id8">
-<img alt="Untitled_9" class="bg-primary mb-1" src="../../_images/Untitled_9.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 727 </span><span class="caption-text">Stage-1 :  3D with Partial DreamBooth</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>입력된 Subject 이미지를 사용하여 DreamBooth 모델 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta\)</span> 를 훈련</p></li>
-</ul>
-<hr class="docutils" />
-<p>🌟 <strong>DreamBoothT2I 모델의 초기 체크포인트가 (=부분적으로 파인튜닝한 결과) 주어진 subject view에 과적합되지 않음을 확인</strong></p>
-<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>⇒ partial DreamBooth (부분적으로 파인튜닝한 Dreambooth)
-</pre></div>
-</div>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>partial DreamBooth</strong> 모델 하에 DreamFusion은 더 일관된 3D NeRF를 생성가능</p></li>
-<li><p>NeRF 최적화시 SDS 손실 사용 :</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\nabla_\phi \mathcal{L}_{SDS}=\mathbb{E}_{\epsilon, t}\left[w_t\left(\hat{\mathcal{D}}_\theta^{\text {partial }}\left(\alpha_t \hat{I}_v+\sigma_t \epsilon, \mathbf{c}\right)-\hat{I}_v\right) \frac{\partial \hat{I}_v}{\partial \phi}\right]\)</span></p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\text {partial }}\)</span>: <strong>partial DreamBooth</strong></p></li>
-<li><p>SDS 손실을 사용하여 주어진 텍스트 프롬프트에 대한 초기 NeRF 자산을 최적화</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>partial DreamBooth</strong> 모델과 NeRF 결과물은 입력된 subject 와 완전히 유사하지 않음</p></li>
-</ul>
-<hr class="docutils" />
-<p>🌟 즉,  <strong>Stage-1️⃣ 에서의 초기 NeRF</strong> 는 <strong>주어진 subject 와  부분적으로 유사</strong>하면서,
-<strong>주어진 텍스트 프롬프트에 충실한</strong>  <strong>subject class 3D 모델</strong></p>
-</section>
-<hr class="docutils" />
-<section id="stage-2-multi-view-data-generation">
-<h3><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong><a class="headerlink" href="#stage-2-multi-view-data-generation" title="Permalink to this heading">#</a></h3>
-<hr class="docutils" />
-<p>🌟 <strong>Stage-2 Multi-view Data Generation : 본 접근법의 가장 중요한 부분</strong></p>
-<p>일관성을 갖춘 <strong>3D initial NeRF</strong> 와 <strong>fully-trained DreamBooth</strong> 를 사용하여 <strong>pseudo multi-view subject</strong> 이미지들을 생성</p>
-<hr class="docutils" />
-<ol class="arabic">
-<li><p><strong>Initial NeRF</strong> 로부터 다양한 랜덤 viewpoints  <span class="math notranslate nohighlight">\(\{v\}\)</span>을 따라 여러 이미지 <span class="math notranslate nohighlight">\(\left\{\hat{I}_v \in \mathbb{R}^{n \times 3}\right\}\)</span> 를 렌더링하여 다중 시점 렌더링을 생성</p></li>
-<li><p>각 <strong>렌더링에 고정된 양의 노이즈를 추가하는 forward diffusion</strong> 과정을 통해 <span class="math notranslate nohighlight">\(t_{pseudo}\)</span>로 전환</p></li>
-<li><p>reverse diffusion 과정을 실행하여 <strong>fully-trained DreamBooth</strong> 모델 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta\)</span> 를 사용하여 샘플을 생성</p>
-<ul>
-<li><p>샘플링 과정은 각 뷰에 대해 독립적으로 수행</p></li>
-<li><p><strong>Initial NeRF 결과물</strong> 에 노이즈를 추가한 noisy render 를 조건으로 지정함으로써, 넓은 범위의 시점을 커버하면서 subject 를 잘 나타내는 이미지 생성 가능
-⇒ 다양한 노이즈가 있는 이미지를 조건으로 학습시, 다양한 변형에 대한 학습 가능하기 때문</p></li>
-<li><p><strong>BUT</strong> reverse diffusion 과정은 다른 뷰에 다른 세부 사항을 추가할 수 있기 때문에 결과 이미지는  multi-view 에 대한 일관성이 없음.</p>
-<p>⇒ 가상(pseudo) 다중 시점 이미지 집합 <strong>(collection of pseudo multi-view images)</strong></p>
-</li>
-</ul>
-</li>
-</ol>
-<hr class="docutils" />
-<p>🔑 <strong>Key insight</strong></p>
-<ol class="arabic simple">
-<li><p>초기 NeRF 이미지가 unseen views 에 가까울 경우,
-DreamBooth가 Subject 의 unseen views를 효과적으로 생성 가능</p></li>
-<li><p>입력 이미지에 비해 Subject 와 더 유사한 출력 이미지를 효과적으로 생성 가능</p></li>
-</ol>
-<hr class="docutils" />
-<ul class="simple">
-<li><p>위 그림을 통해 체크할 부분</p>
-<ul>
-<li><p><strong>fully-trained DreamBooth</strong> 를 사용한 Img2Img 변환의 샘플 출력</p></li>
-<li><p>입력 NeRF 렌더링의 시점을 유지하면서도 subject 이미지와 더 유사한 모습</p></li>
-<li><p>기존 연구들과 달리 Img2Img 변환을 DreamBooth, NeRF 3D assets 과 결합하여 사용 (기존 연구의 경우 Img2Img 변환을 이미지 editing 응용으로만 사용)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="stage3-final-nerf-with-multi-view-dreambooth">
-<h3><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong><a class="headerlink" href="#stage3-final-nerf-with-multi-view-dreambooth" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id9">
-<img alt="Untitled_12" class="bg-primary mb-1" src="../../_images/Untitled_12.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 728 </span><span class="caption-text">Stage-3 : Final NeRF with Multi-view DreamBooth
-SDS와 multi-view reconstruction 손실을 사용한 최종 NeRF 최적화</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<blockquote>
-<div><p><strong>새로운 데이터 <span class="math notranslate nohighlight">\(I^{\text{aug}}\)</span> 생성</strong></p>
-</div></blockquote>
-<ul class="simple">
-<li><p>가상 다중 시점 이미지 <span class="math notranslate nohighlight">\(\left\{I_v^{\text {pseudo }}\right\}\)</span>, 입력 Subject 이미지 <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> 의 결합을 통해 생성</p></li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-I^{\text {aug }}=\left\{I_v^{\text {pseudo }}\right\} \cup\left\{I_i\right\}
-\]</div>
-<blockquote>
-<div><p><strong><span class="math notranslate nohighlight">\(I^{\text {aug}}\)</span> 를 사용하여 최종 Multi-view DreamBooth 모델을 최적화</strong></p>
-</div></blockquote>
-<ol class="arabic simple">
-<li><p>1단계에서 <strong>partial DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_{\theta^*}\)</span> 준비</p></li>
-<li><p>위의 증강 데이터 <span class="math notranslate nohighlight">\(I^{\text {aug}}\)</span>  를 사용하여 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_{\theta^*}\)</span> 에 대한 <strong>파인튜닝</strong>을 추가 진행</p></li>
-<li><p><strong>Multi-view DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\mathrm{multi}}\)</span> 를  생성</p></li>
-</ol>
-<blockquote>
-<div><p><span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\text {multi }}\)</span> <strong>모델을 사용하여 DreamFusion SDS Loss 와 함께 NeRF 3D assets 를 최적화</strong></p>
-</div></blockquote>
-<ul class="simple">
-<li><p>1단계의 <strong>partial DreamBooth</strong>에 비해 <strong>multi-view DreamBooth</strong> 의 뷰 일반화와 subject 보존 능력이 더 우수하기 때문에 <strong>subject identity가 상당히 향상된 NeRF 모델 생성 가능</strong></p></li>
-<li><p><strong>BUT</strong> SDS 손실만 사용시 최적화된 NeRF assets 이</p>
-<ul>
-<li><p>주어진 subject 에 대해 우수한 기하학적 유사성 보유</p></li>
-<li><p><strong>Color saturation artifacts 현상 다수 발생</strong></p></li>
-<li><p>이를 해결하기 위해 <span class="math notranslate nohighlight">\(\left\{I_v^{\mathrm{pseudo}}\right\}\)</span> 를 사용한 새로운 <strong>weak reconstruction loss</strong> 도입</p></li>
-<li><p><strong>Color saturation artifacts :</strong></p>
-<ul>
-<li><p>색상의 과도한 포화(saturation)로 인해 비현실적이거나 왜곡된 색상 표현이 나타나는 결함 현상</p></li>
-<li><p>모델이 특정 색상을 과도하게 강조하는 경우 발생</p></li>
-<li><p>색상 값을 잘못 예측하여 비현실적인 색상 표현이 나타난 경우 발생</p></li>
-<li><p>다양한 시점에서 일관된 색상 표현을 유지하지 못한 경우 발생</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<p><strong>Reconstruction loss</strong></p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\left\{I_v^{\mathrm{pseudo}}\right\}\)</span> 가 생성된 카메라 매개변수 <span class="math notranslate nohighlight">\(\left\{P_v\right\}\)</span> 정보를 알고 있으므로,
-두 번째 NeRF MLP <span class="math notranslate nohighlight">\(\mathcal{F}_\gamma\)</span> 의 훈련을 <strong>reconstruction loss</strong> 를 통해 추가로 규제</p>
-<div class="math notranslate nohighlight">
-\[
-    \mathcal{L}_{recon }=\left\|\Gamma\left(\mathcal{F}_\gamma, P_v\right)-I_v^{\text {pseudo }}\right\|_p,
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\Gamma\left(\mathcal{F}_\gamma, P_v\right)\)</span> : 카메라 시점 <span class="math notranslate nohighlight">\(P_v\)</span> 를 따라 NeRF <span class="math notranslate nohighlight">\(\mathcal{F}_\gamma\)</span> 에서 이미지를 렌더링하는 함수</p></li>
-</ul>
-</li>
-<li><p><strong>Reconstruction loss 의 목적</strong></p>
-<ul class="simple">
-<li><p>생성된 볼륨의 색상 분포를 image exemplars 과 더 가깝게 조정</p></li>
-<li><p>unseen views에서 subject 유사성을 향상</p></li>
-</ul>
-<p><strong>Final NeRF Loss function</strong></p>
-</li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}=\lambda_{\text {recon }} \mathcal{L}_{\text {recon }}+\lambda_{\text {SDS }} \mathcal{L}_{\text {SDS }}+\lambda_{\text {nerf }} \mathcal{L}_{\text {nerf }}
-\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\mathcal{L}_{\text {nerf }}\)</span> 는 Mip-NeRF360 [2]에서 사용된 추가적인 NeRF 정규화</p></li>
-</ul>
-</section>
-</section>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="experiments">
-<h1>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
-<hr class="docutils" />
-<p><strong>Implementation Details.</strong></p>
-<ul class="simple">
-<li><p><strong>사용 모델</strong>:</p>
-<ul>
-<li><p>T2I : Imagen T2I 모델</p></li>
-<li><p>Text-encoding:  T5-XXL</p></li>
-<li><p>NeRF : DreamFusion</p></li>
-</ul>
-</li>
-<li><p><strong>훈련 시간</strong>: 4core TPUv4, 각 프롬프트당 3단계 최적화를 완료하는 데 약 3시간 소요</p></li>
-<li><p><strong>훈련 단계</strong>:</p>
-<ul>
-<li><p><strong>부분 DreamBooth 모델 (<span class="math notranslate nohighlight">\(D_θ^{partial}\)</span>)</strong> : 150번의 반복훈련</p></li>
-<li><p><strong>전체 DreamBooth 모델 (<span class="math notranslate nohighlight">\(D_θ\)</span>)</strong> : 800번 반복 훈련시 최적의 성능</p></li>
-</ul>
-</li>
-<li><p><strong>pseudo multi-view data generation :</strong>
-원점에서 고정된 반경으로 균일하게 샘플링한 20개의 이미지를 렌더링</p></li>
-<li><p><strong>Stage-3 Multi-view DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\mathrm{multi}}\)</span>:
-3단계에서 추가로 150번 반복하여 부분적으로 훈련된 <span class="math notranslate nohighlight">\(\hat{D}_{θ}^∗\)</span> 모델을 Finetuning</p></li>
-<li><p><strong>Hyperparams :</strong> supplementary material 참고</p></li>
-</ul>
-<hr class="docutils" />
-<p><strong>Datasets.</strong></p>
-<ul class="simple">
-<li><p><strong>훈련 데이터</strong>: 공개된 이미지 컬렉션을 사용하여 personalized text-to-3D 모델을 훈련</p>
-<ul>
-<li><p>다양한 subject(개, 장난감, 배낭, 선글라스, 만화 캐릭터 등) 의  4-6개의 casual 이미지를 포함한 30개의 다른 이미지 컬렉션으로 구성</p></li>
-</ul>
-</li>
-<li><p><strong>희귀 객체 성능 분석</strong>: “올빼미 장식품”과 같은 희귀한 대상의 성능을 분석하기 위해 추가 이미지 수집</p></li>
-<li><p>3-6개의 프롬프트에 대해 각 3D 모델을 최적화하여 3D contextualizations 문맥화 시연</p></li>
-</ul>
-<hr class="docutils" />
-<p><strong>Baselines.</strong></p>
-<ul class="simple">
-<li><p><strong>Latent-NeRF</strong></p>
-<ul>
-<li><p>RGB 픽셀 공간이 아닌 Stable Diffusion 의  latent feature 공간에서 SDS 손실을 통해 3D NeRF 모델을 학습</p></li>
-<li><p>baseline 으로써 fully dreamboothed T2I model 를 사용하여 Latent-NeRF 실행</p></li>
-</ul>
-</li>
-<li><p><strong>DreamFusion+DreamBooth</strong>: DreamBooth 확산 모델을 먼저 훈련한 후 DreamFusion을 사용하여 3D NeRF를 최적화하는 단일 단계 접근 방식</p></li>
-<li><p><strong>본 연구의 3단계 최적화 기반 방법론</strong> :  “DreamBooth3D”</p></li>
-</ul>
-<hr class="docutils" />
-<p><strong>Evaluation Metrics.</strong></p>
-<ul class="simple">
-<li><p><strong>CLIP R-Precision</strong></p>
-<ul>
-<li><p>rendering된 장면들이 주어졌을 때 프롬프트와 얼마나 정확하게 일치하는지 비율을 나타냄.</p></li>
-<li><p>CLIP ViT-B/16, ViT-B/32, ViT-L-14 모델을 평가에 사용</p></li>
-</ul>
-</li>
-<li><p>추가적으로 user study 수행 (뒤에 언급)</p></li>
-</ul>
-<section id="results">
-<h2>4.1. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<p><strong>Visual Results</strong></p>
-<ul class="simple">
-<li><p><strong>비교 결과</strong>: DreamBooth3D, Latent-NeRF, DreamBooth+Fusion 기준 모델의 비교</p>
-<ul>
-<li><p>Latent-NeRF : 일부 경우(오리)에서 적절히 작동하지만, 대부분의 경우 일관된 3D 모델을 생성하는 데 실패</p></li>
-<li><p>DreamBooth+Fusion : 여러 시점에서 동일한 외형 및 구조를 보임</p></li>
-<li><p>DreamBooth3D : 360도 일관된 3D Asset을 생성하며, 주어진 subject 의 기하학적 구조 및 외관의 세부 사항을 잘 반영함</p></li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<p><strong>Initial vs. Final NeRF</strong></p>
-<ul class="simple">
-<li><p>1단계와 3단계에서 생성된 초기 NeRF와 최종 NeRF 결과</p></li>
-<li><p>초기 NeRF : 주어진 subject 와 부분적으로만 유사, 3D 일관성을 유지</p></li>
-<li><p>최종 NeRF : 주어진 subject 와 더 유사하며, 일관된 3D 구조를 유지</p></li>
-<li><p>이러한 예시는 DreamBooth3D의 3단계 최적화가 필요함을 입증 (?)</p></li>
-</ul>
-<hr class="docutils" />
-<p><strong>User Study.</strong></p>
-<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>→ DreamBooth3D와 비교 모델들을 세 가지 측면에 대해 아래의 질문에 대한 답변으로 평가
-</pre></div>
-</div>
-<ol class="arabic simple">
-<li><p><strong>subject 충실도</strong>: “어떤 3D 항목이 subject 와 더 유사하게 보입니까?”</p></li>
-<li><p><strong>3D 일관성과 타당성</strong>: “어떤 3D 항목이 더 타당하고 일관된 기하학적 구조를 가지고 있습니까?”</p></li>
-<li><p><strong>프롬프트 충실도</strong>: “어떤 비디오가 제공된 프롬프트를 더 잘 반영합니까?”</p></li>
-</ol>
-<ul class="simple">
-<li><p><strong>연구 방법</strong></p>
-<ul>
-<li><p>3D 일관성과 주제 충실도 연구에서는 데이터셋의 30개 subject 각각에 대해 회전 비디오 결과를 제시하고 11명의 사용자가 각 쌍에 대해 응답</p></li>
-<li><p>프롬프트 충실도 연구에서는 54개의 고유한 프롬프트와 주제 쌍에 대해 비디오를 생성하고, 21명의 사용자가 응답</p></li>
-</ul>
-</li>
-<li><p><strong>최종 결과</strong></p>
-<ul>
-<li><p>최종 결과는 다수결 투표를 통해 산출</p></li>
-<li><p>DreamBooth3D는 3D 일관성, 주제 충실도, 프롬프트 충실도에서 기준 모델들보다 유의미하게 더 선호됨.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="sample-applications">
-<h2>4.2. Sample Applications<a class="headerlink" href="#sample-applications" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><strong>Recontextualization. (재문맥화)</strong></p>
-<ul>
-<li><p>단순한 프롬프트를 사용하여 다양한 개 주제의 3D 모델로 재문맥화한 샘플 결과</p></li>
-<li><p>모든 subject 에서 텍스트 프롬프트에 주어진 문맥을 일관되게 반영</p></li>
-<li><p>출력된 3D 모델의 자세와 로컬 변형은 입력 이미지에 없는 포즈임에도 불구하고 매우 사실적</p></li>
-</ul>
-</li>
-</ul>
-<ul class="simple">
-<li><p><strong>Color/Material Editing.</strong></p>
-<ul>
-<li><p>색상 편집 및 재질 편집</p></li>
-</ul>
-</li>
-<li><p><strong>Accessorization</strong></p>
-<ul>
-<li><p>subject 에 액세서리 추가</p></li>
-</ul>
-</li>
-</ul>
-<ul class="simple">
-<li><p><strong>Stylization</strong></p>
-<ul>
-<li><p>크림색 신발을 색상과 프릴 추가를 기반으로 스타일화</p></li>
-</ul>
-</li>
-<li><p><strong>Cartoon-to-3D</strong></p>
-<ul>
-<li><p>비사실적 피사체 이미지(예: 2D 평면 캐릭터)를 그럴듯한 3D 형태로 변환</p></li>
-<li><p>모든 subject 이미지가 정면임에도 불구하고, 그럴듯한 3D 결과물 생성</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="limitations">
-<h2>4.3. Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<hr class="docutils" />
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/limitation.png"><img alt="limitation" class="bg-primary mb-1" src="../../_images/limitation.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 729 </span><span class="caption-text">limitations</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>최적화된 3D 표현이 때때로 과도하게 포화되고 매끄럽게 처리되는 경우가 존재</p>
-<ol class="arabic simple">
-<li><p>높은 가중치 가이던스를 가진 SDS 기반 최적화에 의해 발생</p></li>
-<li><p>64×64 픽셀이라는 상대적으로 낮은 이미지 해상도로 제한되어 발생</p></li>
-<li><p>diffusion 과 NeRF 의 효율성 향상은 더 높은 해상도로 확장할 수 있는 가능성을 제공</p></li>
-</ol>
-</li>
-<li><p>Janus problem : 최적화된 3D 표현은 입력 이미지에 시점 변화가 없으면 여러 불일치한 시점에서 정면으로 보이는 viewpoints 불일치 문제가 발생</p></li>
-<li><p>선글라스와 같은 얇은 객체 구조를 재구성하는 데 어려움이 존재</p></li>
-</ol>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>5. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>Subject 중심의 텍스트-3D 생성을 위한 방법인 DreamBooth3D를 제안</p></li>
-<li><p>Subject 에 대한 소규모 casual 이미지 세트가 주어지면, (카메라 포즈와 같은 추가 정보 없이)
-입력 텍스트 프롬프트에서 제공된 컨텍스트(자고 있는, 점프하는, 빨간 등)를 준수하는 subject 별 3D assets 를 생성</p></li>
-<li><p>DreamBooth 데이터셋 에 대한 광범위한 실험을 통해 해당 방법이 주어진 subject 와 높은 유사성을 가지면서도 입력 텍스트 프롬프트에 나타난 컨텍스트를 잘 반영하는 현실적인 3D assets 을 생성할 수 있음을 입증</p></li>
-<li><p>정량적 및 정성적 평가에서 여러 기준 모델보다 우수한 성능을 보임을 확인</p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="magic-3d.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Magic3D</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="../experiments/js_exp.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Synthetic Data with Stable Diffusion for Foliar Disease Classification</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Dream Booth 3D</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#approach"><strong>3. Approach</strong></a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#goal-subject-identity-3d-assets">🌟 <strong>Goal</strong>
-텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">3.1. Preliminaries</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#t2i-diffusion-models">3.1.1 <strong>T2I diffusion models</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dream-booth-t2i-personalization">3.1.2 <strong>Dream Booth T2I Personalization.</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dreamfusion">3.1.3 <strong>DreamFusion</strong></a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#failure-of-naive-dreambooth-fusion">3.2 Failure of Naive Dreambooth+Fusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dreambooth3d-optimization">3.3. Dreambooth3D Optimization</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-3d-with-partial-dreambooth"><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-multi-view-data-generation"><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage3-final-nerf-with-multi-view-dreambooth"><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4.1. Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-applications">4.2. Sample Applications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">4.3. Limitations</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Dream Booth 3D &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamBooth3D';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Zero123++" href="zero123plus.html" />
+    <link rel="prev" title="Magic3D" href="magic-3d.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamBooth3D.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DreamBooth3D.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Dream Booth 3D</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Dream Booth 3D</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#approach"><strong>3. Approach</strong></a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#goal-subject-identity-3d-assets">🌟 <strong>Goal</strong>
+텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">3.1. Preliminaries</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#t2i-diffusion-models">3.1.1 <strong>T2I diffusion models</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dream-booth-t2i-personalization">3.1.2 <strong>Dream Booth T2I Personalization.</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dreamfusion">3.1.3 <strong>DreamFusion</strong></a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#failure-of-naive-dreambooth-fusion">3.2 Failure of Naive Dreambooth+Fusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dreambooth3d-optimization">3.3. Dreambooth3D Optimization</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-3d-with-partial-dreambooth"><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-multi-view-data-generation"><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage3-final-nerf-with-multi-view-dreambooth"><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong></a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4.1. Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-applications">4.2. Sample Applications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">4.3. Limitations</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <section class="tex2jax_ignore mathjax_ignore" id="dream-booth-3d">
+<h1>Dream Booth 3D<a class="headerlink" href="#dream-booth-3d" title="Permalink to this heading">#</a></h1>
+<div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreamBooth3D: Subject-Driven Text-to-3D Generation</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2303.13508">https://arxiv.org/abs/2303.13508</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://dreambooth3d.github.io/">https://dreambooth3d.github.io/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jeongin Lee</p></li>
+<li><p><strong>Last updated on {Sep. 3, 2024}</strong></p></li>
+</ul>
+</div>
+<section id="abstract">
+<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>DreamBooth3D : 피사체의 3-6개의 캐주얼한 촬영 이미지로부터 text-to-3D 생성 모델을 personalization (맞춤화)</strong></p></li>
+<li><p>DreamBooth + DreamFusion 의 결합</p>
+<ul>
+<li><p>DreamBooth : personalizing text-to-image models</p></li>
+<li><p>DreamFusion : text-to-3D generation</p></li>
+</ul>
+</li>
+<li><p>두 방법론을 나이브하게 결합시 subject의  input viewpoints 에 대해 오버피팅하는 개인화된 t2i 모델로 인해  Subject 에 대해 만족스럽지 못한 3D 결과물 생성</p></li>
+<li><p>t2i 모델의 개인화 기능과 함께 NeRF의 3D 일관성을 공동으로 활용하는 3단계 최적화 전략 (3-stage optimization strategy)을 통해 이를 극복</p></li>
+<li><p>Subject 의 입력 이미지에서 볼 수 없는 새로운 포즈, 색상 등 에 대해 텍스트 중심 수정을 통해 고품질의 subject 중심의 3D 결과물 생성 가능</p></li>
+</ul>
+</section>
+<hr class="docutils" />
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p><strong>도입</strong></p>
+<ul class="simple">
+<li><p>3D asset생성은  VR, 영화, 게임 등 다양한 분야에 응용 가능하나, 텍스트 프롬프트만으로 생성된 3D asset 의 정체성, 기하학적 구조, 외관을 정확하게 제어하기 어려움.</p></li>
+<li><p>특히, 특정 subject 의 특성을 반영하는 3D assets 를 생성하는 능력에 대한 개발 필요</p></li>
+<li><p>T2I 모델 subject personalization (맞춤화, 개인화) 태스크에서 성공적인 결과를 보인 연구들은 많지만, 3D asset 생성이나 3D control 을  제공하지는 않음.</p></li>
+<li><p><strong>DreamBooth3D</strong>는 소수의 (3-6개) 캐주얼하게 촬영된 이미지로부터 subject 중심의 텍스트-3D 생성을 제안</p></li>
+</ul>
+<p>⇒ NeRF 와 T2I 모델을 함께 최적화하여 subject 중심의 3D 자산을 생성하자 !</p>
+</li>
+<li><p><strong>문제점</strong></p>
+<ul class="simple">
+<li><p>subject에 맞게 개인화된 T2I 모델 &amp; <strong>NeRF</strong> 를 최적화 하는 것은 여러 실패 사례가 발생</p></li>
+<li><p>주요 문제 : 개인화된 T2I 모델이 제한된 주제 이미지의 카메라 뷰포인트에 과적합</p></li>
+<li><p>연속적인 임의의 뷰포인트에서 일관된 3D <strong>NeRF</strong> 결과물을 최적화하는 데 충분하지 않음.</p></li>
+</ul>
+</li>
+<li><p><strong>해결책</strong></p>
+<ul class="simple">
+<li><p><strong>DreamBooth3D</strong>는 효과적인 3단계 최적화 방식을 제안</p></li>
+<li><p><strong>Dream Booth</strong> , <strong>Dream Fusion</strong> 사용</p></li>
+</ul>
+<hr class="docutils" />
+<p><strong>[STEP 1️⃣]</strong></p>
+<ul class="simple">
+<li><p><strong>DreamBooth</strong> 모델을 부분적으로 미세 조정</p></li>
+<li><p><strong>DreamFusion</strong>을 사용하여 <strong>NeRF</strong> 최적화</p></li>
+<li><p>부분적으로 미세 조정된 DreamBooth 모델은 주어진 대상 뷰에 과적합 되지 않으며 모든 subject별 세부 정보를 캡처하지 않음</p></li>
+<li><p>결과적으로 생성된 <strong>NeRF</strong> 자산은 3D 일관성이 있지만 subject 에 대한 특성을 완전히 반영하지 못함.</p></li>
+</ul>
+<p><strong>[STEP 2️⃣]</strong></p>
+<ul class="simple">
+<li><p><strong>DreamBooth</strong> 모델을 완전히 미세 조정하여 세부 사항을 캡처</p></li>
+<li><p>1단계에서 학습된 NeRF의 다중 뷰 렌더링을 완전히 학습된 <strong>DreamBooth</strong> 모델에 투입</p></li>
+<li><p>이를 통해 subject 별로 다중 뷰 가상 이미지 집합을 생성</p></li>
+</ul>
+<p><strong>[STEP 3️⃣]</strong></p>
+<ul class="simple">
+<li><p>1단계의 주어진 subject 이미지와 가상(pseudo) 다중 뷰 이미지를 사용하여 <strong>DreamBooth</strong> 모델을 추가로 최적화</p></li>
+<li><p>추가 최적화한 <strong>DreamBooth</strong> 로 NeRF 3D 볼륨을  최종 최적화</p></li>
+<li><p>최종 NeRF 최적화시 추가 규제항으로 pseudo 다중 뷰 데이터 세트에 대한 weak reconstruction loss를 사용</p></li>
+<li><p>3단계에 걸친 NeRF 및 T2I 모델의 합동 최적화는 <strong>DreamBooth</strong> 모델이 subject 의 특정 view point 에 과적합되는 것을 방지하는 동시에 결과 NeRF 모델이 대상의 정체성에 충실하도록 보장</p></li>
+</ul>
+</li>
+<hr class="docutils" />
+<li><p><strong>결과</strong></p>
+<ul class="simple">
+<li><p>실험 샘플 결과들을 통해 본 접근 방식이 입력 텍스트 프롬프트에 존재하는 컨텍스트를 존중하면서 주어진 대상과 유사성이 높은 현실적인 3D 자산을 생성할 수 있음을 입증</p></li>
+<li><p>여러 베이스라인과 비교할 때, 정량적 및 정성적 결과는 DreamBooth 3D 생성이 보다 3D 일관성이 있고 대상 세부 사항을 더 잘 포착한다는 것을 입증</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<hr class="docutils" />
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<ol class="arabic simple">
+<li><p><strong>Text-to-Image Generation.</strong></p></li>
+</ol>
+<ul class="simple">
+<li><p>텍스트 조건 반영 : 사용자가 제공한 자연어 텍스트 프롬프트에 정렬된 이미지를 생성하기 위해 사전 학습된 large language model (LLM) 을 활용</p></li>
+<li><p>T2I diffusion 모델의 성공에 힘입어, 많은 연구들이 텍스트 기반 이미지 조작과 같은 다양한 작업에 사전 학습된 T2I 모델을 활용</p></li>
+</ul>
+<ol class="arabic simple" start="2">
+<li><p><strong>3D Generation.</strong></p></li>
+</ol>
+<ul class="simple">
+<li><p>최근 사전 학습된 대규모 T2I diffusion 모델을 활용하여 텍스트 프롬프트에서 3D 자산을 생성할 수 있는 text-to-3D 방법이 제안</p></li>
+<li><p>기존 방법론</p>
+<ul>
+<li><p>텍스트를 통해 t2i 모델로 이미지를 직접 reconstruction</p></li>
+</ul>
+</li>
+<li><p>본 방법론</p>
+<ul>
+<li><p>입력 이미지를 직접 reconstruction 하지 않고  subject 개념을 제공하기 위한 입력 이미지 사용 →  Recontextualization 수행 가능 (sleeping, jumping, color…etc)</p></li>
+<li><p>입력 이미지를 동일한 배경, 조명, 카메라 등으로 촬영할 필요가 없음.</p></li>
+</ul>
+</li>
+</ul>
+<ol class="arabic simple" start="3">
+<li><p><strong>Subject-driven Generation.</strong></p></li>
+</ol>
+<ul>
+<li><p>Subject 중심 이미지 생성의 최근 발전을 통해 사용자는 특정 대상과 개념에 대해 이미지 생성을 개인화(맞춤화)</p></li>
+<li><p><strong>DreamBooth</strong><br />
+희귀 토큰, 모델 finetuning, 규제를 위한 prior preservation loss를 사용하여 모델의 언어 비전 사전을 확장하여 이를 달성</p></li>
+<li><p><strong>Textual Inversion</strong>
+입력 개념을 나타내는 사전 학습된 text-to-image 모델의 임베딩 공간에서 새로운 word 를 최적화함으로써 이를 달성</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png"><img alt="d4a530724fd8" class="bg-primary mb-1" src="../../_images/54544834-db8a-49f3-9ac7-d4a530724fd8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 732 </span><span class="caption-text">Textual Inversion (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p><strong>→</strong> 이러한 방법론들은 3D asset 을 제공하지 않고 일관성 있는 3D 이미지를 생성할 수 없음.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="approach">
+<h1><strong>3. Approach</strong><a class="headerlink" href="#approach" title="Permalink to this heading">#</a></h1>
+<p><strong>Problem setup.</strong></p>
+<figure class="align-default" id="id2">
+<img alt="Untitled_1" class="bg-primary mb-1" src="../../_images/Untitled_1.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 733 </span><span class="caption-text">Input and Output</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Input : subject 이미지 집합, 텍스트 프롬프트</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\left\{I_i \in \mathbb{R}^{n \times 3}\right\}(i \in\{1, \ldots, k\})\)</span> : 각 n개의 픽셀, k 장의 subject 이미지들의 집합</p></li>
+<li><p>context(맥락) 부여, 의미 변화를 위한 텍스트 프롬프트 T (ex) sleeping, standing…etc.</p></li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<section id="goal-subject-identity-3d-assets">
+<h2>🌟 <strong>Goal</strong>
+텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성<a class="headerlink" href="#goal-subject-identity-3d-assets" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>3D volume 에서 radiance 필드를 인코딩하는 MLP 네트워크 <span class="math notranslate nohighlight">\(M\)</span> 으로 구성된 Neural Radiance Fields (NeRF) 를 기반으로 3D assets 를 최적화</p></li>
+<li><p>본 문제는 <strong>subject 이미지에 대한 반영이 필요</strong>하기 때문에, 일반적인 multi-view 이미지 캡처가 필요한 3D reconstruction 설정에 비해 상당히 제한적이고 어려운 문제</p></li>
+<li><p>T2I personalization 및 Text-to-3D 최적화의 최근 발전을 기반으로 기술을 구축</p>
+<p>⇒ DreamBooth personalization + DreamFusion text-to-3D로 최적화를 사용</p>
+</li>
+</ul>
+</section>
+<section id="preliminaries">
+<h2>3.1. Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
+<hr class="docutils" />
+<section id="t2i-diffusion-models">
+<h3>3.1.1 <strong>T2I diffusion models</strong><a class="headerlink" href="#t2i-diffusion-models" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>T2I diffusion models : Imagen, StableDiffusion and DALL-E 2 …etc..</p></li>
+<li><p>T2I diffusion model <span class="math notranslate nohighlight">\(\mathcal{D}_\theta(\epsilon, \mathbf{c})\)</span></p>
+<ul>
+<li><p>input  :초기 노이즈 <span class="math notranslate nohighlight">\(\epsilon\)</span> , 프롬프트 텍스트 임베딩 <span class="math notranslate nohighlight">\(\mathbf{c}\)</span></p>
+<ul>
+<li><p>an initial noise <span class="math notranslate nohighlight">\(\epsilon \sim \mathcal{N}(0,1)\)</span></p></li>
+<li><p>text embedding <span class="math notranslate nohighlight">\(\mathbf{c}=\Theta(T)\)</span>  (a given prompt <span class="math notranslate nohighlight">\(T\)</span> with a text encoder <span class="math notranslate nohighlight">\(\Theta\)</span>)</p></li>
+</ul>
+</li>
+<li><p>output : 프롬프트를 반영하여 생성한 이미지</p></li>
+</ul>
+</li>
+<li><p>T2I diffusion model 을 통해 생성된 이미지는 일반적으로 프롬프트와 일치하지만 생성된 이미지내에서 세부적인 제어가 어려움. → <strong>DreamBooth</strong> 를 통해 이를 해결</p></li>
+</ul>
+</section>
+<hr class="docutils" />
+<section id="dream-booth-t2i-personalization">
+<h3>3.1.2 <strong>Dream Booth T2I Personalization.</strong><a class="headerlink" href="#dream-booth-t2i-personalization" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<img alt="Untitled_4" class="bg-primary mb-1" src="../../_images/Untitled_4.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 734 </span><span class="caption-text">특정 피사체에 대한 소수의 이미지 집합 (3-5장) 을 통해 텍스트로 주어지는 Context 에 맞는 맞춤화 이미지 생성</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> 에서 네트워크를 파인튜닝하여 T2I diffusion 모델을 맞춤화, <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> : a small set of casual captures</p></li>
+<li><p>DreamBooth diffusion loss : T2I model 파인튜닝을 위해 사용</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal{L}_d=\mathbb{E}_{\epsilon, t}\left[w_t\left\|\mathcal{D}_\theta\left(\alpha_t I_i+\sigma_t \epsilon, \mathbf{c}\right)-I_i\right\|^2\right],
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(t \sim \mathcal{U}[0,1]\)</span> : the time-step in the diffusion process</p></li>
+<li><p><span class="math notranslate nohighlight">\(w_t, \alpha_t, \sigma_t\)</span> : the corresponding scheduling parameters</p></li>
+</ul>
+</li>
+<li><p><strong>DreamBooth Class prior preserving loss</strong></p>
+<p>DreamBooth 는 <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span>  에 대한 <strong>over fitting</strong> 을 방지하여 다양성을 개선하고, <strong>language drift</strong> 현상을 피하기 위해 선택적으로 <strong>class prior preserving loss 를 사용</strong></p>
+</li>
+<li><p><strong>최종 loss : reconstruction loss + class prior preservation loss</strong></p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\mathbb{E}_{x, c, \epsilon, \epsilon^{\prime}, t}\left[w_t\left\|\hat{x}_\theta\left(\alpha_t x+\sigma_t \epsilon, c\right)-x\right\|_2^2+\lambda w_{t^{\prime}}\left\|\hat{x}_\theta\left(\alpha_{t^{\prime}} x_{p r}+\sigma_{t^{\prime}}\epsilon^{\prime}, c_{pr}\right)-x_{pr}\right\|_2^2\right]
+\]</div>
+<ul class="simple">
+<li><p><strong>(example) over fitting</strong></p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_2.png"><img alt="Untitled_2" class="bg-primary mb-1" src="../../_images/Untitled_2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 735 </span><span class="caption-text">over fitting</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>(example) language-drift</strong></p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_3.png"><img alt="Untitled_3" class="bg-primary mb-1" src="../../_images/Untitled_3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 736 </span><span class="caption-text">language-drift</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<hr class="docutils" />
+<section id="dreamfusion">
+<h3>3.1.3 <strong>DreamFusion</strong><a class="headerlink" href="#dreamfusion" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id6">
+<img alt="Untitled_6" class="bg-primary mb-1" src="../../_images/Untitled_6.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 737 </span><span class="caption-text">DreamFusion process / (source : <a class="reference external" href="https://pseudo-lab.github.io/pseudodiffusers/docs/review/DreamFusion.html">DreamFusion</a>)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>T2I diffusion model을 사용하여 볼륨의 랜덤뷰가 프롬프트  <span class="math notranslate nohighlight">\(T\)</span> 에 상응하도록 NeRF <span class="math notranslate nohighlight">\(\mathcal{M}_\phi\)</span> (<span class="math notranslate nohighlight">\(\phi\)</span> : parameters) 를 통해 표현된 볼륨을 최적화</p></li>
+<li><p>normals : 밀도의 그래디언트로부터 계산된 normals은 Lambertian shading 으로 기하학적 사실성을 개선시키기 위해 모델을 랜덤으로 relight 하는데 사용됨.</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mathcal{M}_\phi\)</span>  : mapping (camera, light (location) → albedo &amp;density)</p>
+<ul class="simple">
+<li><p>랜덤 뷰 <span class="math notranslate nohighlight">\(v\)</span>, 랜덤 조명(light) 방향이 주어지면 shaded(음영 처리된) 이미지 <span class="math notranslate nohighlight">\(\hat{I}_v\)</span> 로 볼륨 렌더링을 수행</p></li>
+</ul>
+</li>
+<li><p>이 때 볼륨 렌더링한 이미지가 텍스트 프롬프트 <span class="math notranslate nohighlight">\(T\)</span> 처럼 보이도록 NeRF <span class="math notranslate nohighlight">\(\phi\)</span> 의 매개변수를 최적화하기 위해 DreamFusion 은 score distillation sampling <strong>(SDS)</strong> 를 도입</p></li>
+<li><p><strong>score distillation sampling (SDS)</strong></p>
+<div class="math notranslate nohighlight">
+\[
+    \nabla_\phi \mathcal{L}_{SDS}=\mathbb{E}_{\epsilon, t}\left[w_t\left(\mathcal{D}_\theta\left(\alpha_t \hat{I}_v+\sigma_t \epsilon, \mathbf{c}\right)-\hat{I}_v\right) \frac{\partial \hat{I}_v}{\partial \phi}\right] .
+    \]</div>
+</li>
+<li><p>렌더링된 이미지의 노이즈가 처리된 버전들을 T2I diffusion model의 낮은 에너지 상태로 push</p></li>
+<li><p>다양한 views를 랜덤으로 선택하고, NeRF 를 통해 역전파 함으로써,
+rendering 결과들이 T2I model <span class="math notranslate nohighlight">\(\mathcal{D}_\theta\)</span> 로 주어진 프롬프트에 맞게 생성된 이미지처럼 보이도록 함.</p></li>
+<li><p><strong>DreamFusion</strong> 에서 사용된 실험 환경을 정확하게 동일하게 사용함.</p></li>
+</ul>
+</section>
+</section>
+<section id="failure-of-naive-dreambooth-fusion">
+<h2>3.2 Failure of Naive Dreambooth+Fusion<a class="headerlink" href="#failure-of-naive-dreambooth-fusion" title="Permalink to this heading">#</a></h2>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>피사체(subject) 중심 text-to-3D 생성을 위한 직관적인 접근 방식</strong></p>
+<ol class="arabic simple">
+<li><p>subject에 대해 <strong>T2I model 을 personalized(맞춤화)</strong></p></li>
+<li><p>맞춤화된  T2I model 을 <strong>text-to-3D optimization</strong> 을 위해 사용</p></li>
+</ol>
+</li>
+<li><p><strong>즉, DreamBooth 최적화(personalized) ⇒ DreamFusion 최적화</strong></p></li>
+<li><p><strong>BUT</strong>, <strong>Naive Dreambooth+Fusion 의 결합은 불만족스러운 결과를 초래</strong></p></li>
+</ul>
+<hr class="docutils" />
+<blockquote>
+<div><p>핵심 문제 (KEY Issue)</p>
+</div></blockquote>
+<ul class="simple">
+<li><p><strong>Dream Booth</strong>가 <strong>훈련된 뷰에 존재하는 subject 의 뷰에 과적합</strong> 되어
+<strong>이미지 생성에서 viewpoint 에 대한 다양성이 감소</strong>하는 경향을 보임.</p></li>
+<li><p>미세 조정 단계가 증가할수록, Subject 유사성 증가 (👍)
+<strong>BUT</strong>  input exemplar views에 유사하도록 viewpoints 생성 (👎)
+⇒ 즉, 다양한 시점에서 이미지를 생성하는 능력이 저하됨.</p></li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p>이런 DreamBooth 모델 기반의 NeRF SDS 손실은 일관된 3D NeRF 결과물을 얻기에 불충분</p></li>
+<li><p><strong>DreamBooth+Fusion NeRF</strong> 모델이 <strong>서로 다른 view</strong> 에 걸쳐 학습된 <strong>동일한 대상에 대한 뷰</strong>(예: face of a dog :  다양한 각도에서 본 동일한 dog face)를 가지고 있음.</p>
+<ul>
+<li><p><strong>“Janus problem”</strong> : 서로 다른 시점에서도 대상의 동일한 면(예: 정면 얼굴)이 반복해서 나타나는 다중 얼굴(multi-face) 문제</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="dreambooth3d-optimization">
+<h2>3.3. Dreambooth3D Optimization<a class="headerlink" href="#dreambooth3d-optimization" title="Permalink to this heading">#</a></h2>
+<hr class="docutils" />
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled_8.png"><img alt="Untitled_8" class="bg-primary mb-1" src="../../_images/Untitled_8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 738 </span><span class="caption-text">DreamBooth3D Overview</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DreamBooth3D Overview</p></li>
+</ul>
+<p><strong>stage-1 (왼쪽)</strong>: 먼저 DreamBooth를 부분적으로 훈련시키고, 결과 모델을 사용하여 초기 NeRF를 최적화</p>
+<p><strong>stage-2 (가운데)</strong>: 초기 NeRF에서 랜덤 시점에 따라 다중 시점 이미지를 렌더링한 후, 완전히 훈련된 DreamBooth 모델을 사용하여 이를 가상 다중 시점 subject 이미지로 변환</p>
+<p><strong>stage-3 (오른쪽)</strong>: 다중 시점 이미지를 사용하여 부분적인 DreamBooth를 추가로 미세 조정한 다음, 결과적으로 얻어진 다중 시점 DreamBooth를 사용하여 최종 NeRF 3D 자산을 SDS 손실과 다중 시점 재구성 손실을 통해 최적화</p>
+<ul class="simple">
+<li><p>위의 문제를 해결하고 성공적인 subject 맞춤 text-to-3D 생성을 위해 효율적인 3단계 최적화 방식을 기반으로 한 Dream-Booth3D 제안</p></li>
+</ul>
+<hr class="docutils" />
+<section id="stage-1-3d-with-partial-dreambooth">
+<h3><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong><a class="headerlink" href="#stage-1-3d-with-partial-dreambooth" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id8">
+<img alt="Untitled_9" class="bg-primary mb-1" src="../../_images/Untitled_9.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 739 </span><span class="caption-text">Stage-1 :  3D with Partial DreamBooth</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>입력된 Subject 이미지를 사용하여 DreamBooth 모델 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta\)</span> 를 훈련</p></li>
+</ul>
+<hr class="docutils" />
+<p>🌟 <strong>DreamBoothT2I 모델의 초기 체크포인트가 (=부분적으로 파인튜닝한 결과) 주어진 subject view에 과적합되지 않음을 확인</strong></p>
+<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>⇒ partial DreamBooth (부분적으로 파인튜닝한 Dreambooth)
+</pre></div>
+</div>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>partial DreamBooth</strong> 모델 하에 DreamFusion은 더 일관된 3D NeRF를 생성가능</p></li>
+<li><p>NeRF 최적화시 SDS 손실 사용 :</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\nabla_\phi \mathcal{L}_{SDS}=\mathbb{E}_{\epsilon, t}\left[w_t\left(\hat{\mathcal{D}}_\theta^{\text {partial }}\left(\alpha_t \hat{I}_v+\sigma_t \epsilon, \mathbf{c}\right)-\hat{I}_v\right) \frac{\partial \hat{I}_v}{\partial \phi}\right]\)</span></p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\text {partial }}\)</span>: <strong>partial DreamBooth</strong></p></li>
+<li><p>SDS 손실을 사용하여 주어진 텍스트 프롬프트에 대한 초기 NeRF 자산을 최적화</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>partial DreamBooth</strong> 모델과 NeRF 결과물은 입력된 subject 와 완전히 유사하지 않음</p></li>
+</ul>
+<hr class="docutils" />
+<p>🌟 즉,  <strong>Stage-1️⃣ 에서의 초기 NeRF</strong> 는 <strong>주어진 subject 와  부분적으로 유사</strong>하면서,
+<strong>주어진 텍스트 프롬프트에 충실한</strong>  <strong>subject class 3D 모델</strong></p>
+</section>
+<hr class="docutils" />
+<section id="stage-2-multi-view-data-generation">
+<h3><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong><a class="headerlink" href="#stage-2-multi-view-data-generation" title="Permalink to this heading">#</a></h3>
+<hr class="docutils" />
+<p>🌟 <strong>Stage-2 Multi-view Data Generation : 본 접근법의 가장 중요한 부분</strong></p>
+<p>일관성을 갖춘 <strong>3D initial NeRF</strong> 와 <strong>fully-trained DreamBooth</strong> 를 사용하여 <strong>pseudo multi-view subject</strong> 이미지들을 생성</p>
+<hr class="docutils" />
+<ol class="arabic">
+<li><p><strong>Initial NeRF</strong> 로부터 다양한 랜덤 viewpoints  <span class="math notranslate nohighlight">\(\{v\}\)</span>을 따라 여러 이미지<span class="math notranslate nohighlight">\(\left\{\hat{I}_v \in \mathbb{R}^{n \times 3}\right\}\)</span> 를 렌더링하여 다중 시점 렌더링을 생성</p></li>
+<li><p>각 <strong>렌더링에 고정된 양의 노이즈를 추가하는 forward diffusion</strong> 과정을 통해 <span class="math notranslate nohighlight">\(t_{pseudo}\)</span>로 전환</p></li>
+<li><p>reverse diffusion 과정을 실행하여 <strong>fully-trained DreamBooth</strong> 모델 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta\)</span> 를 사용하여 샘플을 생성</p>
+<ul>
+<li><p>샘플링 과정은 각 뷰에 대해 독립적으로 수행</p></li>
+<li><p><strong>Initial NeRF 결과물</strong> 에 노이즈를 추가한 noisy render 를 조건으로 지정함으로써, 넓은 범위의 시점을 커버하면서 subject 를 잘 나타내는 이미지 생성 가능
+⇒ 다양한 노이즈가 있는 이미지를 조건으로 학습시, 다양한 변형에 대한 학습 가능하기 때문</p></li>
+<li><p><strong>BUT</strong> reverse diffusion 과정은 다른 뷰에 다른 세부 사항을 추가할 수 있기 때문에 결과 이미지는  multi-view 에 대한 일관성이 없음.</p>
+<p>⇒ 가상(pseudo) 다중 시점 이미지 집합 <strong>(collection of pseudo multi-view images)</strong></p>
+</li>
+</ul>
+</li>
+</ol>
+<hr class="docutils" />
+<p>🔑 <strong>Key insight</strong></p>
+<ol class="arabic simple">
+<li><p>초기 NeRF 이미지가 unseen views 에 가까울 경우,
+DreamBooth가 Subject 의 unseen views를 효과적으로 생성 가능</p></li>
+<li><p>입력 이미지에 비해 Subject 와 더 유사한 출력 이미지를 효과적으로 생성가능</p></li>
+</ol>
+<hr class="docutils" />
+<ul class="simple">
+<li><p>위 그림을 통해 체크할 부분</p>
+<ul>
+<li><p><strong>fully-trained DreamBooth</strong> 를 사용한 Img2Img 변환의 샘플 출력</p></li>
+<li><p>입력 NeRF 렌더링의 시점을 유지하면서도 subject 이미지와 더 유사한 모습</p></li>
+<li><p>기존 연구들과 달리 Img2Img 변환을 DreamBooth, NeRF 3D assets 과 결합하여 사용 (기존 연구의 경우 Img2Img 변환을 이미지 editing 응용으로만 사용)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="stage3-final-nerf-with-multi-view-dreambooth">
+<h3><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong><a class="headerlink" href="#stage3-final-nerf-with-multi-view-dreambooth" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id9">
+<img alt="Untitled_12" class="bg-primary mb-1" src="../../_images/Untitled_12.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 740 </span><span class="caption-text">Stage-3 : Final NeRF with Multi-view DreamBooth
+SDS와 multi-view reconstruction 손실을 사용한 최종 NeRF 최적화</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<blockquote>
+<div><p><strong>새로운 데이터 <span class="math notranslate nohighlight">\(I^{\text{aug}}\)</span> 생성</strong></p>
+</div></blockquote>
+<ul class="simple">
+<li><p>가상 다중 시점 이미지 <span class="math notranslate nohighlight">\(\left\{I_v^{\text {pseudo }}\right\}\)</span>, 입력 Subject 이미지 <span class="math notranslate nohighlight">\(\left\{I_i\right\}\)</span> 의 결합을 통해 생성</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+I^{\text {aug }}=\left\{I_v^{\text {pseudo }}\right\} \cup\left\{I_i\right\}
+\]</div>
+<blockquote>
+<div><p><strong><span class="math notranslate nohighlight">\(I^{\text {aug}}\)</span> 를 사용하여 최종 Multi-view DreamBooth 모델을 최적화</strong></p>
+</div></blockquote>
+<ol class="arabic simple">
+<li><p>1단계에서 <strong>partial DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_{\theta^*}\)</span> 준비</p></li>
+<li><p>위의 증강 데이터 <span class="math notranslate nohighlight">\(I^{\text {aug}}\)</span>  를 사용하여 <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_{\theta^*}\)</span> 에 대한 <strong>파인튜닝</strong>을 추가 진행</p></li>
+<li><p><strong>Multi-view DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\mathrm{multi}}\)</span> 를  생성</p></li>
+</ol>
+<blockquote>
+<div><p><span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\text {multi }}\)</span> <strong>모델을 사용하여 DreamFusion SDS Loss 와 함께 NeRF 3D assets 를 최적화</strong></p>
+</div></blockquote>
+<ul class="simple">
+<li><p>1단계의 <strong>partial DreamBooth</strong>에 비해 <strong>multi-view DreamBooth</strong> 의 뷰 일반화와 subject 보존 능력이 더 우수하기 때문에 <strong>subject identity가 상당히 향상된 NeRF 모델 생성 가능</strong></p></li>
+<li><p><strong>BUT</strong> SDS 손실만 사용시 최적화된 NeRF assets 이</p>
+<ul>
+<li><p>주어진 subject 에 대해 우수한 기하학적 유사성 보유</p></li>
+<li><p><strong>Color saturation artifacts 현상 다수 발생</strong></p></li>
+<li><p>이를 해결하기 위해 <span class="math notranslate nohighlight">\(\left\{I_v^{\mathrm{pseudo}}\right\}\)</span> 를 사용한 새로운 <strong>weak reconstruction loss</strong> 도입</p></li>
+<li><p><strong>Color saturation artifacts :</strong></p>
+<ul>
+<li><p>색상의 과도한 포화(saturation)로 인해 비현실적이거나 왜곡된 색상 표현이 나타나는 결함 현상</p></li>
+<li><p>모델이 특정 색상을 과도하게 강조하는 경우 발생</p></li>
+<li><p>색상 값을 잘못 예측하여 비현실적인 색상 표현이 나타난 경우 발생</p></li>
+<li><p>다양한 시점에서 일관된 색상 표현을 유지하지 못한 경우 발생</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<p><strong>Reconstruction loss</strong></p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\left\{I_v^{\mathrm{pseudo}}\right\}\)</span> 가 생성된 카메라 매개변수 <span class="math notranslate nohighlight">\(\left\{P_v\right\}\)</span> 정보를 알고 있으므로,
+두 번째 NeRF MLP <span class="math notranslate nohighlight">\(\mathcal{F}_\gamma\)</span> 의 훈련을 <strong>reconstruction loss</strong> 를 통해 추가로 규제</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal{L}_{recon }=\left\|\Gamma\left(\mathcal{F}_\gamma, P_v\right)-I_v^{\text {pseudo }}\right\|_p,
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\Gamma\left(\mathcal{F}_\gamma, P_v\right)\)</span> : 카메라 시점 <span class="math notranslate nohighlight">\(P_v\)</span> 를 따라 NeRF <span class="math notranslate nohighlight">\(\mathcal{F}_\gamma\)</span> 에서 이미지를 렌더링하는 함수</p></li>
+</ul>
+</li>
+<li><p><strong>Reconstruction loss 의 목적</strong></p>
+<ul class="simple">
+<li><p>생성된 볼륨의 색상 분포를 image exemplars 과 더 가깝게 조정</p></li>
+<li><p>unseen views에서 subject 유사성을 향상</p></li>
+</ul>
+<p><strong>Final NeRF Loss function</strong></p>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}=\lambda_{\text {recon }} \mathcal{L}_{\text {recon }}+\lambda_{\text {SDS }} \mathcal{L}_{\text {SDS }}+\lambda_{\text {nerf }} \mathcal{L}_{\text {nerf }}
+\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\mathcal{L}_{\text {nerf }}\)</span> 는 Mip-NeRF360 [2]에서 사용된 추가적인 NeRF 정규화</p></li>
+</ul>
+</section>
+</section>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<hr class="docutils" />
+<p><strong>Implementation Details.</strong></p>
+<ul class="simple">
+<li><p><strong>사용 모델</strong>:</p>
+<ul>
+<li><p>T2I : Imagen T2I 모델</p></li>
+<li><p>Text-encoding:  T5-XXL</p></li>
+<li><p>NeRF : DreamFusion</p></li>
+</ul>
+</li>
+<li><p><strong>훈련 시간</strong>: 4core TPUv4, 각 프롬프트당 3단계 최적화를 완료하는 데 약 3시간 소요</p></li>
+<li><p><strong>훈련 단계</strong>:</p>
+<ul>
+<li><p><strong>부분 DreamBooth 모델 (<span class="math notranslate nohighlight">\(D_θ^{partial}\)</span>)</strong> : 150번의 반복훈련</p></li>
+<li><p><strong>전체 DreamBooth 모델 (<span class="math notranslate nohighlight">\(D_θ\)</span>)</strong> : 800번 반복 훈련시 최적의 성능</p></li>
+</ul>
+</li>
+<li><p><strong>pseudo multi-view data generation :</strong>
+원점에서 고정된 반경으로 균일하게 샘플링한 20개의 이미지를 렌더링</p></li>
+<li><p><strong>Stage-3 Multi-view DreamBooth</strong> <span class="math notranslate nohighlight">\(\hat{\mathcal{D}}_\theta^{\mathrm{multi}}\)</span>:
+3단계에서 추가로 150번 반복하여 부분적으로 훈련된 <span class="math notranslate nohighlight">\(\hat{D}_{θ}^∗\)</span> 모델을 Finetuning</p></li>
+<li><p><strong>Hyperparams :</strong> supplementary material 참고</p></li>
+</ul>
+<hr class="docutils" />
+<p><strong>Datasets.</strong></p>
+<ul class="simple">
+<li><p><strong>훈련 데이터</strong>: 공개된 이미지 컬렉션을 사용하여 personalized text-to-3D 모델을 훈련</p>
+<ul>
+<li><p>다양한 subject(개, 장난감, 배낭, 선글라스, 만화 캐릭터 등) 의  4-6개의 casual 이미지를 포함한 30개의 다른 이미지 컬렉션으로 구성</p></li>
+</ul>
+</li>
+<li><p><strong>희귀 객체 성능 분석</strong>: “올빼미 장식품”과 같은 희귀한 대상의 성능을 분석하기 위해 추가 이미지 수집</p></li>
+<li><p>3-6개의 프롬프트에 대해 각 3D 모델을 최적화하여 3D contextualization(문맥화) 시연</p></li>
+</ul>
+<hr class="docutils" />
+<p><strong>Baselines.</strong></p>
+<ul class="simple">
+<li><p><strong>Latent-NeRF</strong></p>
+<ul>
+<li><p>RGB 픽셀 공간이 아닌 Stable Diffusion 의  latent feature 공간에서 SDS 손실을 통해 3D NeRF 모델을 학습</p></li>
+<li><p>baseline 으로써 fully dreamboothed T2I model 를 사용하여 Latent-NeRF 실행</p></li>
+</ul>
+</li>
+<li><p><strong>DreamFusion+DreamBooth</strong>: DreamBooth 확산 모델을 먼저 훈련한 후 DreamFusion을 사용하여 3D NeRF를 최적화하는 단일 단계 접근 방식</p></li>
+<li><p><strong>본 연구의 3단계 최적화 기반 방법론</strong> :  “DreamBooth3D”</p></li>
+</ul>
+<hr class="docutils" />
+<p><strong>Evaluation Metrics.</strong></p>
+<ul class="simple">
+<li><p><strong>CLIP R-Precision</strong></p>
+<ul>
+<li><p>rendering된 장면들이 주어졌을 때 프롬프트와 얼마나 정확하게 일치하는지 비율을 나타냄.</p></li>
+<li><p>CLIP ViT-B/16, ViT-B/32, ViT-L-14 모델을 평가에 사용</p></li>
+</ul>
+</li>
+<li><p>추가적으로 user study 수행 (뒤에 언급)</p></li>
+</ul>
+<section id="results">
+<h2>4.1. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<p><strong>Visual Results</strong></p>
+<ul class="simple">
+<li><p><strong>비교 결과</strong>: DreamBooth3D, Latent-NeRF, DreamBooth+Fusion 기준 모델의 비교</p>
+<ul>
+<li><p>Latent-NeRF : 일부 경우(오리)에서 적절히 작동하지만, 대부분의 경우 일관된 3D 모델을 생성하는 데 실패</p></li>
+<li><p>DreamBooth+Fusion : 여러 시점에서 동일한 외형 및 구조를 보임</p></li>
+<li><p>DreamBooth3D : 360도 일관된 3D Asset을 생성하며, 주어진 subject 의 기하학적 구조 및 외관의 세부 사항을 잘 반영함</p></li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<p><strong>Initial vs. Final NeRF</strong></p>
+<ul class="simple">
+<li><p>1단계와 3단계에서 생성된 초기 NeRF와 최종 NeRF 결과</p></li>
+<li><p>초기 NeRF : 주어진 subject 와 부분적으로만 유사, 3D 일관성을 유지</p></li>
+<li><p>최종 NeRF : 주어진 subject 와 더 유사하며, 일관된 3D 구조를 유지</p></li>
+<li><p>이러한 예시는 DreamBooth3D의 3단계 최적화가 필요함을 입증 (?)</p></li>
+</ul>
+<hr class="docutils" />
+<p><strong>User Study.</strong></p>
+<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>→ DreamBooth3D와 비교 모델들을 세 가지 측면에 대해 아래의 질문에 대한 답변으로 평가
+</pre></div>
+</div>
+<ol class="arabic simple">
+<li><p><strong>subject 충실도</strong>: “어떤 3D 항목이 subject 와 더 유사하게 보입니까?”</p></li>
+<li><p><strong>3D 일관성과 타당성</strong>: “어떤 3D 항목이 더 타당하고 일관된 기하학적 구조를 가지고 있습니까?”</p></li>
+<li><p><strong>프롬프트 충실도</strong>: “어떤 비디오가 제공된 프롬프트를 더 잘 반영합니까?”</p></li>
+</ol>
+<ul class="simple">
+<li><p><strong>연구 방법</strong></p>
+<ul>
+<li><p>3D 일관성과 주제 충실도 연구에서는 데이터셋의 30개 subject 각각에 대해 회전 비디오 결과를 제시하고 11명의 사용자가 각 쌍에 대해 응답</p></li>
+<li><p>프롬프트 충실도 연구에서는 54개의 고유한 프롬프트와 주제 쌍에 대해 비디오를 생성하고, 21명의 사용자가 응답</p></li>
+</ul>
+</li>
+<li><p><strong>최종 결과</strong></p>
+<ul>
+<li><p>최종 결과는 다수결 투표를 통해 산출</p></li>
+<li><p>DreamBooth3D는 3D 일관성, 주제 충실도, 프롬프트 충실도에서 기준 모델들보다 유의미하게 더 선호됨.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="sample-applications">
+<h2>4.2. Sample Applications<a class="headerlink" href="#sample-applications" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>Recontextualization. (재문맥화)</strong></p>
+<ul>
+<li><p>단순한 프롬프트를 사용하여 다양한 개 주제의 3D 모델로 재문맥화한 샘플 결과</p></li>
+<li><p>모든 subject 에서 텍스트 프롬프트에 주어진 문맥을 일관되게 반영</p></li>
+<li><p>출력된 3D 모델의 자세와 로컬 변형은 입력 이미지에 없는 포즈임에도 불구하고 매우 사실적</p></li>
+</ul>
+</li>
+</ul>
+<ul class="simple">
+<li><p><strong>Color/Material Editing.</strong></p>
+<ul>
+<li><p>색상 편집 및 재질 편집</p></li>
+</ul>
+</li>
+<li><p><strong>Accessorization</strong></p>
+<ul>
+<li><p>subject 에 액세서리 추가</p></li>
+</ul>
+</li>
+</ul>
+<ul class="simple">
+<li><p><strong>Stylization</strong></p>
+<ul>
+<li><p>크림색 신발을 색상과 프릴 추가를 기반으로 스타일화</p></li>
+</ul>
+</li>
+<li><p><strong>Cartoon-to-3D</strong></p>
+<ul>
+<li><p>비사실적 피사체 이미지(예: 2D 평면 캐릭터)를 그럴듯한 3D 형태로 변환</p></li>
+<li><p>모든 subject 이미지가 정면임에도 불구하고, 그럴듯한 3D 결과물 생성</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="limitations">
+<h2>4.3. Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<hr class="docutils" />
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/limitation.png"><img alt="limitation" class="bg-primary mb-1" src="../../_images/limitation.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 741 </span><span class="caption-text">limitations</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>최적화된 3D 표현이 때때로 과도하게 포화되고 매끄럽게 처리되는 경우가 존재</p>
+<ol class="arabic simple">
+<li><p>높은 가중치 가이던스를 가진 SDS 기반 최적화에 의해 발생</p></li>
+<li><p>64×64 픽셀이라는 상대적으로 낮은 이미지 해상도로 제한되어 발생</p></li>
+<li><p>diffusion 과 NeRF 의 효율성 향상은 더 높은 해상도로 확장할 수 있는 가능성을 제공</p></li>
+</ol>
+</li>
+<li><p>Janus problem : 최적화된 3D 표현은 입력 이미지에 시점 변화가 없으면 여러 불일치한 시점에서 정면으로 보이는 viewpoints 불일치 문제가 발생</p></li>
+<li><p>선글라스와 같은 얇은 객체 구조를 재구성하는 데 어려움이 존재</p></li>
+</ol>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>5. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Subject 중심의 텍스트-3D 생성을 위한 방법인 DreamBooth3D를 제안</p></li>
+<li><p>Subject 에 대한 소규모 casual 이미지 세트가 주어지면, (카메라 포즈와 같은 추가 정보 없이)
+입력 텍스트 프롬프트에서 제공된 컨텍스트(자고 있는, 점프하는, 빨간 등)를 준수하는 subject 별 3D assets 를 생성</p></li>
+<li><p>DreamBooth 데이터셋 에 대한 광범위한 실험을 통해 해당 방법이 주어진 subject 와 높은 유사성을 가지면서도 입력 텍스트 프롬프트에 나타난 컨텍스트를 잘 반영하는 현실적인 3D assets 을 생성할 수 있음을 입증</p></li>
+<li><p>정량적 및 정성적 평가에서 여러 기준 모델보다 우수한 성능을 보임을 확인</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="magic-3d.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Magic3D</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="zero123plus.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Zero123++</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Dream Booth 3D</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#approach"><strong>3. Approach</strong></a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#goal-subject-identity-3d-assets">🌟 <strong>Goal</strong>
+텍스트 프롬프트에 충실하면서 주어진 subject 의 identity (기하 형태 및 외관)을 반영하는 3D assets 생성</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">3.1. Preliminaries</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#t2i-diffusion-models">3.1.1 <strong>T2I diffusion models</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dream-booth-t2i-personalization">3.1.2 <strong>Dream Booth T2I Personalization.</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dreamfusion">3.1.3 <strong>DreamFusion</strong></a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#failure-of-naive-dreambooth-fusion">3.2 Failure of Naive Dreambooth+Fusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dreambooth3d-optimization">3.3. Dreambooth3D Optimization</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-3d-with-partial-dreambooth"><strong>3.3.1 Stage 1️⃣: 3D with Partial DreamBooth</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-multi-view-data-generation"><strong>3.3.2 Stage 2️⃣: Multi-view Data Generation</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage3-final-nerf-with-multi-view-dreambooth"><strong>3.3.3 Stage3️⃣: Final NeRF with Multi-view DreamBooth</strong></a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4.1. Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-applications">4.2. Sample Applications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">4.3. Limitations</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DreamFusion.html b/docs/review/DreamFusion.html
old mode 100644
new mode 100755
index 29158629..98c96f5f
--- a/docs/review/DreamFusion.html
+++ b/docs/review/DreamFusion.html
@@ -1,1028 +1,1048 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DreamFusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamFusion';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Magic3D" href="magic-3d.html" />
-    <link rel="prev" title="Shap-E" href="Shap-E.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamFusion.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DreamFusion.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DreamFusion</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-score-distillation-sampling">2. Diffusion Models and Score Distillation Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#how-can-we-sample-in-parameter-space-not-pixel-space">2.1 How can we sample in parameter space, not pixel space?</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-dreamfusion-algorithm">3. The DreamFusion Algorithm</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-rendering-of-a-3d-model">3.1 Neural Rendering of a 3D Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-synthesis">3.2 Text-to-3D Synthesis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> DreamFusion: Text-to-3D using 2D Diffusion</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2209.14988">https://arxiv.org/abs/2209.14988</a></p></li>
-<li><p>Code:</p>
-<ul>
-<li><p><a class="reference external" href="https://github.com/ashawkey/stable-dreamfusion">NonOfficial</a></p></li>
-</ul>
-</li>
-<li><p>Project Page : <a class="reference external" href="https://dreamfusion3d.github.io/">https://dreamfusion3d.github.io/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Geonhak Song</p></li>
-<li><p><strong>Last updated on {July. 3, 2024}</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dreamfusion">
-<h1><strong>DreamFusion</strong><a class="headerlink" href="#dreamfusion" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<p>DreamFusion = NeRF + Score Distillation Sampling Loss를 활용한 Text-to-3D 모델</p>
-<p><strong>기존 한계</strong></p>
-<p>Text-to-Image 방식과 같이 큰 규모의 데이터로 학습시킨 것과 동일 방식의 3D 생성 모델을 만들기 위해서는 <strong>대규모 label된 3D 데이터셋</strong>과 3D 데이터를 효율적으로 처리할 수 있는 <strong>3D diffusion model이 필요</strong>하지만, 현재 이를 <strong>충족시킬 수 없음</strong>.</p>
-<p><strong>방법론</strong></p>
-<p>DreamFusion은 pretrained 2D Text-to-Image diffusion model을 활용하여 text-to-3D 합성을 수행할 수 있는 새로운 방법 제시.</p>
-<p>본 방법은 <strong>parametric image generation</strong> 최적화를 위한 prior로 2D diffusion model을 활용할 수 있는 probability density distillation 기반의 loss를 도입.</p>
-<p><strong>결과</strong></p>
-<p>텍스트 기반으로 생성된 3D 모델은 임의의 각도와 조명에서 볼 수 있으며, 임의의 3D 환경에 합성.</p>
-<p>해당 접근 방식을 통해 추가적인 3D 학습 데이터나 image diffusion model의 수정 없이도 효과적으로 동작함.</p>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>이미지 생성 모델은 text 기반으로 high-fidelity, diverse, controllable 이미지 합성을 지원한다.</p>
-<ul>
-<li><p>가능했던 이유 2가지</p>
-<ul>
-<li><ol class="arabic simple">
-<li><p>large image-text dataset</p></li>
-</ol>
-</li>
-<li><ol class="arabic simple" start="2">
-<li><p>큰 규모의 생성 모델</p></li>
-</ol>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>이런 Diffusion Model을 다른 domain에 적용하려는 시도는 성공적이었지만, <strong>많은 규모의 training data가 필요</strong>.</p></li>
-<li><p>3D 합성 분야에서도 3D asset이 요구되지만 이는 이미지 대비 상대적으로 훨씬 더 많은 시간과 노력을 요하는 작업.</p></li>
-<li><p>voxel, point cloud를 비롯한 explicit representation을 위한 방법이나 GAN 기반의 방법을 통해 3D generator를 만들려는 시도 또한 있었으나 이 방법들은 임의의 text를 기반으로 3D object synthesis가 어려운 상황.</p></li>
-<li><p>한편, inverse rendering인 NeRF를 기반 Text 기반 3D 생성 방법에 통합하려는 시도가 많이 있었는데 그중 대표적인 방법이 DreamField 이다.</p></li>
-<li><p>DreamField (CVPR 2022) [<a class="reference external" href="https://arxiv.org/abs/2112.01455">arxiv</a>] [<a class="reference external" href="https://ajayj.com/dreamfields">Official Project page</a>]</p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/dreamfieldfig1.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/dreamfieldfig1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 691 </span><span class="caption-text">DreamField Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DreamField는 CLIP의 frozen image-text joint embedding model과 optimization기반 방법을 통해 NeRF를 학습.</p></li>
-<li><p>그러나 이 방법은 <strong>부족한 현실성과 정확성에 대한 한계</strong>를 보임.</p></li>
-<li><p>본 저자들은 Dream Fields에서 사용한 CLIP 대신 2D Diffusion model로부터 distill된 loss를 활용하여 사용.</p></li>
-<li><p>이는 <strong>probability density distillation</strong> 기반으로 구성되는데, 이는 <strong>forward process의 diffusion 기반의 공유 평균을 가지는</strong> <strong>Gaussian distribution</strong>과 <strong>pretrained diffusion model로부터 학습된</strong> <strong>score function</strong>간의 <strong>KL divergence</strong>를 <strong>최소화</strong>하는 방향을 의미.</p></li>
-<li><p><strong>Score Distillation Sampling(SDS)방법</strong>은 미분가능한 image parameterization을 기반으로 sampling 최적화가 가능하게 함.</p></li>
-<li><p>즉, NeRF와 SDS를 결합함으로써 Text prompt가 입력으로 주어진다면, DreamFusion은 고품질이며 일관성있는 3D object와 scene들을 만들어낼 수 있다.</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig1.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 692 </span><span class="caption-text">DreamFusion Figure 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="diffusion-models-and-score-distillation-sampling">
-<h3>2. Diffusion Models and Score Distillation Sampling<a class="headerlink" href="#diffusion-models-and-score-distillation-sampling" title="Permalink to this heading">#</a></h3>
-<p>Data :  <span class="math notranslate nohighlight">\(x\)</span></p>
-<p>Forward process : <span class="math notranslate nohighlight">\(q\)</span></p>
-<p>Reverse process : <span class="math notranslate nohighlight">\(p\)</span></p>
-<p>Perturbed latent at timestep <span class="math notranslate nohighlight">\(t\)</span>: <span class="math notranslate nohighlight">\(z_t\)</span></p>
-<p>Marginal distribution of the latent variables at timestep <span class="math notranslate nohighlight">\(t\)</span> given an initial datapoint <span class="math notranslate nohighlight">\(x\)</span> :
-<span class="math notranslate nohighlight">\(q(z_t | x ) = \mathcal{N} (\alpha_t x ,\sigma_t^2 \mathbf{I})\)</span></p>
-<p>Generative model : <span class="math notranslate nohighlight">\(p\)</span></p>
-<p>starting Random Noise : <span class="math notranslate nohighlight">\(p(z_T) = \mathcal{N} (0, \mathbf{I})\)</span></p>
-<p>Transition <span class="math notranslate nohighlight">\(p_\phi (z_{t-1} | z_t ) = q(z_{t-1} |z_t, x = \hat{x_\phi} (z_t; t))\)</span></p>
-<p>Posterior dist from forward process :  <span class="math notranslate nohighlight">\(q(z_{t-1} |z_t, x)\)</span></p>
-<p>A learned approximation of the optimal denoiser : <span class="math notranslate nohighlight">\(\hat{x_\phi} (z_t; t)\)</span></p>
-<p>Latent : <span class="math notranslate nohighlight">\(z_t : \mathbb{E} [x|z_t] \approx \hat{x_\phi} (z_t; t) = (z_t - \sigma_t \epsilon_\phi (z_t ; t )) / \alpha_t\)</span></p>
-<p>예측된 noise는 smooth density에 대한 예측된 score function (<span class="math notranslate nohighlight">\(\nabla_{z_t} \log p(z_t)\)</span>)과 연관</p>
-<p>ELBO로 생성 모델 학습은 <span class="math notranslate nohighlight">\(\phi\)</span> parameter를 활용한 weighted denoising score matching objective로 간소화 가능</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq1.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 693 </span><span class="caption-text">DreamFusion Equation 1</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Diffusion 학습 (2가지 관점)</p>
-<ol class="arabic simple">
-<li><p>latent -variable model 학습</p></li>
-<li><p>noise data에 상응하는 score function 학습</p></li>
-</ol>
-<ul class="simple">
-<li><p>score function이 <span class="math notranslate nohighlight">\(s_\phi (z_t ;t ) = -\epsilon_\phi (z_t ; t) / \sigma_t\)</span>로 주어졌을 때, marginal dist 근사하는 <span class="math notranslate nohighlight">\(p_\phi (z_t;t)\)</span> 사용</p></li>
-</ul>
-<p>Text-to-image diffusion model (text embedding : <span class="math notranslate nohighlight">\(y\)</span>)</p>
-<p>예측해야하는 noise 값 : <span class="math notranslate nohighlight">\(\epsilon_\phi (z_t ; t, y)\)</span></p>
-<p>CFG : <span class="math notranslate nohighlight">\(w : \hat{\epsilon}_\phi (z_t ; t, y) = (1+w) \epsilon_\phi (z_t ; t, y) - w \epsilon_\phi (z_t ; t)\)</span></p>
-<p>CFG는 score function을 conditional density가 unconditional density에 비해 상대적으로 큰 영역을 선호하도록 조정. Diversity를 희생하여 sample fidelity 향상</p>
-</section>
-<section id="how-can-we-sample-in-parameter-space-not-pixel-space">
-<h3>2.1 How can we sample in parameter space, not pixel space?<a class="headerlink" href="#how-can-we-sample-in-parameter-space-not-pixel-space" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>저자들은 pixel 기반 diffusion model에서 sampling 되는 pixel sampling에는 관심이 없고, <strong>랜덤한 각도에서 rendering할 때, 좋은 이미지를 만들어낼 수 있는 3D 모델을 생성하는데 관심이 있음.</strong></p></li>
-<li><p>이와 같은 모델을 <strong>DIP (Differentiable image parameterization)</strong>라 분류함.</p></li>
-<li><p>즉, 미분가능한 generator <span class="math notranslate nohighlight">\(g\)</span>는 parameter <span class="math notranslate nohighlight">\(\theta\)</span>를 통해 image <strong><span class="math notranslate nohighlight">\(x=g(\theta)\)</span></strong>로 변환할 수 있음.</p></li>
-<li><p>DIP를 활용하여 기존 학습된 공간을 최적화 알고리즘을 통해 학습시킬 수 있는데, 3D에서는 3D volume의 parameter로 <span class="math notranslate nohighlight">\(\theta\)</span>, volumetric renderer <span class="math notranslate nohighlight">\(g\)</span>로 지정할 수 있다.</p></li>
-<li><p>다만, 해당 parameter들을 학습시키기 위해서, <strong>diffusion model을 적용할 수 있는 loss function이 필요</strong>.</p></li>
-<li><p><strong><span class="math notranslate nohighlight">\(x=g(\theta)\)</span></strong> 가 frozen diffusion model로부터 sample 결과인 것같은 <span class="math notranslate nohighlight">\(\theta\)</span> 최적화를 진행 희망.</p></li>
-<li><p>여기서 저자들은 DeepDream과 비슷한 스타일의 differentiable loss func이 필요. 즉, 신뢰도 높은 이미지는 loss가 적고, 신뢰도 적은 이미지에서는 loss가 높게 설정.</p></li>
-<li><p>초기 diffusion training loss 재사용했으나 realistic sample 생성이 안 됨.</p></li>
-<li><p>아래 식 : <span class="math notranslate nohighlight">\(\mathcal{L}\)</span> Gradient에 대한 식</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq2.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 694 </span><span class="caption-text">DreamFusion Equation 2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>일반적으로 U-Net Jacobian term은 계산 비용이 많이 사용되고, 작은 noise에 대해 제대로 작동되지 않음.</p></li>
-<li><p>저자들은 <strong>U-Net Jacobian term을 생략</strong>함으로써 diffusion model을 사용한 DIP 최적화에 효과적인 gradient로 유도할 수 있음을 발견.</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq3.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 695 </span><span class="caption-text">DreamFusion Equation 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>이를 통해 해당 loss는 높은 density 영역으로 이동하기 위해 diffusion 모델의 score function을 따르는 방향으로 update.</p></li>
-<li><p>Appendix A.4에서 diffusion model의 학습된 score function을 사용하여 weighted probability density distillation loss의 gradient임을 보여줌.</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq4.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 696 </span><span class="caption-text">DreamFusion Equation 4</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>쉽게 적용 가능하고 diffusion model의 backpropagation이 필요 없음.</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig2.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 697 </span><span class="caption-text">DreamFusion Figure 2</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig8.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 698 </span><span class="caption-text">DreamFusion Figure 8</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="the-dreamfusion-algorithm">
-<h2>3. The DreamFusion Algorithm<a class="headerlink" href="#the-dreamfusion-algorithm" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig3.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 699 </span><span class="caption-text">DreamFusion Figure 3</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Text를 기반한 3D asset을 만드는 알고리즘 소개 단계</p>
-<p>Imagen 중 64x64 base model만 수정없이 사용</p>
-<section id="neural-rendering-of-a-3d-model">
-<h3>3.1 Neural Rendering of a 3D Model<a class="headerlink" href="#neural-rendering-of-a-3d-model" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/nerf.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/nerf.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 700 </span><span class="caption-text">NeRF Figure</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq5.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 701 </span><span class="caption-text">DreamFusion Equation 5</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>NeRF 구성 2가지 : volumetric raytracer &amp; MLP</p>
-<p>NeRF로부터 이미지를 Rendering하기 위해 ray casting.</p>
-<p>각 ray를 따라 샘플된 3D points <span class="math notranslate nohighlight">\(\mu\)</span>들을 MLP에 통과시켜 4개의 스칼라 output 획득.</p>
-<p>: (volumetric density <span class="math notranslate nohighlight">\(\tau\)</span>, RGB color <span class="math notranslate nohighlight">\(c\)</span>) (alpha compositing)</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/mipnerf.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/mipnerf.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 702 </span><span class="caption-text">MipNeRF Figure</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>본 방법에서는 <strong>mip-NeRF 360</strong> 사용(aliasing 감소 특화)</p>
-<p><img alt="mipnerf.png" src="../../_images/mipnerf.png" /></p>
-<p><strong>Shading</strong></p>
-<p>일반적인 radiance를 내보내는 NeRF와 달리, 본 논문에서는 각 point별 RGB albedo <span class="math notranslate nohighlight">\(\rho\)</span> 사용</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq6.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 703 </span><span class="caption-text">DreamFusion Equation 6</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\tau\)</span> : volumetric density</p>
-<p>3D 포인트에 대한 마지막 shaded output color 계산을 위해서는 normal vector가 필요.</p>
-<p>normal vector는 3D coordinate <span class="math notranslate nohighlight">\(\mu\)</span> 관점에 대해서 density <span class="math notranslate nohighlight">\(\tau\)</span> 에 대한 negative gradient 를 normalizing을 통해 계산될 수 있음.</p>
-<p><span class="math notranslate nohighlight">\(n = - \nabla_\mu \tau / \lVert \nabla_\mu \tau \rVert\)</span></p>
-<p>normal : <span class="math notranslate nohighlight">\(n\)</span></p>
-<p>material albedo : <span class="math notranslate nohighlight">\(\rho\)</span></p>
-<p>some point light source with 3D coordinate : <span class="math notranslate nohighlight">\(l\)</span> &amp; color <span class="math notranslate nohighlight">\(l_\rho\)</span></p>
-<p>ambient light color : <span class="math notranslate nohighlight">\(l_a\)</span></p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusioneq7.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusioneq7.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 704 </span><span class="caption-text">DreamFusion Equation 7</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>추가 발견 내용 : 랜덤하게 albedo color <span class="math notranslate nohighlight">\(\rho\)</span>를 white (1,1,1)로 교체하여 textureless 음영 처리 결과물 생성할 수 있음.</p>
-<p>모델이 퇴화된 솔루션을 만드는 것을 방지하는 데 유익</p>
-<p><strong>Scene Structure</strong></p>
-<p><strong>1) 고정된 Bounding sphere</strong>: NeRF 모델의 query를 고정된 bounding sphere 내에서만 수행하여 밀도가 카메라 근처에 채워지지 않도록 함.</p>
-<p><strong>2) 추가 환경 맵 생성</strong>: 두 번째 MLP를 사용하여 배경 색상을 계산하고, 렌더링된 색상을 배경 색상 위에 합성.</p>
-<p><strong>3) 누적된 alpha 값 활용</strong>: 누적된 alpha 값을 통해 배경과 렌더링된 광선 색상을 자연스럽게 합성.</p>
-<p><strong>Geometry regularizers</strong></p>
-<p><strong>1)</strong> <strong>opacity에 대한 regularization penalty 추가</strong> : Mip-NeRF 360 모델에서 빈 공간에 대한 불필요한 채움을 방지하기 위해 진행. (Zero-shot text-guided object generation with dream fields. CVPR 2022)</p>
-<p><strong>2) Ref-NeRF에서 제안된 orientation loss의 수정 버전을 사용</strong> : Density field에서 normal vector가 camera로부터 멀어지는 문제 방지를 위해 제안.</p>
-<p>Appendix A.2 참조</p>
-</section>
-<section id="text-to-3d-synthesis">
-<h3>3.2 Text-to-3D Synthesis<a class="headerlink" href="#text-to-3d-synthesis" title="Permalink to this heading">#</a></h3>
-<p>각 text prompt에 대해 NeRF 초기 랜덤 초기화</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig3_fig.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig3_fig.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 705 </span><span class="caption-text">DreamFusion Figure 3-1</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DreamFusion의 각 iter 최적화</p>
-<p>(1) randomly sample a camera and light</p>
-<p>(2) render an image of the NeRF from that camera and shade with the light</p>
-<p>(3) compute gradients of the SDS loss with respect to the NeRF parameters</p>
-<p>(4) update the NeRF parameters using an optimizer</p>
-<p><strong>1. Random camera and light sampling</strong></p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/spherical_coord.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/spherical_coord.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 706 </span><span class="caption-text">Spherical Coordinate Figure</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>매 iteration, camera position은 spherical coordinate에서 무작위로 sample됨.</p>
-<p>elevation angle : <span class="math notranslate nohighlight">\(\phi_{cam} \in [-10, 90]\)</span></p>
-<p>azimuth angle : <span class="math notranslate nohighlight">\(\theta_{cam} \in [0, 360]\)</span></p>
-<p>origin으로부터 distance : <span class="math notranslate nohighlight">\([1, 1.5]\)</span></p>
-<p>focal length multiplier : <span class="math notranslate nohighlight">\(\lambda_{focal} \in \mathcal{U} (0.7, 1.35)\)</span></p>
-<p>focal length : <span class="math notranslate nohighlight">\(\lambda_{focal} w\)</span>  (<span class="math notranslate nohighlight">\(w\)</span> =64)</p>
-<p>point light position <span class="math notranslate nohighlight">\(l\)</span>은 camera position 중심 주변 분포에서 sample</p>
-<p>다양한 camera location &amp; distance 사용</p>
-<p><strong>2. Rendering.</strong></p>
-<p>Camera pose와 light position이 주어졌을 때, 64x64 해상도의 shaded NeRF model을 render</p>
-<p>Rendering 세 가지 옵션 중 하나를 무작위로 선택:</p>
-<p><strong>1) 조명이 적용된 색상 렌더링 (illuminated color render)</strong>: 조명이 적용된 상태에서의 색상 렌더링 (Fig 3. Color)</p>
-<p><strong>2) textureless render</strong>: 텍스처 없이 음영 처리된 상태로 렌더링. ( Fig 3.  Normal?)</p>
-<p><strong>3) rendering of the albedo</strong>: 음영 없이 알베도 색상만을 렌더링. ( Fig 3.  Albedo)</p>
-<p><strong>3. Diffusion loss with view-dependent conditioning</strong></p>
-<p>view-dependent text를 추가하는 것이 효과적</p>
-<p>높은 고도 각도 <span class="math notranslate nohighlight">\(\phi_{cam} &gt; 60^{\circ}\)</span> 일 때, “overhead view”</p>
-<p><span class="math notranslate nohighlight">\(\phi_{cam} &lt; 60^{\circ}\)</span> 일 때, azimuth angle <span class="math notranslate nohighlight">\(\theta_{cam}\)</span>에 따라 “front view”, “side view”, “back view” text embedding</p>
-<p>pretrained 64x64 base text-to-image model (Imagen)</p>
-<p>T5-XXL text embedding</p>
-<p>weighting function <span class="math notranslate nohighlight">\(w(t) = \sigma_t^2\)</span></p>
-<p>sample <span class="math notranslate nohighlight">\(t \sim \mathcal{U} (0.02, 0.98)\)</span> 너무 높거나 낮은 noise level 피하기 위해 설정.</p>
-<p>CFG <span class="math notranslate nohighlight">\(w\)</span>=100, 높은 guidance weight가 향상된 sample quality를 줌</p>
-<p><strong>4. Optimization</strong></p>
-<p>TPUv4 (4 chips)</p>
-<p>15,000 iters, 1.5h</p>
-<p>Appendix A.2 optimization setting</p>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig4.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 707 </span><span class="caption-text">DreamFusion Figure 4</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusiontable1.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusiontable1.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 708 </span><span class="caption-text">DreamFusion Table 1</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>일반적인 3D 복원 작업 평가</strong>:</p>
-<ul>
-<li><p>Chamfer Distance와 같은 참조 기반 평가 방법 사용.</p></li>
-<li><p>PSNR은 보유된 사진과 렌더링된 보기의 품질을 비교.</p></li>
-</ul>
-</li>
-<li><p><strong>Zero-shot 텍스트-3D 생성 평가의 어려움</strong>:</p>
-<ul>
-<li><p>GT가 없어 참조 기반 평가 적용 어려움.</p></li>
-</ul>
-</li>
-<li><p><strong>대안적 평가 방법 CLIP R-Precision</strong>:</p>
-<ul>
-<li><p>CLIP R-Precision은 rendering된 장면들이 주어졌을 때 입력 캡션과 일치하는 비율을 나타냄. 특정 문장을 여러 명령어 세트 중에서 정확히 찾는지 평가.</p></li>
-<li><p>object-centric COCO validation subset에서 153개 프롬프트 사용.</p></li>
-</ul>
-</li>
-<li><p><strong>Geo(Geometry) 평가</strong>:</p>
-<ul>
-<li><p>기하학적 평가를 위해 textureless render에 대한 R-Precision 측정</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig5.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 709 </span><span class="caption-text">DreamFusion Figure 5</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Ablation</strong></p>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig6.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 710 </span><span class="caption-text">DreamFusion Figure 6</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>ViewAug (다양한 시야각):</strong> 다양한 시야각을 고려하는 것이 3D 장면의 일관성을 높임.</p></li>
-<li><p><strong>ViewDep (뷰 의존적 prompt):</strong> prompt에 시야각 관련 정보를 추가하여 정확한 geometry 복원.</p></li>
-<li><p><strong>Lighting (조명 최적화):</strong> 무채색 albedo rendering 외에 lighting rendering optimization</p></li>
-<li><p><strong>Textureless :</strong> 매끄러운 표면을 만듦.</p></li>
-</ul>
-<p><strong>제한 사항:</strong></p>
-<ul class="simple">
-<li><p><strong>SDS의 한계</strong></p></li>
-<li><p><strong>세밀한 디테일 부족</strong></p></li>
-<li><p><strong>3D 복원이 근본적으로 어렵다</strong></p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Shap-E.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Shap-E</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="magic-3d.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Magic3D</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-score-distillation-sampling">2. Diffusion Models and Score Distillation Sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#how-can-we-sample-in-parameter-space-not-pixel-space">2.1 How can we sample in parameter space, not pixel space?</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-dreamfusion-algorithm">3. The DreamFusion Algorithm</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-rendering-of-a-3d-model">3.1 Neural Rendering of a 3D Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-synthesis">3.2 Text-to-3D Synthesis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DreamFusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamFusion';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Magic3D" href="magic-3d.html" />
+    <link rel="prev" title="Shap-E" href="Shap-E.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamFusion.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DreamFusion.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DreamFusion</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-score-distillation-sampling">2. Diffusion Models and Score Distillation Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#how-can-we-sample-in-parameter-space-not-pixel-space">2.1 How can we sample in parameter space, not pixel space?</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-dreamfusion-algorithm">3. The DreamFusion Algorithm</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-rendering-of-a-3d-model">3.1 Neural Rendering of a 3D Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-synthesis">3.2 Text-to-3D Synthesis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreamFusion: Text-to-3D using 2D Diffusion</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2209.14988">https://arxiv.org/abs/2209.14988</a></p></li>
+<li><p>Code:</p>
+<ul>
+<li><p><a class="reference external" href="https://github.com/ashawkey/stable-dreamfusion">NonOfficial</a></p></li>
+</ul>
+</li>
+<li><p>Project Page : <a class="reference external" href="https://dreamfusion3d.github.io/">https://dreamfusion3d.github.io/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Geonhak Song</p></li>
+<li><p><strong>Last updated on {July. 3, 2024}</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dreamfusion">
+<h1><strong>DreamFusion</strong><a class="headerlink" href="#dreamfusion" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<p>DreamFusion = NeRF + Score Distillation Sampling Loss를 활용한 Text-to-3D 모델</p>
+<p><strong>기존 한계</strong></p>
+<p>Text-to-Image 방식과 같이 큰 규모의 데이터로 학습시킨 것과 동일 방식의 3D 생성 모델을 만들기 위해서는 <strong>대규모 label된 3D 데이터셋</strong>과 3D 데이터를 효율적으로 처리할 수 있는 <strong>3D diffusion model이 필요</strong>하지만, 현재 이를 <strong>충족시킬 수 없음</strong>.</p>
+<p><strong>방법론</strong></p>
+<p>DreamFusion은 pretrained 2D Text-to-Image diffusion model을 활용하여 text-to-3D 합성을 수행할 수 있는 새로운 방법 제시.</p>
+<p>본 방법은 <strong>parametric image generation</strong> 최적화를 위한 prior로 2D diffusion model을 활용할 수 있는 probability density distillation 기반의 loss를 도입.</p>
+<p><strong>결과</strong></p>
+<p>텍스트 기반으로 생성된 3D 모델은 임의의 각도와 조명에서 볼 수 있으며, 임의의 3D 환경에 합성.</p>
+<p>해당 접근 방식을 통해 추가적인 3D 학습 데이터나 image diffusion model의 수정 없이도 효과적으로 동작함.</p>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>이미지 생성 모델은 text 기반으로 high-fidelity, diverse, controllable 이미지 합성을 지원한다.</p>
+<ul>
+<li><p>가능했던 이유 2가지</p>
+<ul>
+<li><ol class="arabic simple">
+<li><p>large image-text dataset</p></li>
+</ol>
+</li>
+<li><ol class="arabic simple" start="2">
+<li><p>큰 규모의 생성 모델</p></li>
+</ol>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>이런 Diffusion Model을 다른 domain에 적용하려는 시도는 성공적이었지만, <strong>많은 규모의 training data가 필요</strong>.</p></li>
+<li><p>3D 합성 분야에서도 3D asset이 요구되지만 이는 이미지 대비 상대적으로 훨씬 더 많은 시간과 노력을 요하는 작업.</p></li>
+<li><p>voxel, point cloud를 비롯한 explicit representation을 위한 방법이나 GAN 기반의 방법을 통해 3D generator를 만들려는 시도 또한 있었으나 이 방법들은 임의의 text를 기반으로 3D object synthesis가 어려운 상황.</p></li>
+<li><p>한편, inverse rendering인 NeRF를 Text 기반 3D 생성 방법에 통합하려는 시도가 많이 있었는데 그중 대표적인 방법이 DreamField이다.</p></li>
+<li><p>DreamField (CVPR 2022) [<a class="reference external" href="https://arxiv.org/abs/2112.01455">arxiv</a>] [<a class="reference external" href="https://ajayj.com/dreamfields">Official Project page</a>]</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreamfieldfig1.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/dreamfieldfig1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 703 </span><span class="caption-text">DreamField Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DreamField는 CLIP의 frozen image-text joint embedding model과 optimization기반 방법을 통해 NeRF를 학습.</p></li>
+<li><p>그러나 이 방법은 <strong>부족한 현실성과 정확성에 대한 한계</strong>를 보임.</p></li>
+<li><p>본 저자들은 DreamField에서 사용한 CLIP 대신 2D Diffusion model로부터 distill된 loss를 활용.</p></li>
+<li><p>이는 <strong>probability density distillation</strong> 기반으로 구성되는데, 이는 <strong>forward process의 diffusion 기반의 공유 평균을 가지는</strong> <strong>Gaussian distribution</strong>과 <strong>pretrained diffusion model로부터 학습된</strong> <strong>score function</strong>간의 <strong>KL divergence</strong>를 <strong>최소화</strong>하는 방향을 의미.</p></li>
+<li><p><strong>Score Distillation Sampling(SDS)방법</strong>은 미분가능한 image parameterization을 기반으로 sampling 최적화가 가능하게 함.</p></li>
+<li><p>즉, NeRF와 SDS를 결합함으로써 Text prompt가 입력으로 주어진다면, DreamFusion은 고품질이며 일관성있는 3D object와 scene들을 만들어낼 수 있다.</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig1.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 704 </span><span class="caption-text">DreamFusion Figure 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="diffusion-models-and-score-distillation-sampling">
+<h3>2. Diffusion Models and Score Distillation Sampling<a class="headerlink" href="#diffusion-models-and-score-distillation-sampling" title="Permalink to this heading">#</a></h3>
+<p>Data :  <span class="math notranslate nohighlight">\(x\)</span></p>
+<p>Forward process : <span class="math notranslate nohighlight">\(q\)</span></p>
+<p>Reverse process : <span class="math notranslate nohighlight">\(p\)</span></p>
+<p>Perturbed latent at timestep <span class="math notranslate nohighlight">\(t\)</span>: <span class="math notranslate nohighlight">\(z_t\)</span></p>
+<p>Marginal distribution of the latent variables at timestep <span class="math notranslate nohighlight">\(t\)</span> given an initial datapoint <span class="math notranslate nohighlight">\(x\)</span> :
+<span class="math notranslate nohighlight">\(q(z_t | x ) = \mathcal{N} (\alpha_t x ,\sigma_t^2 \mathbf{I})\)</span></p>
+<p>Generative model : <span class="math notranslate nohighlight">\(p\)</span></p>
+<p>starting Random Noise : <span class="math notranslate nohighlight">\(p(z_T) = \mathcal{N} (0, \mathbf{I})\)</span></p>
+<p>Transition <span class="math notranslate nohighlight">\(p_\phi (z_{t-1} | z_t ) = q(z_{t-1} |z_t, x = \hat{x_\phi} (z_t; t))\)</span></p>
+<p>Posterior dist from forward process :  <span class="math notranslate nohighlight">\(q(z_{t-1} |z_t, x)\)</span></p>
+<p>A learned approximation of the optimal denoiser : <span class="math notranslate nohighlight">\(\hat{x_\phi} (z_t; t)\)</span></p>
+<p>Latent : <span class="math notranslate nohighlight">\(z_t : \mathbb{E} [x|z_t] \approx \hat{x_\phi} (z_t; t) = (z_t - \sigma_t \epsilon_\phi (z_t ; t )) / \alpha_t\)</span></p>
+<p>예측된 noise는 smooth density에 대한 예측된 score function (<span class="math notranslate nohighlight">\(\nabla_{z_t} \log p(z_t)\)</span>)과 연관</p>
+<p>ELBO로 생성 모델 학습은 <span class="math notranslate nohighlight">\(\phi\)</span> parameter를 활용한 weighted denoising score matching objective로 간소화 가능</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq1.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 705 </span><span class="caption-text">DreamFusion Equation 1</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Diffusion 학습 (2가지 관점)</p>
+<ol class="arabic simple">
+<li><p>latent-variable model 학습</p></li>
+<li><p>noise data에 상응하는 score function 학습</p></li>
+</ol>
+<ul class="simple">
+<li><p>score function이 <span class="math notranslate nohighlight">\(s_\phi (z_t ;t ) = -\epsilon_\phi (z_t ; t) / \sigma_t\)</span>로 주어졌을 때, marginal dist 근사하는 <span class="math notranslate nohighlight">\(p_\phi (z_t;t)\)</span> 사용</p></li>
+</ul>
+<p>Text-to-image diffusion model (text embedding : <span class="math notranslate nohighlight">\(y\)</span>)</p>
+<p>예측해야하는 noise 값 : <span class="math notranslate nohighlight">\(\epsilon_\phi (z_t ; t, y)\)</span></p>
+<p>CFG : <span class="math notranslate nohighlight">\(w : \hat{\epsilon}_\phi (z_t ; t, y) = (1+w) \epsilon_\phi (z_t ; t, y) - w \epsilon_\phi (z_t ; t)\)</span></p>
+<p>CFG는 score function을 conditional density가 unconditional density에 비해 상대적으로 큰 영역을 선호하도록 조정. Diversity를 희생하여 sample fidelity 향상</p>
+</section>
+<section id="how-can-we-sample-in-parameter-space-not-pixel-space">
+<h3>2.1 How can we sample in parameter space, not pixel space?<a class="headerlink" href="#how-can-we-sample-in-parameter-space-not-pixel-space" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>저자들은 pixel 기반 diffusion model에서 sampling 되는 pixel sampling에는 관심이 없고, <strong>랜덤한 각도에서 rendering할 때, 좋은 이미지를 만들어낼 수 있는 3D 모델을 생성하는데 관심이 있음.</strong></p></li>
+<li><p>이와 같은 모델을 <strong>DIP (Differentiable image parameterization)</strong>라 분류함.</p></li>
+<li><p>즉, 미분가능한 generator <span class="math notranslate nohighlight">\(g\)</span>는 parameter <span class="math notranslate nohighlight">\(\theta\)</span>를 통해 image <strong><span class="math notranslate nohighlight">\(x=g(\theta)\)</span></strong>로 변환할 수 있음.</p></li>
+<li><p>DIP를 활용하여 기존 학습된 공간을 최적화 알고리즘을 통해 학습시킬 수 있는데, 3D에서는 3D volume의 parameter로 <span class="math notranslate nohighlight">\(\theta\)</span>, volumetric renderer <span class="math notranslate nohighlight">\(g\)</span>로 지정할 수 있다.</p></li>
+<li><p>다만, 해당 parameter들을 학습시키기 위해서, <strong>diffusion model을 적용할 수 있는 loss function이 필요</strong>.</p></li>
+<li><p><strong><span class="math notranslate nohighlight">\(x=g(\theta)\)</span></strong> 가 frozen diffusion model로부터 sample 결과인 것같은 <span class="math notranslate nohighlight">\(\theta\)</span> 최적화를 진행 희망.</p></li>
+<li><p>여기서 저자들은 DeepDream과 비슷한 스타일의 differentiable loss func이 필요. 즉, 신뢰도 높은 이미지는 loss가 적고, 신뢰도 적은 이미지에서는 loss가 높게 설정.</p></li>
+<li><p>초기 diffusion training loss 재사용했으나 realistic sample 생성이 안 됨.</p></li>
+<li><p>아래 식 : <span class="math notranslate nohighlight">\(\mathcal{L}\)</span> Gradient에 대한 식</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq2.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 706 </span><span class="caption-text">DreamFusion Equation 2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>일반적으로 U-Net Jacobian term은 계산 비용이 많이 사용되고, 작은 noise에 대해 제대로 작동되지 않음.</p></li>
+<li><p>저자들은 <strong>U-Net Jacobian term을 생략</strong>함으로써 diffusion model을 사용한 DIP 최적화에 효과적인 gradient로 유도할 수 있음을 발견.</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq3.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 707 </span><span class="caption-text">DreamFusion Equation 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이를 통해 해당 loss는 높은 density 영역으로 이동하기 위해 diffusion 모델의 score function을 따르는 방향으로 update.</p></li>
+<li><p>Appendix A.4에서 diffusion model의 학습된 score function을 사용하여 weighted probability density distillation loss의 gradient임을 보여줌.</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq4.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 708 </span><span class="caption-text">DreamFusion Equation 4</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>쉽게 적용 가능하고 diffusion model의 backpropagation이 필요 없음.</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig2.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 709 </span><span class="caption-text">DreamFusion Figure 2</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig8.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 710 </span><span class="caption-text">DreamFusion Figure 8</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="the-dreamfusion-algorithm">
+<h2>3. The DreamFusion Algorithm<a class="headerlink" href="#the-dreamfusion-algorithm" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig3.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 711 </span><span class="caption-text">DreamFusion Figure 3</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Text를 기반한 3D asset을 만드는 알고리즘 소개 단계</p>
+<p>Imagen 중 64x64 base model만 수정없이 사용</p>
+<section id="neural-rendering-of-a-3d-model">
+<h3>3.1 Neural Rendering of a 3D Model<a class="headerlink" href="#neural-rendering-of-a-3d-model" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/nerf.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/nerf.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 712 </span><span class="caption-text">NeRF Figure</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq5.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 713 </span><span class="caption-text">DreamFusion Equation 5</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>NeRF 구성 2가지 : volumetric raytracer &amp; MLP</p>
+<p>NeRF로부터 이미지를 Rendering하기 위해 ray casting.</p>
+<p>각 ray를 따라 샘플된 3D points <span class="math notranslate nohighlight">\(\mu\)</span>들을 MLP에 통과시켜 4개의 스칼라 output 획득.</p>
+<p>: (volumetric density <span class="math notranslate nohighlight">\(\tau\)</span>, RGB color <span class="math notranslate nohighlight">\(c\)</span>) (alpha compositing)</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/mipnerf.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/mipnerf.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 714 </span><span class="caption-text">MipNeRF Figure</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>본 방법에서는 <strong>mip-NeRF 360</strong> 사용(aliasing 감소 특화)</p>
+<p><img alt="mipnerf.png" src="../../_images/mipnerf.png" /></p>
+<p><strong>Shading</strong></p>
+<p>일반적인 radiance를 내보내는 NeRF와 달리, 본 논문에서는 각 point별 RGB albedo <span class="math notranslate nohighlight">\(\rho\)</span> 사용</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq6.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 715 </span><span class="caption-text">DreamFusion Equation 6</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\tau\)</span> : volumetric density</p>
+<p>3D 포인트에 대한 마지막 shaded output color 계산을 위해서는 normal vector가 필요.</p>
+<p>normal vector는 density <span class="math notranslate nohighlight">\(\tau\)</span>의 3D coordinate <span class="math notranslate nohighlight">\(\mu\)</span>에 대한 negative gradient를 normalize하여 계산할 수 있음.</p>
+<p><span class="math notranslate nohighlight">\(n = - \nabla_\mu \tau / \lVert \nabla_\mu \tau \rVert\)</span></p>
+<p>normal : <span class="math notranslate nohighlight">\(n\)</span></p>
+<p>material albedo : <span class="math notranslate nohighlight">\(\rho\)</span></p>
+<p>some point light source with 3D coordinate : <span class="math notranslate nohighlight">\(l\)</span> &amp; color <span class="math notranslate nohighlight">\(l_\rho\)</span></p>
+<p>ambient light color : <span class="math notranslate nohighlight">\(l_a\)</span></p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusioneq7.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusioneq7.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 716 </span><span class="caption-text">DreamFusion Equation 7</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>추가 발견 내용 : 랜덤하게 albedo color <span class="math notranslate nohighlight">\(\rho\)</span>를 white (1,1,1)로 교체하여 textureless 음영 처리 결과물 생성할 수 있음.</p>
+<p>모델이 퇴화된 솔루션을 만드는 것을 방지하는 데 유익</p>
+<p><strong>Scene Structure</strong></p>
+<p><strong>1) 고정된 Bounding sphere</strong>: NeRF 모델의 query를 고정된 bounding sphere 내에서만 수행하여 밀도가 카메라 근처에 채워지지 않도록 함.</p>
+<p><strong>2) 추가 환경 맵 생성</strong>: 두 번째 MLP를 사용하여 배경 색상을 계산하고, 렌더링된 색상을 배경 색상 위에 합성.</p>
+<p><strong>3) 누적된 alpha 값 활용</strong>: 누적된 alpha 값을 통해 배경과 렌더링된 광선 색상을 자연스럽게 합성.</p>
+<p><strong>Geometry regularizers</strong></p>
+<p><strong>1)</strong> <strong>opacity에 대한 regularization penalty 추가</strong> : Mip-NeRF 360 모델에서 빈 공간에 대한 불필요한 채움을 방지하기 위해 진행. (Zero-shot text-guided object generation with dream fields. CVPR 2022)</p>
+<p><strong>2) Ref-NeRF에서 제안된 orientation loss의 수정 버전을 사용</strong> : Density field에서 normal vector가 camera로부터 멀어지는 문제 방지를 위해 제안.</p>
+<p>Appendix A.2 참조</p>
+</section>
+<section id="text-to-3d-synthesis">
+<h3>3.2 Text-to-3D Synthesis<a class="headerlink" href="#text-to-3d-synthesis" title="Permalink to this heading">#</a></h3>
+<p>각 text prompt에 대해 NeRF를 랜덤 초기화</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig3_fig.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig3_fig.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 717 </span><span class="caption-text">DreamFusion Figure 3-1</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DreamFusion의 각 iter 최적화</p>
+<p>(1) randomly sample a camera and light</p>
+<p>(2) render an image of the NeRF from that camera and shade with the light</p>
+<p>(3) compute gradients of the SDS loss with respect to the NeRF parameters</p>
+<p>(4) update the NeRF parameters using an optimizer</p>
+<p><strong>1. Random camera and light sampling</strong></p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/spherical_coord.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/spherical_coord.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 718 </span><span class="caption-text">Spherical Coordinate Figure</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>매 iteration, camera position은 spherical coordinate에서 무작위로 sample됨.</p>
+<p>elevation angle : <span class="math notranslate nohighlight">\(\phi_{cam} \in [-10, 90]\)</span></p>
+<p>azimuth angle : <span class="math notranslate nohighlight">\(\theta_{cam} \in [0, 360]\)</span></p>
+<p>origin으로부터 distance : <span class="math notranslate nohighlight">\([1, 1.5]\)</span></p>
+<p>focal length multiplier : <span class="math notranslate nohighlight">\(\lambda_{focal} \in \mathcal{U} (0.7, 1.35)\)</span></p>
+<p>focal length : <span class="math notranslate nohighlight">\(\lambda_{focal} w\)</span>  (<span class="math notranslate nohighlight">\(w\)</span> =64)</p>
+<p>point light position <span class="math notranslate nohighlight">\(l\)</span>은 camera position 중심 주변 분포에서 sample</p>
+<p>다양한 camera location &amp; distance 사용</p>
+<p><strong>2. Rendering.</strong></p>
+<p>Camera pose와 light position이 주어졌을 때, 64x64 해상도의 shaded NeRF model을 render</p>
+<p>Rendering 세 가지 옵션 중 하나를 무작위로 선택:</p>
+<p><strong>1) 조명이 적용된 색상 렌더링 (illuminated color render)</strong>: 조명이 적용된 상태에서의 색상 렌더링 (Fig 3. Color)</p>
+<p><strong>2) textureless render</strong>: 텍스처 없이 음영 처리된 상태로 렌더링. ( Fig 3.  Normal?)</p>
+<p><strong>3) rendering of the albedo</strong>: 음영 없이 알베도 색상만을 렌더링. ( Fig 3.  Albedo)</p>
+<p><strong>3. Diffusion loss with view-dependent conditioning</strong></p>
+<p>view-dependent text를 추가하는 것이 효과적</p>
+<p>높은 고도 각도 <span class="math notranslate nohighlight">\(\phi_{cam} &gt; 60^{\circ}\)</span> 일 때, “overhead view”</p>
+<p><span class="math notranslate nohighlight">\(\phi_{cam} &lt; 60^{\circ}\)</span> 일 때, azimuth angle <span class="math notranslate nohighlight">\(\theta_{cam}\)</span>에 따라 “front view”, “side view”, “back view” text embedding</p>
+<p>pretrained 64x64 base text-to-image model (Imagen)</p>
+<p>T5-XXL text embedding</p>
+<p>weighting function <span class="math notranslate nohighlight">\(w(t) = \sigma_t^2\)</span></p>
+<p>sample <span class="math notranslate nohighlight">\(t \sim \mathcal{U} (0.02, 0.98)\)</span> 너무 높거나 낮은 noise level 피하기 위해 설정.</p>
+<p>CFG <span class="math notranslate nohighlight">\(w\)</span>=100, 높은 guidance weight가 향상된 sample quality를 줌</p>
+<p><strong>4. Optimization</strong></p>
+<p>TPUv4 (4 chips)</p>
+<p>15,000 iters, 1.5h</p>
+<p>Appendix A.2 optimization setting</p>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig4.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 719 </span><span class="caption-text">DreamFusion Figure 4</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusiontable1.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusiontable1.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 720 </span><span class="caption-text">DreamFusion Table 1</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>일반적인 3D 복원 작업 평가</strong>:</p>
+<ul>
+<li><p>Chamfer Distance와 같은 참조 기반 평가 방법 사용.</p></li>
+<li><p>PSNR은 보유된 사진과 렌더링된 보기의 품질을 비교.</p></li>
+</ul>
+</li>
+<li><p><strong>Zero-shot 텍스트-3D 생성 평가의 어려움</strong>:</p>
+<ul>
+<li><p>GT가 없어 참조 기반 평가 적용 어려움.</p></li>
+</ul>
+</li>
+<li><p><strong>대안적 평가 방법 CLIP R-Precision</strong>:</p>
+<ul>
+<li><p>CLIP R-Precision은 rendering된 장면들이 주어졌을 때 입력 캡션과 일치하는 비율을 나타냄. 특정 문장을 여러 명령어 세트 중에서 정확히 찾는지 평가.</p></li>
+<li><p>object-centric COCO validation subset에서 153개 프롬프트 사용.</p></li>
+</ul>
+</li>
+<li><p><strong>Geo(Geometry) 평가</strong>:</p>
+<ul>
+<li><p>기하학적 평가를 위해 textureless render에 대한 R-Precision 측정</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DreamFusionfig5.png"><img alt="title_fig" class="bg-primary mb-1" src="../../_images/DreamFusionfig5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 721 </span><span class="caption-text">DreamFusion Figure 5</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Ablation</strong></p>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="pics/DreamFusion/DreamFusionfig6.png"><img alt="title_fig" class="bg-primary mb-1" src="pics/DreamFusion/DreamFusionfig6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 722 </span><span class="caption-text">DreamFusion Figure 6</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>ViewAug (다양한 시야각):</strong> 다양한 시야각을 고려하는 것이 3D 장면의 일관성을 높임.</p></li>
+<li><p><strong>ViewDep (뷰 의존적 prompt):</strong> prompt에 시야각 관련 정보를 추가하여 정확한 geometry 복원.</p></li>
+<li><p><strong>Lighting (조명 최적화):</strong> 무채색 albedo rendering 외에 lighting rendering optimization</p></li>
+<li><p><strong>Textureless :</strong> 매끄러운 표면을 만듦.</p></li>
+</ul>
+<p><strong>제한 사항:</strong></p>
+<ul class="simple">
+<li><p><strong>SDS의 한계</strong></p></li>
+<li><p><strong>세밀한 디테일 부족</strong></p></li>
+<li><p><strong>3D 복원이 근본적으로 어렵다</strong></p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Shap-E.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Shap-E</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="magic-3d.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Magic3D</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-and-score-distillation-sampling">2. Diffusion Models and Score Distillation Sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#how-can-we-sample-in-parameter-space-not-pixel-space">2.1 How can we sample in parameter space, not pixel space?</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#the-dreamfusion-algorithm">3. The DreamFusion Algorithm</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-rendering-of-a-3d-model">3.1 Neural Rendering of a 3D Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-synthesis">3.2 Text-to-3D Synthesis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/DreamGaussian.html b/docs/review/DreamGaussian.html
new file mode 100755
index 00000000..d3e30534
--- /dev/null
+++ b/docs/review/DreamGaussian.html
@@ -0,0 +1,1385 @@
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DreamGaussian &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamGaussian';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Coin3D" href="Coin3D.html" />
+    <link rel="prev" title="ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation" href="ProlificDreamer.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Latent Diffusion Model</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamGaussian.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DreamGaussian.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DreamGaussian</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamGaussian</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">1. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representations">2.1 3D representations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-generation">2.2 Text-to-3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-3d-generation">2.3 Image-to-3D Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">2.4 추가 참고자료</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-gaussian-splatting">3.1 Generative Gaussian Splatting</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-3d">Image-to-3D</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d">Text-to-3D</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">Discussion</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#efficient-mesh-extraction">3.2 Efficient Mesh Extraction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#local-density-query">Local Density Query</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#color-back-projection">Color Back-projection</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#uv-space-texture-refinement">3.3 UV-space Texture Refinement</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1 Implementation Details</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparison">4.2 Qualitative Comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">4.3 Quantitative Comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-conclusion">5. Limitations and Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreamGaussian: Generative Gaussian Splatting for Efficient 3D Content Creation (ICLR 2024)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2309.16653">https://arxiv.org/abs/2309.16653</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/dreamgaussian/dreamgaussian/tree/main">dreamgaussian/dreamgaussian</a></p></li>
+<li><p>Project Page: <a class="reference external" href="https://dreamgaussian.github.io/">https://dreamgaussian.github.io/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
+<li><p><strong>Last updated on Dec. 26, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dreamgaussian">
+<h1>DreamGaussian<a class="headerlink" href="#dreamgaussian" title="Permalink to this heading">#</a></h1>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>1. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<aside>
+<ul class="simple">
+<li><p>A novel 3D content generation framework that achieves both efficiency and quality simultaneously.</p></li>
+<li><p>3D gaussian을 3D generation task에 적용하고 3D gaussians 로 부터 textured mesh 를 추출하는 효율적인 알고리즘 제안.</p></li>
+<li><p>DreamGaussian produces high-quality textured meshes in just 2 min from a single-view image, achieving approximately 10 times acceleration compared to existing methods.</p></li>
+</ul>
+</aside>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<section id="d-representations">
+<h2>2.1 3D representations<a class="headerlink" href="#d-representations" title="Permalink to this heading">#</a></h2>
+<details>
+<summary> <strong> Neural Radiance Fields (NeRF) </strong></summary>
+<table>
+<tr>
+<th align="center">
+<strong>original NeRF</strong>
+</th>
+<th align="center">
+NeRF와 <strong>Mip-NeRF</strong>의 차이점
+</th>
+<th align="center">
+<strong>Mip-NeRF 360</strong>의 contract(<span class="math notranslate nohighlight">\(\cdot\)</span>) function
+</th>
+</tr>
+<tr>
+<td>
+<img src="/book/pics/DreamGaussian/image_1.png" alt="original nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_1.png" alt="mip-nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_2.png" alt="mip-nerf 360" width="400px">
+</td>
+</tr>
+<tr>
+<td>
+신경망(MLP)을 이용해 3d 물체에 대한 정보를 표현했다.
+신경망은 카메라의 위치와 방향을 입력으로 해당 지점의 density와 color를 리턴한다. 온전한 하나의 이미지를 렌더링 하기 위해서는 모든 camera ray를 따라 (이론상)적분이 필요하다.
+</td>
+<td >
+ray tracing이 아닌 cone tracing 방식으로, 렌더링 된 이미지의 품질을 향상시킴.
+</td>
+<td >
+파란 영역은 euclidean space, 노란 영역은 맵핑된 영역이다.
+이외에도 기존 방식(coarse-to-fine)과 다르게 n개의 신경망을 사용했으며 초기 신경망을 최종 결과물 출력시 사용하지 않았다.
+</td>
+</tr>
+</table>
+<table>
+<tr>
+<th align="center">
+<strong>Instant NGP</strong>
+</th>
+<th align="center">
+<strong>Block-NeRF</strong>
+</th>
+<th align="center">
+<strong>NeRF in the wild</strong>
+</th>
+</tr>
+<tr>
+<td>
+<img src="../../pics/DreamGaussian/image_3.png" alt="instant-ngp" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_4.png" alt="block-nerf" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_5.png" alt="nerfinthewild" width="400px">
+</td>
+</tr>
+<tr>
+<td>
+voxel기반의 multiresolution hash encoding을 통해 기존 sin/cos을 이용한 positional encoding 방식을 발전시킴. 이러한 encoding 방식을 채택함으로써 encoding 자체의 속도도 빨라졌으며 다중 스케일 정보를 담은 encoding을 이용함으로써 original NeRF의 신경망보다 오히려 작은 구조를 채택할 수 있었다. 또한 cuda를 활용하여 처리속도를 향상시킴으로써 real-time에 가까운 속도를 낼 수 있었다.
+</td>
+<td >
+대규모 장면을 모델링하기 위해 여러 NeRF를 합쳐서 하나의 큰 장면을 구성하는 방법
+</td>
+<td >
+in the wild dataset에서 scene 재구성을 위해 scene을 
+"static 요소"와 "transient 요소"로 분리하여 모델링
+</td>
+</tr>
+</table>
+</details>
+<details>
+<summary> <strong>3D Gaussian Splatting (3DGS)</strong></summary>
+<table>
+<tr>
+<th align="center">
+<strong>point cloud 예시</strong>
+</th>
+<th align="center">
+<strong>Gaussian densification 과정</strong>
+</th>
+</tr>
+<tr>
+<td>
+<img src="../../pics/DreamGaussian/image_6.png" alt="point cloud" class="mb-1" width="400px">
+</td>
+<td>
+<img src="../../pics/DreamGaussian/image_7.png" alt="gaussian densification" class="mb-1" width="400px">
+</td>
+</tr>
+</table>
+<figure class="align-default" id="id2">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_8.png"><img alt="GS optimization" class="mb-1" src="../../_images/image_8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 776 </span><span class="caption-text">Gaussian Splatting optimization
+colmap과 같은 SfM 을 이용해 3D keypoint를 찾아 해당 지점으로 3D gaussian을 초기화 한다. 이후 gaussian들을 합치거나 쪼개며 최적화를 진행한다. 렌더링 시에는 3D gaussian들을 2D projection하여 하나의 이미지를 만든다.</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<hr class="docutils" />
+<ul>
+<li><p><strong>3D Gaussian representation [<a class="reference external" href="https://towardsdatascience.com/a-comprehensive-overview-of-gaussian-splatting-e7d570081362#4cd8">참고1</a>][<a class="reference external" href="https://patapom.com/blog/SHPortal/">참고2</a>]</strong></p>
+<p>각각의 3D Gaussians은 3D position(mean) <span class="math notranslate nohighlight">\(\mu\)</span>, opacity <span class="math notranslate nohighlight">\(\alpha\)</span>, anisotropic covariance <span class="math notranslate nohighlight">\(\Sigma\)</span>로 위치와 크기, 방향을 표현하며, 여기에 spherical harmonic (SH) coefficients를 추가해 view-dependent appearance(color)를 표현했다.</p>
+<ul class="simple">
+<li><p>covariance matrix <span class="math notranslate nohighlight">\(\Sigma\)</span> 를 rotation matrix <span class="math notranslate nohighlight">\(\mathbf R\)</span> 와 scaling matrix <span class="math notranslate nohighlight">\(\mathbf S\)</span>로 분리가능.</p>
+<ul>
+<li><p>rotation matrix는 (real <span class="math notranslate nohighlight">\(r\)</span> + imaginary <span class="math notranslate nohighlight">\(i,j,k\)</span>) 4차원의 quaternion으로 표현가능.</p></li>
+<li><p>scaling matrix는 (<span class="math notranslate nohighlight">\(x,y,z\)</span>) 3차원의 scale로 표현가능.</p></li>
+</ul>
+</li>
+<li><p>SH 계수의 경우 일반적으로 각 색상 채널(R, G, B)당 9개의 계수, 총 27개의 계수를 사용합니다.</p>
+<ul>
+<li><p>9개의 계수를 사용한다는 말은 <span class="math notranslate nohighlight">\(l\in[0,1,2]\)</span> 라는 뜻이며, <span class="math notranslate nohighlight">\(m \in [-l,+l]\)</span> 이다.</p></li>
+<li><p>SH를 이용하면 빛이 균일하게 퍼지는 표면(매트한 표면)외에도 입사각(보는 방향)에 따라 달라지는 Non-Lambertian effects도 표현할 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Spherical Harmonics</strong> <span class="math notranslate nohighlight">\(Y_l^m(\theta,\phi)\)</span> <strong>를 이용한 lighting</strong> [<a class="reference external" href="https://3dvar.com/Green2003Spherical.pdf">paper</a>]</p>
+<p>Spherical Harmonics는 번역하면 구면조화함수로 구의 표면에서 정의되는 함수를 말한다. 구면좌표계 <span class="math notranslate nohighlight">\((r,\theta,\phi)\)</span> 에서 <span class="math notranslate nohighlight">\(r\)</span> 을 고정하고 <span class="math notranslate nohighlight">\(\theta, \phi\)</span>에 따라 값을 출력하는 함수이다.</p>
+<figure class="align-default" id="id3">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_9.png"><img alt="image9" class="mb-1" src="../../_images/image_9.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 777 </span><span class="caption-text">수학적으로는 라플라스 미분방정식의 해</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_10.png"><img alt="image10" class="mb-1" src="../../_images/image_10.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 778 </span><span class="caption-text">Spherical Harmonics 시각화 [<a class="reference external" href="https://en.wikipedia.org/wiki/Table_of_spherical_harmonics">eq</a>]</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id5">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_11.png"><img alt="image11" class="mb-1" src="../../_images/image_11.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 779 </span><span class="caption-text">Spherical Harmonics 시각화</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+  <img src="../../pics/DreamGaussian/Rotating_spherical_harmonics.gif" width="40" height="40"/>
+<ul>
+<li><p>cf. fourier series</p>
+<figure class="align-default" id="id6">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_12.png"><img alt="image12" class="mb-1" src="../../_images/image_12.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 780 </span><span class="caption-text">삼각함수들을 이용해 임의의 주기함수를 근사하는 푸리에 변환의 3D 확장판</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_13.png"><img alt="image13" class="mb-1" src="../../_images/image_13.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 781 </span><span class="caption-text">SH를 이용한 근사 예시. SH는 구 표면에서의 분포의 basis에 해당.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p><strong>Lambertian vs. Non-lambertian</strong></p>
+<figure class="align-default" id="id8">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_14.png"><img alt="image14" class="mb-1" src="../../_images/image_14.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 782 </span><span class="caption-text">Diffuse reflection (~ Lambertian effects)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Lambertian reflection은 어떤 각도에서 보든 같은 양의 빛을 관찰할 수 있는 이상적인 상태</p>
+<figure class="align-default" id="id9">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_15.png"><img alt="image15" class="mb-1" src="../../_images/image_15.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 783 </span><span class="caption-text">Non-lambertian effects -&gt; 즉, 반사광 표현가능</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<hr class="docutils" />
+<ul>
+<li><p><strong>Pruning and Densification</strong></p>
+<figure class="align-default" id="id10">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_16.png"><img alt="image16" class="mb-1" src="../../_images/image_16.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 784 </span><span class="caption-text">3D Gaussian Splatting
+initialization, optimization, adaptive control of gaussians</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<hr class="docutils" />
+<ul>
+<li><p><strong>Volumetric Rendering</strong></p>
+<figure class="align-default" id="id11">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_17.png"><img alt="image17" class="mb-1" src="../../_images/image_17.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 785 </span><span class="caption-text">3D Gaussian Splatting Volumetric Rendering</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_18.png"><img alt="image18" class="mb-1" src="../../_images/image_18.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 786 </span><span class="caption-text">from nerfstudio</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>frustum culling을 통해 보이지 않는 3D gaussian들을 제외하고 2D로 projection.</p></li>
+<li><p>3D gaussian들을 sorting하고 각 픽셀의 ray에 겹치는 3D gaussian들만 color, opacity 값을 반영한다.</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_19.png"><img alt="image19" class="mb-1" src="../../_images/image_19.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 787 </span><span class="caption-text">original NeRF vs 3d Gaussian Splatting rendering</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<hr class="docutils" />
+</details> 
+</section>
+<section id="text-to-3d-generation">
+<h2>2.2 Text-to-3D Generation<a class="headerlink" href="#text-to-3d-generation" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>data-driven 3D diffusion models</p></li>
+<li><p>3D native diffusion models</p></li>
+<li><p>lift 2D image models for 3D generation</p></li>
+</ul>
+</section>
+<section id="image-to-3d-generation">
+<h2>2.3 Image-to-3D Generation<a class="headerlink" href="#image-to-3d-generation" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>generating 3D assets from a reference image (~ single-view 3D reconstruction)</p></li>
+<li><p>text-to-3D methods can also be image-to-3D methods</p></li>
+<li><p>Zero-1-to-3</p></li>
+<li><p>One-2-3-45</p></li>
+</ul>
+<section id="id1">
+<h3>2.4 추가 참고자료<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<details>
+<summary> <strong>DreamFusion</strong></summary>
+<ul>
+<li><p>Score Distillation Sampling (SDS) [<a class="reference external" href="https://arxiv.org/abs/2209.14988">arXiv</a>]</p>
+<ul class="simple">
+<li><p>pretrained 2D diffusion model을 parametric image generator로서 사용하는 방식을 제안함. image generator로서 NeRF를 사용하여 differentiable 3D representation을 가능하도록 함.</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+    \mathbf x=g_\Theta(p)
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\mathbf x\)</span> 는 카메라 포즈 <span class="math notranslate nohighlight">\(p\)</span>에서 렌더링된 이미지를 의미한다. <span class="math notranslate nohighlight">\(g_\Theta(\cdot)\)</span>는 differentiable rendering function으로 NeRF parameters <span class="math notranslate nohighlight">\(\Theta\)</span>를 parameter로 갖는다.</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+    \triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon}\Big[w(t)(\epsilon_\phi(\mathbf x;t,e)-\epsilon)\frac{\partial\mathbf x}{\partial\Theta} \Big]
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(w(t)=\sigma_t^2\)</span> 는 DDPM의 weighting function, <span class="math notranslate nohighlight">\(\epsilon_\phi(\cdot)\)</span>은 pretrained parameter <span class="math notranslate nohighlight">\(\phi\)</span>로  noise를 예측하는 함수이다.</p></li>
+</ul>
+</li>
+<li><p>즉, SDS formulation은 사전학습된 2D 이미지 생성 디퓨전 모델을 이용해 NeRF parameter <span class="math notranslate nohighlight">\(\Theta\)</span>를 최적화하는 식이다. 이를 통해 text description에 맞는 NeRF 3D shape을 최적화 할 수 있는 것이다.</p></li>
+</ul>
+</details>
+<details>
+<summary> <strong>Marching Cube</strong></summary>
+<p>3d 모델에서 표면(mesh)을 추출하기 위한 알고리즘. 여기서 표면은 밀도가 특정 값을 넘는 지점을 의미한다.</p>
+<ul class="simple">
+<li><p>주어진 3D 공간을 작은 <strong>큐브</strong>(cube/voxel)로 나누고, 각 큐브의 <strong>8개 코너</strong>에서 값(일반적으로 밀도값)을 보고 그 값을 바탕으로 표면을 추출한다.</p></li>
+<li><p>8개의 코너의 밀도값에 따라 어떤 표면을 가지게 되는지는 미리 정해둔 정보(<span class="math notranslate nohighlight">\(2^8\)</span>)를 이용한다.</p></li>
+</ul>
+<ol class="arabic">
+<li><p>3D 공간을 작은 큐브로 분할</p></li>
+<li><p>임계값을 기준으로 판단</p>
+<ul class="simple">
+<li><p>해당 지점이 물체 내부에 속하는지 외부에 속하는지 판단함</p></li>
+</ul>
+</li>
+<li><p>표면 생성</p>
+<ul class="simple">
+<li><p>각 큐브의 꼭짓점 값에 따라 표면이 어떻게 생길지에 대한 규칙을 미리 정의해두고,
+이를 바탕으로 표면을 추출</p></li>
+</ul>
+<figure class="align-default" id="id14">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_21.png"><img alt="marching cube" class="mb-1" src="../../_images/image_21.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 788 </span><span class="caption-text">from wikipedia</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>오렌지 점들은 물체의 표면 혹은 가장자리에 위치한 점들을 의미함.</p></li>
+</ul>
+</li>
+</ol>
+</details>        
+<details>
+<summary> <strong>NVDiffrast</strong></summary>
+<p>nvidia 2020 ACMTOG [<a class="reference external" href="https://arxiv.org/abs/2011.03277">arXiv</a>][<a class="reference external" href="https://github.com/NVlabs/nvdiffrast?tab=readme-ov-file">github</a>]</p>
+<ul>
+<li><p>미분가능한 렌더링 방법론으로 cuda를 이용해 가속화한 것이 특징.</p>
+<ul class="simple">
+<li><p>주로 삼각형 메시를 효율적으로 렌더링하고 그래디언트를 계산하기 위해 사용됨.</p></li>
+</ul>
+</li>
+<li><p>FLAME, 3DMM, SMPL등은 미분가능한 3D 모델으로 렌더링 방법론은 아님.</p>
+<ul class="simple">
+<li><p>3DMM (3D Morphable Models) 1999</p></li>
+</ul>
+<figure class="align-default" id="id15">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_22.png"><img alt="3DMM" class="mb-1" src="../../_images/image_22.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 789 </span><span class="caption-text">3DMM</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>SMPL (Skinned Multi-Person Linear Model) 2015</p></li>
+</ul>
+<figure class="align-default" id="id16">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_23.png"><img alt="SMPL" class="mb-1" src="../../_images/image_23.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 790 </span><span class="caption-text">SMPL</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>FLAME (Faces Learned with an Articulated Model and Expressions) 2017</p></li>
+</ul>
+<figure class="align-default" id="id17">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_24.png"><img alt="FLAME" class="mb-1" src="../../_images/image_24.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 791 </span><span class="caption-text">FLAME</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</details>       
+<details>
+<summary> <strong>Zero-1-to-3</strong></summary>
+<p>[<a class="reference external" href="https://zero123.cs.columbia.edu/">project page</a>] [<a class="reference external" href="https://www.notion.so/DDPM-NeurIPS-2020-05eb365e0ece43c0bc55ef21a8d4c6f0?pvs=21">DDPM (NeurIPS 2020)</a>]</p>
+<ul class="simple">
+<li><p><strong>Zero-1-to-3</strong> control the camera perspective in large-scale diffusion models,
+enabling zero-shot novel view synthesis and 3D reconstruction from a single image.</p></li>
+<li><p>RGB image <span class="math notranslate nohighlight">\(x\in\Bbb R^{H\times W\times 3}\)</span> , relative camera rotation <span class="math notranslate nohighlight">\(R\in \Bbb R^{3\times 3}\)</span>,relative camera translation <span class="math notranslate nohighlight">\(T\in\Bbb R^3\)</span></p></li>
+</ul>
+<figure class="align-default" id="id18">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_24.png"><img alt="zero 1-to-3" class="mb-1" src="../../_images/image_24.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 792 </span><span class="caption-text">zero 1-to-3</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id19">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_25.png"><img alt="zero 1-to-3" class="mb-1" src="../../_images/image_25.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 793 </span><span class="caption-text">zero 1-to-3</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</details>
+</section>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id20">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_26.png"><img alt="method overview" class="mb-1" src="../../_images/image_26.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 794 </span><span class="caption-text">Method Overview</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>2-stage framework for 3D content generation
+for both Image-to-3D and Text-to-3D tasks.</p>
+<ul>
+<li><p>SDS를 이용해 초기화 한 3D gaussian splatting을 이용하여 3D generation</p></li>
+<li><p>3D gaussians에서 textured mesh 추출</p></li>
+<li><p>UV-space refinement를 통해 texture fine-tuning</p></li>
+</ul>
+</li>
+</ul>
+<section id="generative-gaussian-splatting">
+<h2>3.1 Generative Gaussian Splatting<a class="headerlink" href="#generative-gaussian-splatting" title="Permalink to this heading">#</a></h2>
+<p>개별 3D gaussian의 위치와 형태는 center(<span class="math notranslate nohighlight">\(\mathbf x\)</span>), scaling factor(<span class="math notranslate nohighlight">\(\mathbf s\)</span>), rotation quaternion(<span class="math notranslate nohighlight">\(\mathbf q\)</span>)으로 표현되며, opacity value(<span class="math notranslate nohighlight">\(\alpha\)</span>), color feature(<span class="math notranslate nohighlight">\(\mathbf c\)</span>)를 저장하여 volumetric rendering시 사용한다.</p>
+<p><span class="math notranslate nohighlight">\(\Theta_i=\{\mathbf x_i, \mathbf s_i,\mathbf q_i, \alpha_i, \mathbf c_i\}\)</span>,  <span class="math notranslate nohighlight">\(\mathbf x \in \Bbb R^3, \mathbf s\in \Bbb R^3, \mathbf q \in \Bbb R^4, \alpha\in \Bbb R, \mathbf c \in \Bbb R^3\)</span>
+original gaussian splatting에서는 spherical harmonics 계수를 이용하여 색을 표현하지만
+simple diffuse color를 모델링 하기 위해 간략화 → 재질 표현이 어려울 수 있다.</p>
+<p>3D Gaussians은 random position, unit scaling, no rotation으로 initialization 한 후, SDS를 이용해 최적화 한다.</p>
+<section id="image-to-3d">
+<h3>Image-to-3D<a class="headerlink" href="#image-to-3d" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>사전학습된 Zero-1-to-3 XL을 사용했으며 image <span class="math notranslate nohighlight">\(\tilde I^r_\text{RGB}\)</span> 와 foreground mask <span class="math notranslate nohighlight">\(\tilde I^r_A\)</span> 를 입력으로 사용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+    \triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon} \Big[w(t)(\epsilon_\phi(I^p_\text{RGB};t,\tilde I^r_\text{RGB},\triangle p)-\epsilon)\frac{\partial I^p_\text{RGB}}{\partial\Theta} \Big] \tag 1
+    \]</div>
+<p><span class="math notranslate nohighlight">\(w(t)\)</span>는 weighting function이고, <span class="math notranslate nohighlight">\(\epsilon_\phi(\cdot)\)</span> 는 사전학습된 <span class="math notranslate nohighlight">\(\phi\)</span>를 이용해 예측된 noise를 뜻한다.
+<span class="math notranslate nohighlight">\(\triangle p\)</span> 는 relative camera pose, <span class="math notranslate nohighlight">\(r\)</span>은 reference camera이다.</p>
+</li>
+<li><p>추가적으로 reference view와 transparency 를 input에 align했다.</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal L_\text{Ref}=\lambda_\text{RGB}\|I^r_\text{RGB}-\tilde I_\text{RGB}^r \|^2_2 + \lambda_A\|I^r_A-\tilde I^r_A\|^2_2 \tag 2
+    \]</div>
+</li>
+</ul>
+</section>
+<section id="text-to-3d">
+<h3>Text-to-3D<a class="headerlink" href="#text-to-3d" title="Permalink to this heading">#</a></h3>
+<p>Stable diffusion을 활용하여 text-to-3D task를 수행했다.</p>
+<div class="math notranslate nohighlight">
+\[
+\triangledown_\Theta\mathcal L_\text{SDS}=\Bbb E_{t,p,\epsilon}\Big [ w(t)(\epsilon_\phi(I^p_\text{RGB};t,e)-\epsilon)\frac{\partial I^p_\text{RGB}}{\partial \Theta} \Big] \tag 3
+\]</div>
+<p><span class="math notranslate nohighlight">\(e\)</span>는 주어진 text prompt의 CLIP embedding을 의미한다.</p>
+</section>
+<section id="discussion">
+<h3>Discussion<a class="headerlink" href="#discussion" title="Permalink to this heading">#</a></h3>
+<p>하지만 논문의 저자들은 SDS loss의 ambiguity 때문에 길게 학습하더라도 생성된 3D gaussians이  blurry하고 디테일이 부족하다고 한다. 이를 개선하기 위해 다음 단계인 mesh extraction과 texture refinement를 수행한다.</p>
+</section>
+</section>
+<section id="efficient-mesh-extraction">
+<h2>3.2 Efficient Mesh Extraction<a class="headerlink" href="#efficient-mesh-extraction" title="Permalink to this heading">#</a></h2>
+<p>block-wise local density query와 back-projected color를 이용해 textured mesh를 추출하는 효과적인 알고리즘을 제안한다.</p>
+<section id="local-density-query">
+<h3>Local Density Query<a class="headerlink" href="#local-density-query" title="Permalink to this heading">#</a></h3>
+<p>marching cube algorithm을 적용하기 위해서는 local density grid가 필요하다. gaussian splatting 알고리즘의 주요 특징은 over-sized Gaussian 들은 최적화 과정에서 split 및 pruning된다는 점이다. 이는 효과적인 rasterization을 위해 culling technique을 적용 할 수 있는 근거가 된다. 또한 이 점은 block-wise density queries를 perform 할 때도 사용할 수 있다.</p>
+<p>먼저 3D space를 <span class="math notranslate nohighlight">\((-1,1)^3\)</span> 으로 맵핑한다. (그냥 최대, 최소값을 이용해 정규화)  그리고 이 공간을 <span class="math notranslate nohighlight">\(16^3\)</span>의 overlapping blocks(multiscale voxels)으로 나눈다. 그리고 각 블록의 외부에 위치한 gaussian들은 제외한다. 이를 통해 계산해야 할 gaussian의 총 개수를 효과적으로 줄일 수 있다. 그리고 각 블록의 내부에 <span class="math notranslate nohighlight">\(8^3\)</span> dense grid를 만들어 최종적으로는 <span class="math notranslate nohighlight">\(128^3\)</span>의 dense grid를 만든다. grid position <span class="math notranslate nohighlight">\(\mathbf x\)</span>의 각 query는 남아있는 3D gaussian들의 opacity의 weighted sum으로 local density grid를 얻는다.</p>
+<div class="math notranslate nohighlight">
+\[
+d(\mathbf x)=\sum_i\alpha_i\text{exp}(-\frac{1}{2}(\mathbf x-\mathbf x_i)^T\Sigma_i^{-1}(\mathbf x-\mathbf x_i)) \tag{4}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\Sigma\)</span>는 covariance matrix로 scaling <span class="math notranslate nohighlight">\(\mathbf s\)</span>, rotation <span class="math notranslate nohighlight">\(\mathbf q\)</span>로 이루어져 있다. 이후에는 empirical threshold를 marching cube 알고리즘에 적용하여 mesh surface를 추출한다. <a class="reference external" href="https://www.meshlab.net">decimation과 remeshing</a>을 이용해 후처리하여 더욱 자연스럽고(smoother), 간결한(compact) mesh를 만들었다.</p>
+</section>
+<section id="color-back-projection">
+<h3>Color Back-projection<a class="headerlink" href="#color-back-projection" title="Permalink to this heading">#</a></h3>
+<p>앞선 단계에서 mesh를 얻었기 때문에 rendered RGB 이미지를 mesh surface로 back-project하여 texture map으로 만들 수 있다.</p>
+<details>
+<summary>UV mapping from wikipedia</summary>
+<figure class="align-default" id="id21">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_27.png"><img alt="uv mapping" class="mb-1" src="../../_images/image_27.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 795 </span><span class="caption-text">uv mapping</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>먼저 mesh의 UV coordinate를 unwrap하고 빈 texture image로 초기화 한다. 그리고 8개의 azimuth, 3개의 elevation을 균일하게 선택하고 top, bottom view까지 포함하여 corresponding RGB image를 렌더링 할 수 있게 한다. 이러한 RGB 이미지들의 각 픽셀은 UV coordinate를 기반으로 texture image로 맵핑할 수 있다.</p>
+<p>이렇게 back-project된 texture image는 다음의 texture fine-tuning 단계의 초기 설정으로 사용된다.</p>
+</details>
+</section>
+</section>
+<section id="uv-space-texture-refinement">
+<h2>3.3 UV-space Texture Refinement<a class="headerlink" href="#uv-space-texture-refinement" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id22">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_28.png"><img alt="uv-space texture refinement" class="mb-1" src="../../_images/image_28.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 796 </span><span class="caption-text">UV-space Texture Refinement</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>앞선 back-projection된 coarse texture를 시작으로 texture 품질을 올리고자 했으나, SDS loss를 이용해
+UV-space를 직접 fine-tuning 하면 위의 그림과 같은 artifact가 발생하게된다. 이는 differentiable rasterization시 사용되는 mipmap texture sampling 기법때문이다. SDS와 같이 모호한 guidance를 이용하면 각 mipmap level에 따라 over-saturation된 color block으로 gradient가 전파 된다.</p>
+<ul>
+<li><p>mipmap in rasterization</p>
+<figure class="align-default" id="id23">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_29.png"><img alt="rasterization" class="mb-1" src="../../_images/image_29.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 797 </span><span class="caption-text">from wikipedia</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id24">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_30.png"><img alt="rasterization" class="mb-1" src="../../_images/image_30.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 798 </span><span class="caption-text">from unity document</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위와 같은 고품질 렌더링과 렌더링 속도 향상을 위해 mipmap이라는 기법을 활용한다. 본 논문에서 texture mapping, rendering시 사용한 NVdiffrast도 mipmap을 활용하고 있다.
+mipmap은 texture를 여러 레벨의 화질로 저장하는 방식으로 mipmap level은 특정 화질의 texture version을 의미한다. 카메라로부터 멀리 떨어진 object는 저레벨의 mipmap을 사용해 렌더링 한다. 저레벨의 mipmap은 이미 정보손실이 일어난 상태이고 이를 이용해 렌더링 되었다면 gradient가 흐르는 방향이 왜곡 될 수 있다.</p>
+</li>
+</ul>
+<hr class="docutils" />
+<div class="math notranslate nohighlight">
+\[
+I^p_\text{fine}=f_\phi(I^p_\text{coarse}+\epsilon(t_\text{start});t_\text{start},c) \tag 5
+\]</div>
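+<p>stage 2 의 학습은 image-to-image synthesis와 같은 방식으로 진행된다. initialization texture가 있으므로 임의의 camera view <span class="math notranslate nohighlight">\(p\)</span>에서 coarse image <span class="math notranslate nohighlight">\(I^p_\text{coarse}\)</span>를 렌더링할 수 있고, 여기에 random noise를 더한 뒤 denoising하여 refined image <span class="math notranslate nohighlight">\(I^p_\text{fine}\)</span>를 얻는다.</p>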
+<figure class="align-default" id="id25">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_31.png"><img alt="from SDEdit" class="mb-1" src="../../_images/image_31.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 799 </span><span class="caption-text">from SDEdit</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(f_\phi(\cdot)\)</span> 는 사전학습된 2D diffusion 을 통해 refined image를 얻는 multi-step denoising process를 의미한다. <span class="math notranslate nohighlight">\(\epsilon(t_\text{start})\)</span> 는 timestep <span class="math notranslate nohighlight">\(t_\text{start}\)</span>의 랜덤 노이즈를 말한다. <span class="math notranslate nohighlight">\(c\)</span> 는 image-to-3D를 위한 condition인 카메라 포즈 변화량 <span class="math notranslate nohighlight">\(\Delta p\)</span> 이고, <span class="math notranslate nohighlight">\(e\)</span>는 text-to-3D의 condition을 말한다. 시작 timestep <span class="math notranslate nohighlight">\(t_\text{start}\)</span>는 noise 강도를 제한하기 위해 신중히 선택되어야 refined image의 original content를 유지하며 detail을 향상 시킬수 있다고 한다. refined image는 이후 pixel-wise MSE loss에 texture 최적화를 위해 사용된다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal L_\text{MSE}=\|I^p_\text{fine}-I^p_\text{coarse}\|^2_2 \tag 6
+\]</div>
+<p>image-to-3D task에서는 reference view RGBA loss <span class="math notranslate nohighlight">\(\mathcal L_\text{Ref}\)</span> 를 적용했다.</p>
+<p>실험 결과에 따르면 50 step 정도 만에 대부분 detail이 좋아졌다고 하며 반복횟수를 늘릴수록 texture의 detail이 향상되었다고 합니다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<section id="implementation-details">
+<h2>4.1 Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Number of iterations</p>
+<ul>
+<li><p>first stage - 500 steps</p></li>
+<li><p>second stage - 50 steps</p></li>
+</ul>
+</li>
+<li><p>3D Gaussian initialization</p>
+<ul>
+<li><p>number - 5000 for image-to-3D, 1000 for text-to-3D</p></li>
+<li><p>opacity - 0.1</p></li>
+<li><p>color - grey</p></li>
+<li><p>radius - 0.5</p></li>
+</ul>
+</li>
+<li><p>Rendering resolution - 64 to 512 for gaussian splatting, 128 to 1024 for mesh</p></li>
+<li><p>Loss weights in eq(2) - RGB, transparency 가중치(<span class="math notranslate nohighlight">\(\lambda_\text{RGB}, \lambda_A\)</span>)는 0에서부터 각각 10000, 1000로 linearly increasing</p></li>
+<li><p>Camera pose sampling - fixed radius 2 for image-to-3D / 2.5 for text-to-3D,
+y-axis FOV 49 degree,
+azimuth in <span class="math notranslate nohighlight">\([-180,180]\)</span> degree, elevation in <span class="math notranslate nohighlight">\([-30,30]\)</span>.</p></li>
+</ul>
+<figure class="align-default" id="id26">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_32.png"><img alt="Horizontal coordinates from wikipedia" class="mb-1" src="../../_images/image_32.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 800 </span><span class="caption-text">Horizontal coordinates from wikipedia</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id27">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_33.png"><img alt="Horizontal coordinates from wikipedia" class="mb-1" src="../../_images/image_33.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 801 </span><span class="caption-text"><a class="reference external" href="https://www.epd.gov.hk/eia/register/report/eiareport/eia_2522017/EIA/html/Appendix/Appendix%2011.1.pdf">출처</a></span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Background color - white or black randomly for gaussian splatting</p></li>
+<li><p>Run-time - 1min/stage for image-to-3D
+2min/stage for text-to-3D w. Stable Diffusion <span class="math notranslate nohighlight">\(512\times512\)</span> resolution</p></li>
+<li><p>Marching cube threshold - 1</p></li>
+<li><p>GPU - NVIDIA V100(16GB), less than 8GB for these experiments</p></li>
+</ul>
+</section>
+<section id="qualitative-comparison">
+<h2>4.2 Qualitative Comparison<a class="headerlink" href="#qualitative-comparison" title="Permalink to this heading">#</a></h2>
+<p><strong>Image-to-3D comparison</strong></p>
+<figure class="align-default" id="id28">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_34.png"><img alt="실험결과" class="mb-1" src="../../_images/image_34.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 802 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id28" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Text-to-3D comparison</strong></p>
+<figure class="align-default" id="id29">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_35.png"><img alt="실험결과" class="mb-1" src="../../_images/image_35.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 803 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id29" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>최적화를 진행하는 방법론 뿐만 아니라 inference-only 방법론들과 비교해도 매우 빠른 생성 속도를 보였다고 함.</p>
+<figure class="align-default" id="id30">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_36.png"><img alt="실험결과" class="mb-1" src="../../_images/image_36.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 804 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id30" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이렇게 뽑아낸 mesh는 blender와 같은 리깅 툴을 이용해 애니메이팅 가능.</p>
+</section>
+<section id="quantitative-comparison">
+<h2>4.3 Quantitative Comparison<a class="headerlink" href="#quantitative-comparison" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id31">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_37.png"><img alt="실험결과" class="mb-1" src="../../_images/image_37.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 805 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id31" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id32">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_38.png"><img alt="실험결과" class="mb-1" src="../../_images/image_38.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 806 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id32" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-study">
+<h2>4.4 Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id33">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_39.png"><img alt="실험결과" class="mb-1" src="../../_images/image_39.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 807 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id33" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>논문에서 제안하는 파이프라인의 모든 과정이 필요함을 보여줌.</p>
+<ul class="simple">
+<li><p>Periodical densification of 3D Gaussians</p></li>
+<li><p>Linear annealing of timestep t for SDS loss</p></li>
+<li><p>Effect of the reference view loss <span class="math notranslate nohighlight">\(\mathcal L_{\text{Ref}}\)</span></p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="limitations-and-conclusion">
+<h1>5. Limitations and Conclusion<a class="headerlink" href="#limitations-and-conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>3D content generation framework인 DreamGaussian을 통해 3D content 생성의 효율성을 증대.</p></li>
+<li><p>3D Gaussian으로 부터 mesh를 추출하는 알고리즘 제안.</p></li>
+<li><p>texture fine-tuning stage를 통해 image나 text로 부터 고품질의 polygonal mesh생성 가능.</p></li>
+</ul>
+<figure class="align-default" id="id34">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_40.png"><img alt="실험결과" class="mb-1" src="../../_images/image_40.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 808 </span><span class="caption-text">실험결과</span><a class="headerlink" href="#id34" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래와 같은 기존 방법론들의 문제점들을 여전히 가지고 있으나 score debiasing/ camera-conditioned 2D diffusion models/ BRDF auto-encoder와 같은 방법을 도입하면, 개선가능할 것으로 기대함.</p>
+<ul class="simple">
+<li><p>Janus prob</p></li>
+<li><p>over saturated texture</p></li>
+<li><p>baked lighting</p></li>
+</ul>
+<p>덧붙여 texture refinement를 진행하는 stage 2에서 blurry한 결과를 얻을 수 있으나 학습을 더 진행하면 개선된다고 함.</p>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="ProlificDreamer.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Coin3D.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Coin3D</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamGaussian</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">1. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representations">2.1 3D representations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-generation">2.2 Text-to-3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-3d-generation">2.3 Image-to-3D Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">2.4 추가 참고자료</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-gaussian-splatting">3.1 Generative Gaussian Splatting</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-3d">Image-to-3D</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d">Text-to-3D</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">Discussion</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#efficient-mesh-extraction">3.2 Efficient Mesh Extraction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#local-density-query">Local Density Query</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#color-back-projection">Color Back-projection</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#uv-space-texture-refinement">3.3 UV-space Texture Refinement</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1 Implementation Details</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-comparison">4.2 Qualitative Comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-comparison">4.3 Quantitative Comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4 Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-conclusion">5. Limitations and Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
+</html>
\ No newline at end of file
diff --git a/docs/review/DreamPose.html b/docs/review/DreamPose.html
old mode 100644
new mode 100755
index fa7a16c6..1a9a9f5d
--- a/docs/review/DreamPose.html
+++ b/docs/review/DreamPose.html
@@ -1,1298 +1,1318 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamPose';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis" href="NeRF.html" />
-    <link rel="prev" title="DreaMoving" href="DreaMoving.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamPose.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/DreamPose.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction"><strong>1. Introduction</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1. Diffusion models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#still-image-animation">2.2. Still Image Animation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fashion-image-synthesis">2.3. Fashion Image Synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-for-video-synthesis">2.4. Diffusion Models for Video Synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-mechanisms-for-diffusion-models">2.5. Conditioning Mechanisms for Diffusion Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method"><strong>4. Method</strong></a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview"><strong>4.1. Overview</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">4.2. Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#split-clip-vae-encoder"><strong>4.2.1 Split CLIP-VAE Encoder</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#modified-unet">4.2.2 Modified UNet</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#finetuning">4.2.3 <strong>Finetuning</strong></a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pose-and-image-classifier-free-guidance">4.4. Pose and Image Classifier-Free Guidance</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.1.  Implementation Details</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">5.2. Dataset</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.1. Comparisons</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-analysis">6.1.1 Quantitative Analysis</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">6.2.2 Qualitative Analysis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.2. Ablation Studies</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-input-images">6.3. Multiple Input Images</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-future-work"><strong>7. Limitations &amp; Future Work</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">8. Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2304.06025">https://arxiv.org/abs/2304.06025</a></p></li>
-<li><p>Project: <a class="reference external" href="https://grail.cs.washington.edu/projects/dreampose/">https://grail.cs.washington.edu/projects/dreampose/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
-<li><p><strong>Last updated on May. 08, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dreampose-fashion-image-to-video-synthesis-via-stable-diffusion">
-<h1>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion<a class="headerlink" href="#dreampose-fashion-image-to-video-synthesis-via-stable-diffusion" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id1">
-<img alt="DreamPose_input_output" class="bg-primary mb-1" src="pics/DreamPose/00.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 634 </span><span class="caption-text">DreamPose 입출력</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<aside>
-💡핵심 요약
-- 입력: 이미지, 포즈 시퀀스 / 출력: 입력 이미지의 사람이 포즈 시퀀스에 따라 움직이는 듯한 비디오 영상
-- 이미지와 포즈 시퀀스가 주어지면 사람과 옷감의 움직임을 모두 포함하는 비디오를 합성하는 DreamPose를 제안하였다.
-- 사전 학습된 스테이블 디퓨전을 파인튜닝하여 해당 태스크를 수행 하였다.
-- 입력 이미지의 특성을 잘 포착하기 위해 CLIP 인코더와 VAE 인코더를 도입하였고, 이를 통합하는 Adapter를 도입하여 스테이블 디퓨전의 U-Net의 조건으로 넣어줬다.
-- 포즈 시퀀스를 잘 반영하기 위해 연속적인 5개의 프레임을 노이즈와 concat하여 디노이징 UNet에 입력으로 주었다.
-- 두 단계의 파인튜닝 전략을 사용하였다.
-    - 첫 번째 파인튜닝: 전체 데이터셋에 대해 파인튜닝
-    - 두 번째 파인튜닝: 피사체에 대해 파인튜닝
-- 듀얼 classifier-free guidance를 사용하여 입력 이미지와 포즈 시퀀스에 대한 충실도의 강도를 조정한다.
-- 패션 비디오 합성 태스크에 대해서 SOTA 성능을 달성 하였다.
-</aside>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1><strong>1. Introduction</strong><a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>DreamPose가 제안된 배경</p>
-<ul>
-<li><p>패션 사진은 온라인에 널리 퍼져 있지만, 전달할 수 있는 정보가 제한적이며 입었을 때 옷의 늘어진 모양이나 흐름 등 옷의 중요한 뉘앙스를 포착하지 못한다.</p></li>
-<li><p>패션 동영상은 이러한 모든 디테일을 보여주기에 소비자의 의사 결정에 유용한 정보를 제공하지만, 동영상이 있는 상품은 매우 드물다.</p></li>
-</ul>
-</li>
-<li><p>DreamPose</p>
-<ul>
-<li><p>본 논문에서는 포즈 시퀀스를 따라 패션 사진을 사실적인 애니메이션 비디오로 변환하는 방법인 DreamPose를 소개한다.</p></li>
-<li><p>Stable diffusion을 기반으로 한다.</p></li>
-<li><p>하나 이상의 사람 이미지와 포즈 시퀀스가 주어지면, 포즈 시퀀스를 따라 고품질 비디오를 생성한다.</p></li>
-</ul>
-</li>
-<li><p>기존 비디오 생성 모델들의 문제점</p>
-<ul>
-<li><p>이미지 생성 디퓨전 모델은 좋은 결과를 보여주었지만, 비디오 생성 디퓨전 모델은 동일한 품질의 결과를 얻지 못했으며, 텍스처 움직임이나 카툰과 같은 모양으로 제한된 경우가 많다.</p></li>
-<li><p>시간적 일관성이 떨어진다.</p></li>
-<li><p>모션 jitter가 발생한다.</p></li>
-<li><p>사실성(realism)이 부족하다.</p></li>
-<li><p>대상 비디오의 움직임이나 세부적인 물체 모양을 제어할 수 없다.</p>
-<ul>
-<li><p>기존 모델이 주로 텍스트를 기반으로 하기 때문에</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>DreamPose의 접근법</p>
-<ul>
-<li><p>이미지 및 포즈 시퀀스를 조건으로 받는 방식을 사용하여 fidelity와 프레임 간 일관성을 높일 수 있다.</p></li>
-<li><p>이미지 분포를 효과적으로 모델링하는 기존 사전 학습된 이미지 디퓨전 모델을 파인 튜닝하였다. → 이미지 애니메이션 태스크를 컨디셔닝 신호와 일치하는 이미지의 부분 공간을 찾는 것으로 단순화 할 수 있다.</p></li>
-<li><p>해당 태스크를 위해 스테이블 디퓨전의 인코더와 컨디셔닝 메커니즘을 재설계하였다.</p></li>
-<li><p>2-스테이지 파인튜닝 방식을 사용한다</p>
-<ul>
-<li><p>UNet과 VAE를 하나 혹은 여러 입력 이미지에 대해서 파인튜닝</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Contribution</p>
-<ol class="arabic simple">
-<li><p>DreamPose: 패션 이미지 애니메이션을 위해 이미지 및 포즈를 조건으로 하는 디퓨전 방식</p></li>
-<li><p>프레임 간 시간적 일관성을 크게 향상 시키는 간단하지만 효과적인 포즈 컨디셔닝 방식</p></li>
-<li><p>컨디셔닝 이미지의 fidelity를 높여주는 split CLIP-VAE 인코더</p></li>
-<li><p>이미지의 fidelity와 새로운 포즈에 대한 일반화 사이의 균형을 효과적으로 맞추는 파인튜닝 전략</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<section id="diffusion-models">
-<h2>2.1. Diffusion models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>디퓨전 모델은 최근 text-to-image, 비디오 생성, 3D 생성 태스크에서 인상적인 결과를 보여주고 있다.</p></li>
-<li><p>하지만 이러한 모델을 처음부터 훈련하는 것은 비용이 많이 들고 많은 양의 데이터가 필요하다.</p></li>
-<li><p>스테이블 디퓨전과 같은 latent diffusion 모델은 디퓨전과 디노이징 과정을 latent space에서 수행하기 때문에 계산 요구 사항과 훈련 시간을 대폭 줄일 수 있다.</p></li>
-<li><p>스테이블 디퓨전과 그 사전 훈련된 체크포인트는 출시 이후 다양한 이미지 생성 작업에 사용되었다.</p></li>
-<li><p>본 논문에서도 사전 훈련된 스테이블 디퓨전 모델을 활용하고, subject에 특화된 파인튜닝을 한다.</p></li>
-</ul>
-</section>
-<section id="still-image-animation">
-<h2>2.2. Still Image Animation<a class="headerlink" href="#still-image-animation" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>하나 이상의 입력 이미지로부터 동영상을 생성하는 태스크</p></li>
-<li><p>기존에 디퓨전 기반이 아닌 접근 방식들은 배경 예측, 모션 표현, occlusion map이나 dense map 등 여러 개의 개별 네트워크로 구성되는 경우가 많았다.</p>
-<ul>
-<li><p>각 단계마다 별도의 훈련이 필요하고 ground-truth 모션이나 depth등의 ground-truth 데이터를 사용할 수 없거나 불완전할 가능성이 있다.</p></li>
-<li><p>모션이 크고 복잡할 경우에 ground-truth에 대한 예측은 도출하기 더 어렵고 오류가 발생하기 쉽다.</p></li>
-</ul>
-</li>
-<li><p>최근 여러 방법들은 엔드 투 엔드 싱글 네트워크 접근 방식을 탐구하고 있다. (예: optical flow and warping, cross-attention 모듈, NeRF 표현을 사용한 애니메이션이 가능한 3D 휴먼 생성 등)</p></li>
-</ul>
-</section>
-<section id="fashion-image-synthesis">
-<h2>2.3. Fashion Image Synthesis<a class="headerlink" href="#fashion-image-synthesis" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>기존 pose-guided 패션 이미지 합성 방법</p>
-<ul>
-<li><p>일반적으로 GAN을 기반으로 했고, optical flow에 의존해 이미지 특징을 포즈에 맞추는 방법을 사용했다. → 큰 포즈 변화, 가려진 영역 합성, 의상 스타일 보존에 어려움을 겪는 경우가 많다.</p></li>
-<li><p>최근엔 어텐션 기반 메커니즘을 사용하여, 셀프/크로스 어텐션을 사용하여 이미지 특징을 목표 프레임에 맞추려고 하였다.</p></li>
-<li><p>디퓨전 기반</p>
-<ul>
-<li><p>DiffFashion: 레퍼런스 이미지의 스타일을 트랜스퍼하여 의류 아이템을 편집하는 것을 목표로 한다.</p></li>
-<li><p>PIDM: 포즈를 조건으로 넣어 사람 이미지를 생성한다 → 시간적 일관성을 위한 최적화는 하지 않는다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="diffusion-models-for-video-synthesis">
-<h2>2.4. Diffusion Models for Video Synthesis<a class="headerlink" href="#diffusion-models-for-video-synthesis" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>많은 text-to-video 디퓨전 모델은 text-to-image 디퓨전 모델을 활용하여 적용한다.</p></li>
-<li><p>결과를 기대할만 하지만, 여전히 텍스트-이미지 모델과 같은 성능은 나오지 않고 있다.</p></li>
-<li><p>프레임 간의 시간적 일관성을 유지하는 것과 사실적인 모션을 생성하는 것이 어렵다.</p></li>
-<li><p>어떤 디퓨전 기반 방법들은 스크래치부터 학습하기 때문에 값비싼 컴퓨팅 리소스, 방대한 학습 데이터셋, 오랜 학습 시간이 필요하다.</p></li>
-<li><p>Tune-A-Video는 텍스트와 이미지가 조건으로 들어왔을 때 비디오 생성을 위해 사전 학습된 텍스트-이미지 디퓨전 모델을 파인튜닝한다.  → 이전 방법들과 마찬가지로 텍스처 깜빡거림(textural flickering), 구조적인 불일치가 나타난다.</p></li>
-<li><p>본 논문에서는 위의 문제를 해결하여 사람과 섬유의 움직임의 싱크를 맞추는 것을 목표로 한다.</p></li>
-</ul>
-</section>
-<section id="conditioning-mechanisms-for-diffusion-models">
-<h2>2.5. Conditioning Mechanisms for Diffusion Models<a class="headerlink" href="#conditioning-mechanisms-for-diffusion-models" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>텍스트를 조건으로 하는 이미지 생성 디퓨전 모델은 널리 사용되어 왔다.</p></li>
-<li><p>텍스트 컨디셔닝은 높은 수준의 디테일을 조절하는 데는 효과적이지만, 사람과 의상의 정확한 identity나 포즈에 대한 풍부하고 상세한 정보를 제공하는 것은 어렵다.</p></li>
-<li><p>여러 연구에서 사전 학습된 text-to-image 스테이블 디퓨전 모델을 위한 이미지 컨디셔닝 문제를 다루고 있다.</p>
-<ul>
-<li><p>이러한 모델에는 종종 어떤 종류의 텍스트 임베딩이 포함된다. (ex: DreamBooth: 피사체별 파인 튜닝을 수행하기 위해 고유한 텍스트 토큰을 학습)</p></li>
-<li><p>기존 이미지와 동영상의 모양을 편집하기 위해 텍스트를 통합하기도 한다.</p></li>
-<li><p>PIDM은 별도의 텍스처 인코더를 사용하여 이미지 텍스처를 인코딩하고 입력된 노이즈 이미지와 대상 포즈를 연결한다.</p></li>
-</ul>
-</li>
-<li><p>DreamPose는 영상 속 피사체의 외형뿐만 아니라 구조와 움직임까지 제어할 수 있다.</p></li>
-<li><p>PIDM과 마찬가지로 이미지 임베딩을 UNet의 크로스 어텐션 레이어에 직접 통합하지만, 이미지 임베딩에 대해 사전 학습된 두 개의 인코더(CLIP, VAE)를 혼합하여 사용한다. → 입력 노이즈에 연결된(concatenated) 멀티 포즈 입력 표현(multi-pose input representation)을 이용해 부드럽고 시간적으로 일관된 모션을 구현할 수 있다.</p></li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="background">
-<h1>3. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
-<ul>
-<li><p>디퓨전 모델</p>
-<ul class="simple">
-<li><p>디퓨전 모델은 품질, 다양성, 학습 안정성 측면에서 합성 태스크에서 GAN을 능가하는 최신 생성 모델이다.</p></li>
-<li><p>표준 이미지 디퓨전 모델은 정규 분포된 랜덤 노이즈에서 이미지를 반복적으로 복원하는 방법을 학습한다.</p></li>
-</ul>
-</li>
-<li><p>Latent diffusion model (ex. Stable Diffusion)</p>
-<figure class="align-default" id="id2">
-<img alt="latent diffusion" class="bg-primary mb-1" src="../../_images/Untitled.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 635 </span><span class="caption-text">Latent Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>오토인코더의 인코딩된 latent space에서 작동하므로 최소한의 품질을 희생하면서 계산 복잡성을 절약한다.</p></li>
-<li><p>스테이블 디퓨전 모델은 VAE와 디노이징 UNet의 두 가지 모델로 구성된다.</p>
-<ul>
-<li><p>VAE 오토인코더</p>
-<ul class="simple">
-<li><p>인코더   <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>: 프레임 <span class="math notranslate nohighlight">\(x\)</span>를 컴팩트한 latent 표현 <span class="math notranslate nohighlight">\(z\)</span>로 추출 (<span class="math notranslate nohighlight">\(z=\mathcal{E}\)</span><span class="math notranslate nohighlight">\((x)\)</span>)</p></li>
-<li><p>디코더 <span class="math notranslate nohighlight">\(\mathcal{D}\)</span>: latent 표현에서 이미지를 복원 (<span class="math notranslate nohighlight">\(x’=\mathcal{D}(z)\)</span>)</p></li>
-</ul>
-</li>
-<li><p>학습하는 동안, latent feature <span class="math notranslate nohighlight">\(z\)</span>는 결정론적 가우시안 프로세스에 의해 타임 스탬프 <span class="math notranslate nohighlight">\(T\)</span>로 디퓨즈되어 노이지 feature인 <span class="math notranslate nohighlight">\(\tilde{z}_T\)</span>를 만듦</p></li>
-<li><p>원본 이미지를 복구하기 위해 각 타임스탬프에 해당하는 latent feature의 노이즈를 반복적으로 예측하도록 시간으로 컨디셔닝된 UNet이 학습 된다.</p></li>
-<li><p>UNet의 목적 함수</p>
-<div class="math notranslate nohighlight">
-\[
-        \begin{align}{\cal L}_{D M}=\mathbb{E}_{z,\epsilon\in{\mathcal{N}}(0,1)}[||\epsilon-\epsilon_{\theta}({\tilde{z}}_{t},t,c)||_{2}^{2}]\end{align}
-        \]</div>
-<ul class="simple">
-<li><p>c: 컨디셔닝 정보의 임베딩 (텍스트, 이미지, 세그멘테이션 마스크 등. 스테이블 디퓨전에서는 CLIP 텍스트 인코더로부터 얻어짐)</p></li>
-</ul>
-</li>
-<li><p>예측된 latent <span class="math notranslate nohighlight">\(z’\)</span>은 예측된 이미지 <span class="math notranslate nohighlight">\(x’ = \mathcal{D}(z')\)</span>를 복구하도록 디코딩 된다.</p></li>
-</ul>
-</li>
-<li><p>Classifier-free guidance</p>
-<ul>
-<li><p>Implicit classifier를 통해 예측된 노이즈 분포를 조건으로 주어진 분포로 밀어붙이는 샘플링 메커니즘이다.</p></li>
-<li><p>이는 랜덤한 확률로 실제 조건으로 주어진 입력을 널 입력(∅)으로 대체하는 훈련 방식인 드롭아웃을 통해 달성된다.</p></li>
-<li><p>인퍼런스하는 동안 조건으로 주어진 예측은 스칼라 가중치 s를 사용하여 unconditional한 예측을 조건부로 가이드하는 데 사용된다.</p>
-<div class="math notranslate nohighlight">
-\[
-        \begin{align}\epsilon_{\theta}=\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset)+s\cdot(\epsilon_{\theta}(\tilde{z}_{t},t,\mathrm{c})-\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset))\end{align}
-        \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset)\)</span>: 조건이 없는 경우에 노이즈 벡터</p></li>
-<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}(\tilde{z}_{t},t,c)\)</span>: 조건이 있는 경우에 노이즈 벡터</p></li>
-</ul>
-<p>→ 조건을 Null로 줬을 때의 모델의 예측값과 조건을 줬을 때의 모델의 예측값을 보간한다.</p>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="method">
-<h1><strong>4. Method</strong><a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>본 논문에서는  단일 이미지와 포즈 시퀀스로부터 사실적인 애니메이션 동영상을 만드는 것을 목표로 한다.</p></li>
-<li><p>이를 위해 사전 학습된 스테이블 디퓨전을 패션 동영상 컬렉션에 맞게 파인튜닝한다.</p></li>
-<li><p>추가 컨디셔닝 신호(이미지 및 포즈)를 받고 동영상으로 볼 수 있는 시간적으로 일관된 콘텐츠를 출력하기 위해 스테이블 디퓨전의 구조를 조정하는 작업이 포함된다.</p></li>
-</ul>
-<section id="overview">
-<h2><strong>4.1. Overview</strong><a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>입출력</p>
-<ul>
-<li><p>입력: 입력 이미지 <span class="math notranslate nohighlight">\(x_0\)</span>, 포즈 <span class="math notranslate nohighlight">\(\{p_1, …, p_N\}\)</span></p></li>
-<li><p>출력: 비디오 <span class="math notranslate nohighlight">\(\{x'_1, …, x’_N\}\)</span> (<span class="math notranslate nohighlight">\(x’_i\)</span>:  입력 포즈 <span class="math notranslate nohighlight">\(p_i\)</span>에 해당하는 i 번째 예측된 프레임)</p></li>
-</ul>
-</li>
-<li><p>입력 이미지와 포즈 시퀀스를 조건으로 하는 사전 훈련된 latent diffusion model을 사용한다.</p></li>
-<li><p>추론 시에는 일반적인 디퓨전 샘플링 절차를 통해 각 프레임을 독립적으로 생성한다.</p>
-<ul>
-<li><p>균일하게 분포된 가우시안 노이즈로 시작하여 두 조건 신호로 디퓨전 모델을 반복적으로 쿼리하여  noisy latent의 노이즈를 제거한다.</p></li>
-</ul>
-</li>
-<li><p>마지막으로 예측된 디노이즈된 latent <span class="math notranslate nohighlight">\(z’_i\)</span>를 디코딩하여 예측된 비디오 프레임 <span class="math notranslate nohighlight">\(x’_i=\mathcal{D}(z’_i)\)</span>를 만든다.</p></li>
-</ul>
-</section>
-<section id="architecture">
-<h2>4.2. Architecture<a class="headerlink" href="#architecture" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>이미지 애니메이션을 위해 원래의 text-to-image 스테이블 디퓨전 모델을 수정하고 파인튜닝한다. (조건: 이미지, 포즈)</p></li>
-<li><p>이미지 애니메이션의 목표</p>
-<ol class="arabic simple">
-<li><p>제공된 입력 이미지에 대한 충실도</p></li>
-<li><p>시각적 품질</p></li>
-<li><p>생성된 프레임의 전반적인 시간적인 안정성</p></li>
-</ol>
-</li>
-<li><p>이러한 목표를 달성하기 위해 아키텍처를 아래와 같이 구성하였다.</p>
-<figure class="align-default" id="id3">
-<img alt="DreamPose Architecture" class="bg-primary mb-1" src="../../_images/021.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 636 </span><span class="caption-text">DreamPose Architecture</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<section id="split-clip-vae-encoder">
-<h3><strong>4.2.1 Split CLIP-VAE Encoder</strong><a class="headerlink" href="#split-clip-vae-encoder" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<img alt="DreamPose Encoder" class="bg-primary mb-1" src="../../_images/031.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 637 </span><span class="caption-text">DreamPose Encoder</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>해당 모듈-컨디셔닝 어댑터(custom conditioning adapter)의 필요성</p>
-<ul>
-<li><p>이미지를 조건으로 넣기 위한 이전 연구 (ex: <a class="reference external" href="https://arxiv.org/abs/2211.09800">InstructPix2Pix</a>)는 주로 조건으로 들어오는 이미지 신호를 디노이징 U-Net에 대한 입력 노이즈와 concat한다.</p></li>
-<li><p>이는 원하는 출력 이미지와 공간적으로(spatially) 정렬된 조건 신호에 대한 조건화에 효과적이지만, DreamPose의 경우에는 네트워크가 입력 이미지와 공간적으로 정렬되지 않은 이미지를 생성하는 것을 목표로 한다.</p></li>
-<li><p>따라서 해당 태스크에는 다른 방법이 필요하고, 이를 위해 맞춤형 컨디셔닝 어댑터를 구현하였다.</p></li>
-<li><p>구체적으로는 CLIP 텍스트 인코더를 맞춤형 컨디셔닝 어댑터(custom conditioning adapter)로 대체하여 이미지 컨디셔닝을 구현하였다.</p></li>
-<li><p>이 어댑터는 사전 학습된 CLIP 이미지 및 VAE 인코더에서 인코딩된 정보를 결합한다.</p></li>
-</ul>
-</li>
-<li><p>디퓨전 기반 파인튜닝</p>
-<ul>
-<li><p>목표: 입력 신호를 원래 네트워크 학습에 사용된 신호와 최대한 유사하게 만들어 학습 기울기를 가능한 한 의미 있게 만드는 것 → 학습된 prior 값의 손실을 방지하는 데 도움이 된다.</p></li>
-<li><p>이러한 이유로 대부분의 디퓨전 기반 파인튜닝 체계는 모든 원래 컨디셔닝 신호를 유지하고 새로운 컨디셔닝 신호와 상호 작용하는 네트워크 가중치를 0으로 초기화한다.</p></li>
-</ul>
-</li>
-<li><p>VAE Encoder의 필요성</p>
-<ul>
-<li><p>스테이블 디퓨전이 텍스트 프롬프트의 CLIP 임베딩으로 컨디셔닝 되고 CLIP이 텍스트와 이미지를 공유 임베딩 스페이스(shared embedding space)로 인코딩한다는 점을 감안할 때 CLIP 컨디셔닝을 조건으로 주어진 이미지에서 파생된 임베딩으로 간단히 대체하는 것이 자연스러워 보일 수 있다.</p></li>
-<li><p>하지만 실제로는 CLIP 이미지 임베딩만으로는 조건으로 주어진 이미지에서 세밀한 디테일을 캡처하기에 충분하지 않다.</p></li>
-<li><p>따라서 스테이블 디퓨전의 VAE에서 인코딩된 latent 임베딩을 추가로 입력한다.</p></li>
-<li><p>이를 통해 디퓨전의 출력 도메인과 일치하는 추가적인 장점을 가지게 된다.</p></li>
-</ul>
-</li>
-<li><p>어댑터 <span class="math notranslate nohighlight">\(\mathcal{A}\)</span></p>
-<ul>
-<li><p>스테이블 디퓨전 아키텍처는 기본적으로 컨디셔닝 신호로 VAE latent를 지원하지 않기 때문에 어댑터 모듈 <span class="math notranslate nohighlight">\(\mathcal{A}\)</span>를 추가한다.</p></li>
-<li><p>해당 어댑터는 CLIP과 VAE 임베딩을 결합하여 네트워크의 일반적인 cross-attention 연산에 사용되는 하나의 임베딩을 생성한다.</p></li>
-<li><p>이 어댑터는 두 신호를 함께  혼합하고 디노이징 U-Net의 cross-attention 모듈에서 예상하는 일반적인 모양으로 출력을 변환한다.</p></li>
-</ul>
-</li>
-<li><p>디퓨전 기반 파인튜닝에서 언급했 듯이 학습에서 네트워크의 충격을 완화하기 위해 처음에는 VAE 임베딩에 해당하는 가중치는 0으로 설정되어 네트워크가 CLIP 임베딩으로만 학습을 시작한다.</p></li>
-<li><p>최종 이미지 컨디셔닝 신호 <span class="math notranslate nohighlight">\(c_I\)</span>를 다음과 같이 정의한다.</p></li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-\begin{align}c_{I}={\mathcal{A}}(c_{\mathrm{CLIP}},c_{\mathrm{VAE}})\end{align}
-\]</div>
-</section>
-<section id="modified-unet">
-<h3>4.2.2 Modified UNet<a class="headerlink" href="#modified-unet" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id5">
-<img alt="Modified UNet" class="bg-primary mb-1" src="../../_images/041.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 638 </span><span class="caption-text">Modified UNet</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>이미지 컨디셔닝과 달리 포즈 컨디셔닝은 이미지와 정렬 된다.</p></li>
-<li><p>Noisy latent <span class="math notranslate nohighlight">\(\tilde{z}_i\)</span>를 타겟 포즈 표현 <span class="math notranslate nohighlight">\(c_p\)</span>와 concat한다.</p></li>
-<li><p>실제 비디오에서 추정된 포즈의 노이즈를 고려하고 생성된 프레임에서의 시간적 일관성을 극대화하기 위해, <span class="math notranslate nohighlight">\(c_p\)</span>를 다섯 개의 연속된 포즈 프레임으로 구성하였다. 즉, <span class="math notranslate nohighlight">\(c_p = \{p_{i-2}, p_{i-1}, p_i, p_{i+1}, p_{i+2}\}\)</span> → 개별 포즈로 네트워크를 학습하는 것보다 연속 포즈로 학습하면  전반적인 움직임의 부드러움과 시간적 일관성이 증가한다.</p></li>
-<li><p>구조적으로 0으로 초기화된 10개의 추가 입력 채널을 받아들이도록 UNet 입력 레이어를 수정하고 noisy latent에 해당하는 원래 채널은 사전 학습된 가중치에서 수정되지 않는다.</p></li>
-</ul>
-</section>
-<section id="finetuning">
-<h3>4.2.3 <strong>Finetuning</strong><a class="headerlink" href="#finetuning" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>스테이블 디퓨전 모델의 대부분의 레이어 weight는 미리 학습된 text-to-image 스테이블 디퓨전 체크포인트로 초기화된다.</p></li>
-<li><p>이 때, CLIP 이미지 인코더는 별도의 미리 학습된 체크포인트에서 로드된다.</p></li>
-<li><p>새로운 레이어는 초기에 새로운 컨디셔닝 신호가 네트워크 출력에 기여하지 않도록 초기화 된다.</p></li>
-<li><p>초기화 후 DreamPose는 아래의 두 단계로 파인튜닝된다.</p>
-<figure class="align-default" id="id6">
-<img alt="Two-phase finetuning" class="bg-primary mb-1" src="../../_images/051.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 639 </span><span class="caption-text">Two-phase Finetuning</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>Full Dataset Finetuning</p>
-<ul class="simple">
-<li><p>전체 훈련 데이터셋에 대한 UNet과 어댑터 모듈을 파인 튜닝하여 입력 이미지 및 포즈와 일치하는 프레임을 합성한다.</p></li>
-</ul>
-</li>
-<li><p>Subset-Specific Finetuning</p>
-<ul class="simple">
-<li><p>하나 이상의 피사체별 입력 이미지에 대해 UNet과 어댑터 모듈을 파인튜닝한 다음 VAE 디코더를 통해 기본 모델을 개선하여 추론에 사용되는 피사체별 맞춤형 모델을 생성한다.</p></li>
-</ul>
-</li>
-</ol>
-</li>
-<li><p>다른 이미지 조건부 디퓨전 방법과 마찬가지로, 입력 이미지의 사람과 의상의 identity를 보존하고 프레임 간에 일관성을 유지하려면 샘플별 파인튜닝이 필수적이었다.</p></li>
-<li><p>그러나 단순히 단일 프레임과 포즈 쌍에 대해 훈련하면 텍스처 고착(texture-sticking)과 같은 아티팩트가 출력 비디오에 발생한다.</p></li>
-<li><p>이를 방지하기 위해 각 단계에서 랜덤 크롭을 추가하는 등의 방법으로 이미지-포즈쌍을 증강한다.</p></li>
-<li><p>VAE 디코더를 파인튜닝하는 것이 더 선명하고 사실적인 디테일을 복구하는 데 중요하다.</p>
-<figure class="align-default" id="id7">
-<img alt="Importance of VAE finetuning" class="bg-primary mb-1" src="../../_images/061.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 640 </span><span class="caption-text">VAE 파인튜닝의 중요성</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-</section>
-<section id="pose-and-image-classifier-free-guidance">
-<h2>4.4. Pose and Image Classifier-Free Guidance<a class="headerlink" href="#pose-and-image-classifier-free-guidance" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>추론시 단일 입력 이미지와 피사체별 모델(subject-specific model)을 사용하는 일련의 포즈에서 프레임별로 동영상을 생성한다.</p></li>
-<li><p>이중(dual) classifier-free guidance를 사용하여 추론 시에 이미지 컨디셔닝 <span class="math notranslate nohighlight">\(c_I\)</span>와 포즈 컨디셔닝 <span class="math notranslate nohighlight">\(c_p\)</span>의 강도를 조절한다.</p></li>
-<li><p>이중 classifier-free guidance는 식 (3)에서 다음과 같이 수정된다.</p>
-<div class="math notranslate nohighlight">
-\[\begin{split}
-    \begin{align*}
-    {\epsilon_{\theta}(z_{t},c_{I},c_{p})} &amp; {= \epsilon_{\theta}(z_{t},\emptyset,\emptyset)} \\
-    {} &amp; {+\, s_{I}(\epsilon_{\theta}(z_{t},c_{I},\mathcal{\emptyset})-\epsilon_{\theta}(z_{t},\emptyset,\emptyset))} \\
-    {} &amp; {+\, s_{p}(\epsilon_{\theta}(z_{t},\mathcal{c}_{I},\mathcal{c}_p)-\epsilon_{\theta}(z_{t},\mathcal{c}_{I},\emptyset))}
-    \end{align*}
-    \end{split}\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(s_I\)</span>, <span class="math notranslate nohighlight">\(s_p\)</span>: 가이던스 웨이트</p></li>
-<li><p><span class="math notranslate nohighlight">\(c_I\)</span>: 이미지 컨디셔닝</p></li>
-<li><p><span class="math notranslate nohighlight">\(c_p\)</span>: 포즈 컨디셔닝</p></li>
-</ul>
-<p>→ 이미지 컨디셔닝이 있는 경우와 없는 경우의 노이즈 벡터 차이를 계산하고, 포즈 컨디셔닝이 있는 경우와 없는 경우의 노이즈 벡터 차이를 계산해서 이를 가이던스 웨이트를 통해 강도를 조정해서 반영</p>
-</li>
-<li><p><span class="math notranslate nohighlight">\(s_I\)</span>가 크면 입력 이미지에 높은 외관 충실도를 보장하고, <span class="math notranslate nohighlight">\(s_p\)</span>가 크면 입력 포즈에 대한 정렬을 보장한다.</p></li>
-<li><p>이중 classifier-free guidance는 포즈 및 이미지 가이드를 강화하는 것 외에도, 피사체별 모델 파인튜닝 후 하나의 입력 포즈에 대한 오버피팅을 방지한다.</p></li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="experiments">
-<h1>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
-<section id="implementation-details">
-<h2>5.1.  Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>입력 이미지 resolution: 512x512</p></li>
-<li><p>GPU: NVIDIA A100 2개</p></li>
-<li><p>첫 번째 훈련 단계</p>
-<ul>
-<li><p>전체 훈련 데이터셋 사용</p></li>
-<li><p>5 epoch</p></li>
-<li><p>5e-6 learning rate</p></li>
-<li><p>배치사이즈: 16 (4 gradient accumulation step)</p></li>
-<li><p>Dropout: 포즈 입력 5%, 이미지 입력 5%</p></li>
-</ul>
-</li>
-<li><p>두 번째 훈련 단계</p>
-<ul>
-<li><p>특정 샘플 프레임 사용</p></li>
-<li><p>500 step</p></li>
-<li><p>1e-5 learning rate</p></li>
-<li><p>Dropout 적용 X</p></li>
-</ul>
-</li>
-<li><p>VAE 디코더 파인튜닝</p>
-<ul>
-<li><p>1500 step</p></li>
-<li><p>5e-5 learning rate</p></li>
-</ul>
-</li>
-<li><p>추론 시에는 PNDM 샘플러 사용 (100step)</p></li>
-</ul>
-</section>
-<section id="dataset">
-<h2>5.2. Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>UBC Fashion 데이터셋 사용</p></li>
-<li><p>Split</p>
-<ul>
-<li><p>Train: 339개의 영상</p></li>
-<li><p>Test: 100개의 영상</p></li>
-</ul>
-</li>
-<li><p>각 비디오의 프레임 속도는 초당 30프레임이며 길이는 약 12초</p></li>
-<li><p>학습 중에는 학습 비디오로부터 랜덤으로 프레임 쌍을 샘플링 하였다.</p></li>
-<li><p>DensePose를 이용해서 포즈를 계산하였다.</p></li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="results">
-<h1>6. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h1>
-<section id="comparisons">
-<h2>6.1. Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>공개적으로 사용 가능한 두 가지 최신 비디오 합성 방법인 MRAA(Motion Representations for Articulated Animation)과 Thin-Plate Spline Motion Model(TPSMM)과 수치적 및 정성적인 비교를 하였다.</p></li>
-<li><p>제공된 훈련 스크립트와 권장 에폭 수를 사용하여 두 가지 모델을 UBC 패션 데이터셋을 이용해서 스크래치부터 학습하였다.</p></li>
-<li><p>평가를 위해서는 AVD 모드에서 제공된 테스트 스크립트를 사용하였다.</p></li>
-<li><p>PIDM과도 정성적인 비교를 하였다. PIDM의 경우 훈련 스크립트를 사용할 수 없어서 DeepFashion 데이터셋에 대해 학습된 체크포인트를 통해 비교하였다.</p></li>
-<li><p>100개의 디노이징 스텝을 사용하여 PIDM과 DreamPose를 실행하였다.</p></li>
-</ul>
-<section id="quantitative-analysis">
-<h3>6.1.1 Quantitative Analysis<a class="headerlink" href="#quantitative-analysis" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id8">
-<img alt="result 1" class="bg-primary mb-1" src="../../_images/071.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 641 </span><span class="caption-text">정량적 성능 비교</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>256 픽셀 해상도의 100개의 고유한 패션 동영상으로 구성된 UBC 패션 테스트 셋에 대해 모든 모델을 테스트 하였다.</p></li>
-<li><p>각 동영상에 대해 입력 프레임에서 최소 50프레임 이상 떨어져 있는 50개의 프레임을 추출하여 테스트하였다.</p></li>
-<li><p>MRAA와 TPSMM은 모두 driving video에서 추출된 feature에 의존하는 반면, DreamPose는 UV-포즈 시퀀스에만 의존한다는 점에 유의하라.</p></li>
-<li><p>그럼에도 불구하고 DreamPose 모델은 네 가지 정량적 지표 모두에서 두 가지 방법보다 정량적으로 우수한 성능을 보였다.</p></li>
-</ul>
-</section>
-<section id="qualitative-analysis">
-<h3>6.1.2 Qualitative Analysis<a class="headerlink" href="#qualitative-analysis" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id9">
-<img alt="result 2" class="bg-primary mb-1" src="../../_images/081.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 642 </span><span class="caption-text">정성적 성능 비교</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>MRAA와 TPSMM은 새로운 포즈를 취할 때 인물의  identity, 옷감 주름, 미세한 패턴이 손실되는 반면 DreamPose는 디테일을 정확하게 유지한다.</p></li>
-<li><p>포즈를 크게 변경하는 동안 MRAA는 팔 다리가 분리 될 수 있다.</p></li>
-<li><p>PIDM과의 비교</p>
-<figure class="align-default" id="id10">
-<img alt="result 3" class="bg-primary mb-1" src="../../_images/091.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 643 </span><span class="caption-text">PIDM과의 비교</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DreamPose는 얼굴의 identity와 의상 패턴 모두 더 충실도 높은 결과를 생성한다.</p></li>
-<li><p>PIDM은 사실적인 얼굴을 합성하지만, 원본 인물의 identity와 일치하지 않고, identity와 옷차림이 프레임마다 달랐다. → PIDM이 비디오 합성에서는 잘 동작하지 않는다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="ablation-studies">
-<h2>6.2. Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>아래 네 가지 변형에 대해 성능을 비교한다.</p>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{CLIP}}\)</span>: 듀얼 CLIP-VAE 인코더 대신에 사전 학습된 CLIP 이미지 인코더를 사용 → CLIP-VAE 인코더 효과 테스트</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{NO-VAE-FT}}\)</span>: VAE 디코더를 파인튜닝하지 않은 버전 → 디코더 파인튜닝 효과 테스트</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{1-pose}}\)</span>: 5개의 연결된 연속 포즈 대신 하나의 대상 포즈만 노이즈에 연결한 버전 → 연결된 5개의 프레임 효과 테스트</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{Full}}\)</span>: 논문에서 제안한 모든 방법이 다 적용된 DreamPose</p></li>
-</ol>
-</li>
-</ul>
-<p><strong>Quantitative Comparison</strong></p>
-<figure class="align-default" id="id11">
-<img alt="result 4" class="bg-primary mb-1" src="../../_images/10.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 644 </span><span class="caption-text">Ablation Studies - 정량적 비교</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Qualitative Comparison</strong></p>
-<figure class="align-default" id="id12">
-<img alt="result 5" class="bg-primary mb-1" src="../../_images/11.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 645 </span><span class="caption-text">Ablation Studies - 정성적 비교</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>기존의 스테이블 디퓨전에서는 인물의 identity에 대한 디테일을 보존할 수 없었다.</p></li>
-<li><p>텍스트 인코더를 CLIP 인코더로 교체한 결과 대부분의 이미지 디테일은 캡처할 수 있지만, 여전히 외형에 대한 정보 손실이 발생한다.</p></li>
-<li><p>VAE 디코더를 파인튜닝하면 디테일의 선명도가 크게 향상되고 입력 포즈에 대한 오버피팅이 발생하지 않는다.</p></li>
-<li><p>한 가지 포즈만 입력하면 팔과 머리카락 주변에서의 피사체의 형태가 눈에 띄게 깜박이는 현상이 나타났다.</p></li>
-</ul>
-</section>
-<section id="multiple-input-images">
-<h2>6.3. Multiple Input Images<a class="headerlink" href="#multiple-input-images" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>DreamPose는 피사체에 대한 입력 이미지를 여러 장 넣어서 파인튜닝할 수 있다.</p></li>
-<li><p>피사체의 입력 이미지를 추가하면 품질과 시점의 일관성이 향상된다.</p>
-<figure class="align-default" id="id13">
-<img alt="result 6" class="bg-primary mb-1" src="../../_images/12.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 646 </span><span class="caption-text">Multiple Input Images 결과</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="limitations-future-work">
-<h1><strong>7. Limitations &amp; Future Work</strong><a class="headerlink" href="#limitations-future-work" title="Permalink to this heading">#</a></h1>
-<ul>
-<li><p>실패 사례</p>
-<figure class="align-default" id="id14">
-<img alt="result 7" class="bg-primary mb-1" src="../../_images/13.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 647 </span><span class="caption-text">실패 사례 예시</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>드문 경우지만 팔다리가 옷 속으로 사라지거나(왼쪽), hallucinate된 feature가 보이거나(중간), 대상 포즈가 뒤를 향할 때 방향이 잘못 정렬되는 경우(오른쪽)가 관찰된다.</p></li>
-</ul>
-</li>
-<li><p>또한 단순한 패턴의 옷에서 사실적인 결과를 생성하지만 일부 결과는 크고 복잡한 패턴에서 약간의 깜박임 동작을 보인다.</p></li>
-<li><p>다른 디퓨전 모델과 마찬가지로 파인튜닝 및 추론 시간이 GAN 또는 VAE에 비해 느리다.</p>
-<ul class="simple">
-<li><p>특정 피사체에 대한 모델 파인튜닝은 프레임당 18초의 렌더링 시간 외에, UNet의 경우 약 10분, VAE 디코더의 경우 약 20분이 소요된다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>8. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>스틸 패션 이미지 애니메이션을 위한 새로운 디퓨전 기반 방법인 DreamPose를 제안하였다.</p></li>
-<li><p>한 장의 이미지와 포즈 시퀀스가 주어졌을 때,  섬유, 패턴, 사람의 identity를 애니메이션 하는 사실적인 패션 동영상을 생성하는 방법을 증명하였다.</p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DreaMoving.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DreaMoving</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="NeRF.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction"><strong>1. Introduction</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1. Diffusion models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#still-image-animation">2.2. Still Image Animation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fashion-image-synthesis">2.3. Fashion Image Synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-for-video-synthesis">2.4. Diffusion Models for Video Synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-mechanisms-for-diffusion-models">2.5. Conditioning Mechanisms for Diffusion Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method"><strong>4. Method</strong></a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview"><strong>4.1. Overview</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">4.2. Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#split-clip-vae-encoder"><strong>4.2.1 Split CLIP-VAE Encoder</strong></a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#modified-unet">4.2.2 Modified UNet</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#finetuning">4.2.3 <strong>Finetuning</strong></a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pose-and-image-classifier-free-guidance">4.4. Pose and Image Classifier-Free Guidance</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.1.  Implementation Details</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">5.2. Dataset</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.1. Comparisons</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-analysis">6.1.1 Quantitative Analysis</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">6.1.2 Qualitative Analysis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.2. Ablation Studies</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-input-images">6.3. Multiple Input Images</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-future-work"><strong>7. Limitations &amp; Future Work</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">8. Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/DreamPose';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis" href="NeRF.html" />
+    <link rel="prev" title="DreaMoving" href="DreaMoving.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/DreamPose.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/DreamPose.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction"><strong>1. Introduction</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1. Diffusion models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#still-image-animation">2.2. Still Image Animation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fashion-image-synthesis">2.3. Fashion Image Synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-for-video-synthesis">2.4. Diffusion Models for Video Synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-mechanisms-for-diffusion-models">2.5. Conditioning Mechanisms for Diffusion Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method"><strong>4. Method</strong></a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview"><strong>4.1. Overview</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">4.2. Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#split-clip-vae-encoder"><strong>4.2.1 Split CLIP-VAE Encoder</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#modified-unet">4.2.2 Modified UNet</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#finetuning">4.2.3 <strong>Finetuning</strong></a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pose-and-image-classifier-free-guidance">4.4. Pose and Image Classifier-Free Guidance</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.1.  Implementation Details</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">5.2. Dataset</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.1. Comparisons</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-analysis">6.1.1 Quantitative Analysis</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">6.2.2 Qualitative Analysis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.2. Ablation Studies</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-input-images">6.3. Multiple Input Images</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-future-work"><strong>7. Limitations &amp; Future Work</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">8. Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2304.06025">https://arxiv.org/abs/2304.06025</a></p></li>
+<li><p>Project: <a class="reference external" href="https://grail.cs.washington.edu/projects/dreampose/">https://grail.cs.washington.edu/projects/dreampose/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
+<li><p><strong>Last updated on May. 08, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dreampose-fashion-image-to-video-synthesis-via-stable-diffusion">
+<h1>DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion<a class="headerlink" href="#dreampose-fashion-image-to-video-synthesis-via-stable-diffusion" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id1">
+<img alt="DreamPose_input_output" class="bg-primary mb-1" src="pics/DreamPose/00.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 646 </span><span class="caption-text">DreamPose 입출력</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<aside>
+💡핵심 요약
+- 입력: 이미지, 포즈 시퀀스 / 출력: 입력 이미지의 사람이 포즈 시퀀스에 따라 움직이는 듯한 비디오 영상
+- 이미지와 포즈 시퀀스가 주어지면 사람과 옷감의 움직임을 모두 포함하는 비디오를 합성하는 DreamPose를 제안하였다.
+- 사전 학습된 스테이블 디퓨전을 파인튜닝하여 해당 태스크를 수행 하였다.
+- 입력 이미지의 특성을 잘 포착하기 위해 CLIP 인코더와 VAE 인코더를 도입하였고, 이를 통합하는 Adapter를 도입하여 스테이블 디퓨전의 U-Net의 조건으로 넣어줬다.
+- 포즈 시퀀스를 잘 반영하기 위해 연속적인 5개의 프레임을 노이즈와 concat하여 디노이징 UNet에 입력으로 주었다.
+- 두 단계의 파인튜닝 전략을 사용하였다.
+    - 첫 번째 파인튜닝: 전체 데이터셋에 대해 파인튜닝
+    - 두 번째 파인튜닝: 피사체에 대해 파인튜닝
+- 듀얼 classifier-free guidance를 사용하여 입력 이미지와 포즈 시퀀스에 대한 충실도의 강도를 조정한다.
+- 패션 비디오 합성 태스크에 대해서 SOTA 성능을 달성 하였다.
+</aside>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1><strong>1. Introduction</strong><a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>DreamPose가 제안된 배경</p>
+<ul>
+<li><p>패션 사진은 온라인에 널리 퍼져 있지만, 전달할 수 있는 정보가 제한적이며 입었을 때 옷의 늘어진 모양이나 흐름 등 옷의 중요한 뉘앙스를 포착하지 못한다.</p></li>
+<li><p>패션 동영상은 이러한 모든 디테일을 보여주기에 소비자의 의사 결정에 유용한 정보를 제공하지만, 동영상이 있는 상품은 매우 드물다.</p></li>
+</ul>
+</li>
+<li><p>DreamPose</p>
+<ul>
+<li><p>본 논문에서는 포즈 시퀀스를 따라 패션 사진을 사실적인 애니메이션 비디오로 변환하는 방법인 DreamPose를 소개한다.</p></li>
+<li><p>Stable diffusion을 기반으로 한다.</p></li>
+<li><p>하나 이상의 사람 이미지와 포즈 시퀀스가 주어지면, 포즈 시퀀스를 따라 고품질 비디오를 생성한다.</p></li>
+</ul>
+</li>
+<li><p>기존 비디오 생성 모델들의 문제점</p>
+<ul>
+<li><p>이미지 생성 디퓨전 모델은 좋은 결과를 보여주었지만, 비디오 생성 디퓨전 모델은 동일한 품질의 결과를 얻지 못했으며, 텍스처 움직임이나 카툰과 같은 모양으로 제한된 경우가 많다.</p></li>
+<li><p>시간적 일관성이 떨어진다.</p></li>
+<li><p>모션 jitter가 발생한다.</p></li>
+<li><p>사실성(realism)이 부족하다.</p></li>
+<li><p>대상 비디오의 움직임이나 세부적인 물체 모양을 제어할 수 없다.</p>
+<ul>
+<li><p>기존 모델이 주로 텍스트를 기반으로 하기 때문에</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>DreamPose의 접근법</p>
+<ul>
+<li><p>이미지 및 포즈 시퀀스를 조건으로 받는 방식을 사용하여 fidelity와 프레임 간 일관성을 높일 수 있다.</p></li>
+<li><p>이미지 분포를 효과적으로 모델링하는 기존 사전 학습된 이미지 디퓨전 모델을 파인 튜닝하였다. → 이미지 애니메이션 태스크를 컨디셔닝 신호와 일치하는 이미지의 부분 공간을 찾는 것으로 단순화 할 수 있다.</p></li>
+<li><p>해당 태스크를 위해 스테이블 디퓨전의 인코더와 컨디셔닝 메커니즘을 재설계하였다.</p></li>
+<li><p>2-스테이지 파인튜닝 방식을 사용한다</p>
+<ul>
+<li><p>UNet과 VAE를 하나 혹은 여러 입력 이미지에 대해서 파인튜닝</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Contribution</p>
+<ol class="arabic simple">
+<li><p>DreamPose: 패션 이미지 애니메이션을 위해 이미지 및 포즈를 조건으로 하는 디퓨전 방식</p></li>
+<li><p>프레임 간 시간적 일관성을 크게 향상 시키는 간단하지만 효과적인 포즈 컨디셔닝 방식</p></li>
+<li><p>컨디셔닝 이미지의 fidelity를 높여주는 split CLIP-VAE 인코더</p></li>
+<li><p>이미지의 fidelity와 새로운 포즈에 대한 일반화 사이의 균형을 효과적으로 맞추는 파인튜닝 전략</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<section id="diffusion-models">
+<h2>2.1. Diffusion models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>디퓨전 모델은 최근 text-to-image, 비디오 생성, 3D 생성 태스크에서 인상적인 결과를 보여주고 있다.</p></li>
+<li><p>하지만 이러한 모델을 처음부터 훈련하는 것은 비용이 많이 들고 많은 양의 데이터가 필요하다.</p></li>
+<li><p>스테이블 디퓨전과 같은 latent diffusion 모델은 디퓨전과 디노이징 과정을 latent space에서 수행하기 때문에 계산 요구 사항과 훈련 시간을 대폭 줄일 수 있다.</p></li>
+<li><p>스테이블 디퓨전과 그 사전 훈련된 체크포인트는 출시 이후 다양한 이미지 생성 작업에 사용되었다.</p></li>
+<li><p>본 논문에서도 사전 훈련된 스테이블 디퓨전 모델을 활용하고, subject에 특화된 파인튜닝을 한다.</p></li>
+</ul>
+</section>
+<section id="still-image-animation">
+<h2>2.2. Still Image Animation<a class="headerlink" href="#still-image-animation" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>하나 이상의 입력 이미지로부터 동영상을 생성하는 태스크</p></li>
+<li><p>기존에 디퓨전 기반이 아닌 접근 방식들은 배경 예측, 모션 표현, occlusion map이나 dense map 등 여러 개의 개별 네트워크로 구성되는 경우가 많았다.</p>
+<ul>
+<li><p>각 단계마다 별도의 훈련이 필요하고 ground-truth 모션이나 depth등의 ground-truth 데이터를 사용할 수 없거나 불완전할 가능성이 있다.</p></li>
+<li><p>모션이 크고 복잡할 경우에 ground-truth에 대한 예측은 도출하기 더 어렵고 오류가 발생하기 쉽다.</p></li>
+</ul>
+</li>
+<li><p>최근 여러 방법들은 엔드 투 엔드 싱글 네트워크 접근 방식을 탐구하고 있다. (예: optical flow and warping, cross-attention 모듈, NeRF 표현을 사용한 애니메이션이 가능한 3D 휴먼 생성 등)</p></li>
+</ul>
+</section>
+<section id="fashion-image-synthesis">
+<h2>2.3. Fashion Image Synthesis<a class="headerlink" href="#fashion-image-synthesis" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>기존 pose-guided 패션 이미지 합성 방법</p>
+<ul>
+<li><p>일반적으로 GAN을 기반으로 했고, optical flow에 의존해 이미지 특징을 포즈에 맞추는 방법을 사용했다. → 큰 포즈 변화, 가려진 영역 합성, 의상 스타일 보존에 어려움을 겪는 경우가 많다.</p></li>
+<li><p>최근엔 어텐션 기반 메커니즘을 사용하여, 셀프/크로스 어텐션을 사용하여 이미지 특징을 목표 프레임에 맞추려고 하였다.</p></li>
+<li><p>디퓨전 기반</p>
+<ul>
+<li><p>DiffFashion: 레퍼런스 이미지의 스타일을 트랜스퍼하여 의류 아이템을 편집하는 것을 목표로 한다.</p></li>
+<li><p>PIDM: 포즈를 조건으로 넣어 사람 이미지를 생성한다 → 시간적 일관성을 위한 최적화는 하지 않는다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="diffusion-models-for-video-synthesis">
+<h2>2.4. Diffusion Models for Video Synthesis<a class="headerlink" href="#diffusion-models-for-video-synthesis" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>많은 text-to-video 디퓨전 모델은 text-to-image 디퓨전 모델을 활용하여 적용한다.</p></li>
+<li><p>결과를 기대할만 하지만, 여전히 텍스트-이미지 모델과 같은 성능은 나오지 않고 있다.</p></li>
+<li><p>프레임 간의 시간적 일관성을 유지하는 것과 사실적인 모션을 생성하는 것이 어렵다.</p></li>
+<li><p>어떤 디퓨전 기반 방법들은 스크래치부터 학습하기 때문에 값비싼 컴퓨팅 리소스, 방대한 학습 데이터셋, 오랜 학습 시간이 필요하다.</p></li>
+<li><p>Tune-A-Video는 텍스트와 이미지가 조건으로 들어왔을 때 비디오 생성을 위해 사전 학습된 텍스트-이미지 디퓨전 모델을 파인튜닝한다.  → 이전 방법들과 마찬가지로 텍스처 깜빡거림(textural flickering), 구조적인 불일치가 나타난다.</p></li>
+<li><p>본 논문에서는 위의 문제를 해결하여 사람과 섬유의 움직임의 싱크를 맞추는 것을 목표로 한다.</p></li>
+</ul>
+</section>
+<section id="conditioning-mechanisms-for-diffusion-models">
+<h2>2.5. Conditioning Mechanisms for Diffusion Models<a class="headerlink" href="#conditioning-mechanisms-for-diffusion-models" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>텍스트를 조건으로 하는 이미지 생성 디퓨전 모델은 널리 사용되어 왔다.</p></li>
+<li><p>텍스트 컨디셔닝은 높은 수준의 디테일을 조절하는 데는 효과적이지만, 사람과 의상의 정확한 identity나 포즈에 대한 풍부하고 상세한 정보를 제공하는 것은 어렵다.</p></li>
+<li><p>여러 연구에서 사전 학습된 text-to-image 스테이블 디퓨전 모델을 위한 이미지 컨디셔닝 문제를 다루고 있다.</p>
+<ul>
+<li><p>이러한 모델에는 종종 어떤 종류의 텍스트 임베딩이 포함된다. (ex: DreamBooth: 피사체별 파인 튜닝을 수행하기 위해 고유한 텍스트 토큰을 학습)</p></li>
+<li><p>기존 이미지와 동영상의 모양을 편집하기 위해 텍스트를 통합하기도 한다.</p></li>
+<li><p>PIDM은 별도의 텍스처 인코더를 사용하여 이미지 텍스처를 인코딩하고 입력된 노이즈 이미지와 대상 포즈를 연결한다.</p></li>
+</ul>
+</li>
+<li><p>DreamPose는 영상 속 피사체의 외형뿐만 아니라 구조와 움직임까지 제어할 수 있다.</p></li>
+<li><p>PIDM과 마찬가지로 이미지 임베딩을 UNet의 크로스 어텐션 레이어에 직접 통합하지만, 이미지 임베딩에 대해 사전 학습된 두 개의 인코더(CLIP, VAE)를 혼합하여 사용한다. → 입력 노이즈에 연결된(concatenated) 멀티 포즈 입력 표현(multi-pose input representation)을 이용해 부드럽고 시간적으로 일관된 모션을 구현할 수 있다.</p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>3. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<ul>
+<li><p>디퓨전 모델</p>
+<ul class="simple">
+<li><p>디퓨전 모델은 품질, 다양성, 학습 안정성 측면에서 합성 태스크에서 GAN을 능가하는 최신 생성 모델이다.</p></li>
+<li><p>표준 이미지 디퓨전 모델은 정규 분포된 랜덤 노이즈에서 이미지를 반복적으로 복원하는 방법을 학습한다.</p></li>
+</ul>
+</li>
+<li><p>Latent diffusion model (ex. Stable Diffusion)</p>
+<figure class="align-default" id="id2">
+<img alt="latent diffusion" class="bg-primary mb-1" src="../../_images/Untitled.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 647 </span><span class="caption-text">Latent Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>오토인코더의 인코딩된 latent space에서 작동하므로 최소한의 품질을 희생하면서 계산 복잡성을 절약한다.</p></li>
+<li><p>스테이블 디퓨전 모델은 VAE와 디노이징 UNet의 두 가지 모델로 구성된다.</p>
+<ul>
+<li><p>VAE 오토인코더</p>
+<ul class="simple">
+<li><p>인코더   <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>: 프레임 <span class="math notranslate nohighlight">\(x\)</span>를 컴팩트한 latent 표현 <span class="math notranslate nohighlight">\(z\)</span>로 추출 (<span class="math notranslate nohighlight">\(z=\mathcal{E}\)</span><span class="math notranslate nohighlight">\((x)\)</span>)</p></li>
+<li><p>디코더 <span class="math notranslate nohighlight">\(\mathcal{D}\)</span>: latent 표현에서 이미지를 복원 (<span class="math notranslate nohighlight">\(x'=\mathcal{D}(z)\)</span>)</p></li>
+</ul>
+</li>
+<li><p>학습하는 동안, latent feature <span class="math notranslate nohighlight">\(z\)</span>는 결정론적 가우시안 프로세스에 의해 타임 스탬프 <span class="math notranslate nohighlight">\(T\)</span>로 디퓨즈되어 노이지 feature인 <span class="math notranslate nohighlight">\(\tilde{z}_T\)</span>를 만듦</p></li>
+<li><p>원본 이미지를 복구하기 위해 각 타임스탬프에 해당하는 latent feature의 노이즈를 반복적으로 예측하도록 시간으로 컨디셔닝된 UNet이 학습 된다.</p></li>
+<li><p>UNet의 목적 함수</p>
+<div class="math notranslate nohighlight">
+\[
+        \begin{align}{\cal L}_{D M}=\mathbb{E}_{z,\epsilon\in{\mathcal{N}}(0,1)}\left[\|\epsilon-\epsilon_{\theta}({\tilde{z}}_{t},t,c)\|_{2}^{2}\right]\end{align}
+        \]</div>
+<ul class="simple">
+<li><p>c: 컨디셔닝 정보의 임베딩 (텍스트, 이미지, 세그멘테이션 마스크 등; 스테이블 디퓨전에서는 CLIP 텍스트 인코더로부터 얻어짐)</p></li>
+</ul>
+</li>
+<li><p>예측된 latent <span class="math notranslate nohighlight">\(z'\)</span>은 예측된 이미지 <span class="math notranslate nohighlight">\(x' = \mathcal{D}(z')\)</span>를 복구하도록 디코딩 된다.</p></li>
+</ul>
+</li>
+<li><p>Classifier-free guidance</p>
+<ul>
+<li><p>Implicit classifier를 통해 예측된 노이즈 분포를 조건으로 주어진 분포로 밀어붙이는 샘플링 메커니즘이다.</p></li>
+<li><p>이는 랜덤한 확률로 실제 조건으로 주어진 입력을 널 입력(∅)으로 대체하는 훈련 방식인 드롭아웃을 통해 달성된다.</p></li>
+<li><p>인퍼런스하는 동안 조건으로 주어진 예측은 스칼라 가중치 s를 사용하여 unconditional한 예측을 조건부로 가이드하는 데 사용된다.</p>
+<div class="math notranslate nohighlight">
+\[
+        \begin{align}\epsilon_{\theta}=\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset)+s\cdot(\epsilon_{\theta}(\tilde{z}_{t},t,\mathrm{c})-\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset))\end{align}
+        \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}(\tilde{z}_{t},t,\emptyset)\)</span>: 조건이 없는 경우에 노이즈 벡터</p></li>
+<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}(\tilde{z}_{t},t,c)\)</span>: 조건이 있는 경우에 노이즈 벡터</p></li>
+</ul>
+<p>→ 조건을 Null로 줬을 때의 모델의 예측값과 조건을 줬을 때의 모델의 예측값을 보간한다.</p>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1><strong>4. Method</strong><a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>본 논문에서는  단일 이미지와 포즈 시퀀스로부터 사실적인 애니메이션 동영상을 만드는 것을 목표로 한다.</p></li>
+<li><p>이를 위해 사전 학습된 스테이블 디퓨전을 패션 동영상 컬렉션에 맞게 파인튜닝한다.</p></li>
+<li><p>추가 컨디셔닝 신호(이미지 및 포즈)를 받고 동영상으로 볼 수 있는 시간적으로 일관된 콘텐츠를 출력하기 위해 스테이블 디퓨전의 구조를 조정하는 작업이 포함된다.</p></li>
+</ul>
+<section id="overview">
+<h2><strong>4.1. Overview</strong><a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>입출력</p>
+<ul>
+<li><p>입력: 입력 이미지 <span class="math notranslate nohighlight">\(x_0\)</span>, 포즈 <span class="math notranslate nohighlight">\(\{p_1, \dots, p_N\}\)</span></p></li>
+<li><p>출력: 비디오 <span class="math notranslate nohighlight">\(\{x'_1, \dots, x'_N\}\)</span> (<span class="math notranslate nohighlight">\(x'_i\)</span>:  입력 포즈 <span class="math notranslate nohighlight">\(p_i\)</span>에 해당하는 i 번째 예측된 프레임)</p></li>
+</ul>
+</li>
+<li><p>입력 이미지와 포즈 시퀀스를 조건으로 하는 사전 훈련된 latent diffusion model을 사용한다.</p></li>
+<li><p>추론 시에는 일반적인 디퓨전 샘플링 절차를 통해 각 프레임을 독립적으로 생성한다.</p>
+<ul>
+<li><p>균일하게 분포된 가우시안 노이즈로 시작하여 두 조건 신호로 디퓨전 모델을 반복적으로 쿼리하여  noisy latent의 노이즈를 제거한다.</p></li>
+</ul>
+</li>
+<li><p>마지막으로 예측된 디노이즈된 latent <span class="math notranslate nohighlight">\(z'_i\)</span>를 디코딩하여 예측된 비디오 프레임 <span class="math notranslate nohighlight">\(x'_i=\mathcal{D}(z'_i)\)</span>를 만든다.</p></li>
+</ul>
+</section>
+<section id="architecture">
+<h2>4.2. Architecture<a class="headerlink" href="#architecture" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>이미지 애니메이션을 위해 원래의 text-to-image 스테이블 디퓨전 모델을 수정하고 파인튜닝한다. (조건: 이미지, 포즈)</p></li>
+<li><p>이미지 애니메이션의 목표</p>
+<ol class="arabic simple">
+<li><p>제공된 입력 이미지에 대한 충실도</p></li>
+<li><p>시각적 품질</p></li>
+<li><p>생성된 프레임의 전반적인 시간적인 안정성</p></li>
+</ol>
+</li>
+<li><p>이러한 목표를 달성하기 위해 아키텍처를 아래와 같이 구성하였다.</p>
+<figure class="align-default" id="id3">
+<img alt="DreamPose Architecture" class="bg-primary mb-1" src="../../_images/021.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 648 </span><span class="caption-text">DreamPose Architecture</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<section id="split-clip-vae-encoder">
+<h3><strong>4.2.1 Split CLIP-VAE Encoder</strong><a class="headerlink" href="#split-clip-vae-encoder" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<img alt="DreamPose Encoder" class="bg-primary mb-1" src="../../_images/031.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 649 </span><span class="caption-text">DreamPose Encoder</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>해당 모듈-컨디셔닝 어댑터(custom conditioning adapter)의 필요성</p>
+<ul>
+<li><p>이미지를 조건으로 넣기 위한 이전 연구 (ex: <a class="reference external" href="https://arxiv.org/abs/2211.09800">InstructPix2Pix</a>)는 주로 조건으로 들어오는 이미지 신호를 디노이징 U-Net에 대한 입력 노이즈와 concat한다.</p></li>
+<li><p>이는 원하는 출력 이미지와 공간적으로(spatially) 정렬된 조건 신호에 대한 조건화에 효과적이지만, DreamPose의 경우에는 네트워크가 입력 이미지와 공간적으로 정렬되지 않은 이미지를 생성하는 것을 목표로 한다.</p></li>
+<li><p>따라서 해당 태스크에는 다른 방법이 필요하고, 이를 위해 맞춤형 컨디셔닝 어댑터를 구현하였다.</p></li>
+<li><p>즉, CLIP 텍스트 인코더를 맞춤형 컨디셔닝 어댑터(custom conditioning adapter)로 대체하는 방식으로 이미지 컨디셔닝을 구현하였다.</p></li>
+<li><p>이 어댑터는 사전 학습된 CLIP 이미지 및 VAE 인코더에서 인코딩된 정보를 결합한다.</p></li>
+</ul>
+</li>
+<li><p>디퓨전 기반 파인튜닝</p>
+<ul>
+<li><p>목표: 입력 신호를 원래 네트워크 학습에 사용된 신호와 최대한 유사하게 만들어 학습 기울기를 가능한 한 의미 있게 만드는 것 → 학습된 prior 값의 손실을 방지하는 데 도움이 된다.</p></li>
+<li><p>이러한 이유로 대부분의 디퓨전 기반 파인튜닝 체계는 모든 원래 컨디셔닝 신호를 유지하고 새로운 컨디셔닝 신호와 상호 작용하는 네트워크 가중치를 0으로 초기화한다.</p></li>
+</ul>
+</li>
+<li><p>VAE Encoder의 필요성</p>
+<ul>
+<li><p>스테이블 디퓨전이 텍스트 프롬프트의 CLIP 임베딩으로 컨디셔닝 되고 CLIP이 텍스트와 이미지를 공유 임베딩 스페이스(shared embedding space)로 인코딩한다는 점을 감안할 때 CLIP 컨디셔닝을 조건으로 주어진 이미지에서 파생된 임베딩으로 간단히 대체하는 것이 자연스러워 보일 수 있다.</p></li>
+<li><p>하지만 실제로는 CLIP 이미지 임베딩만으로는 조건으로 주어진 이미지에서 세밀한 디테일을 캡처하기에 충분하지 않다.</p></li>
+<li><p>따라서 스테이블 디퓨전의 VAE에서 인코딩된 latent 임베딩을 추가로 입력한다.</p></li>
+<li><p>이를 통해 디퓨전의 출력 도메인과 일치하는 추가적인 장점을 가지게 된다.</p></li>
+</ul>
+</li>
+<li><p>어댑터 <span class="math notranslate nohighlight">\(\mathcal{A}\)</span></p>
+<ul>
+<li><p>스테이블 디퓨전 아키텍처는 기본적으로 컨디셔닝 신호로 VAE latent를 지원하지 않기 때문에 어댑터 모듈 <span class="math notranslate nohighlight">\(\mathcal{A}\)</span>를 추가한다.</p></li>
+<li><p>해당 어댑터는 CLIP과 VAE 임베딩을 결합하여 네트워크의 일반적인 cross-attention 연산에 사용되는 하나의 임베딩을 생성한다.</p></li>
+<li><p>이 어댑터는 두 신호를 함께  혼합하고 디노이징 U-Net의 cross-attention 모듈에서 예상하는 일반적인 모양으로 출력을 변환한다.</p></li>
+</ul>
+</li>
+<li><p>디퓨전 기반 파인튜닝에서 언급했 듯이 학습에서 네트워크의 충격을 완화하기 위해 처음에는 VAE 임베딩에 해당하는 가중치는 0으로 설정되어 네트워크가 CLIP 임베딩으로만 학습을 시작한다.</p></li>
+<li><p>최종 이미지 컨디셔닝 신호 <span class="math notranslate nohighlight">\(c_I\)</span>를 다음과 같이 정의한다.</p></li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+\begin{align}c_{I}={\mathcal{A}}(c_{\mathrm{CLIP}},c_{\mathrm{VAE}})\end{align}
+\]</div>
+</section>
+<section id="modified-unet">
+<h3>4.2.2 Modified UNet<a class="headerlink" href="#modified-unet" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id5">
+<img alt="Modified UNet" class="bg-primary mb-1" src="../../_images/041.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 650 </span><span class="caption-text">Modified UNet</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>이미지 컨디셔닝과 달리 포즈 컨디셔닝은 이미지와 정렬 된다.</p></li>
+<li><p>Noisy latent <span class="math notranslate nohighlight">\(\tilde{z}_i\)</span>를 타겟 포즈 표현 <span class="math notranslate nohighlight">\(c_p\)</span>와 concat한다.</p></li>
+<li><p>실제 비디오에서 추정된 포즈의 노이즈를 고려하고 생성된 프레임에서의 시간적 일관성을 극대화하기 위해, <span class="math notranslate nohighlight">\(c_p\)</span>를 다섯 개의 연속된 포즈 프레임으로 구성하였다. 즉, <span class="math notranslate nohighlight">\(c_p = \{p_{i-2}, p_{i-1}, p_i, p_{i+1}, p_{i+2}\}\)</span> → 개별 포즈로 네트워크를 학습하는 것보다 연속 포즈로 학습하면  전반적인 움직임의 부드러움과 시간적 일관성이 증가한다.</p></li>
+<li><p>구조적으로 0으로 초기화된 10개의 추가 입력 채널을 받아들이도록 UNet 입력 레이어를 수정하고 noisy latent에 해당하는 원래 채널은 사전 학습된 가중치에서 수정되지 않는다.</p></li>
+</ul>
+</section>
+<section id="finetuning">
+<h3>4.2.3 <strong>Finetuning</strong><a class="headerlink" href="#finetuning" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>스테이블 디퓨전 모델의 대부분의 레이어 weight는 미리 학습된 text-to-image 스테이블 디퓨전 체크포인트로 초기화된다.</p></li>
+<li><p>이 때, CLIP 이미지 인코더는 별도의 미리 학습된 체크포인트에서 로드된다.</p></li>
+<li><p>새로운 레이어는 초기에 새로운 컨디셔닝 신호가 네트워크 출력에 기여하지 않도록 초기화 된다.</p></li>
+<li><p>초기화 후 DreamPose는 아래의 두 단계로 파인튜닝된다.</p>
+<figure class="align-default" id="id6">
+<img alt="Two-phase finetuning" class="bg-primary mb-1" src="../../_images/051.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 651 </span><span class="caption-text">Two-phase Finetuning</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>Full Dataset Finetuning</p>
+<ul class="simple">
+<li><p>전체 훈련 데이터셋에 대한 UNet과 어댑터 모듈을 파인 튜닝하여 입력 이미지 및 포즈와 일치하는 프레임을 합성한다.</p></li>
+</ul>
+</li>
+<li><p>Subset-Specific Finetuning</p>
+<ul class="simple">
+<li><p>하나 이상의 피사체별 입력 이미지에 대해 UNet과 어댑터 모듈을 파인튜닝한 다음 VAE 디코더를 통해 기본 모델을 개선하여 추론에 사용되는 피사체별 맞춤형 모델을 생성한다.</p></li>
+</ul>
+</li>
+</ol>
+</li>
+<li><p>다른 이미지 조건부 디퓨전 방법과 마찬가지로, 입력 이미지의 사람과 의상의 identity를 보존하고 프레임 간에 일관성을 유지하려면 샘플별 파인튜닝이 필수적이었다.</p></li>
+<li><p>그러나 단순히 단일 프레임과 포즈 쌍에 대해 훈련하면 텍스처 고착(texture-sticking)과 같은 아티팩트가 출력 비디오에 발생한다.</p></li>
+<li><p>이를 방지하기 위해 각 단계에서 랜덤 크롭을 추가하는 등의 방법으로 이미지-포즈쌍을 증강한다.</p></li>
+<li><p>VAE 디코더를 파인튜닝하는 것이 더 선명하고 사실적인 디테일을 복구하는 데 중요하다.</p>
+<figure class="align-default" id="id7">
+<img alt="Importance of VAE finetuning" class="bg-primary mb-1" src="../../_images/061.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 652 </span><span class="caption-text">VAE 파인튜닝의 중요성</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+</section>
+<section id="pose-and-image-classifier-free-guidance">
+<h2>4.4. Pose and Image Classifier-Free Guidance<a class="headerlink" href="#pose-and-image-classifier-free-guidance" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>추론시 단일 입력 이미지와 피사체별 모델(subject-specific model)을 사용하는 일련의 포즈에서 프레임별로 동영상을 생성한다.</p></li>
+<li><p>이중(dual) classifier-free guidance를 사용하여 추론 시에 이미지 컨디셔닝 <span class="math notranslate nohighlight">\(c_I\)</span>와 포즈 컨디셔닝 <span class="math notranslate nohighlight">\(c_p\)</span>의 강도를 조절한다.</p></li>
+<li><p>이중 classifier-free guidance는 식 (3)에서 다음과 같이 수정된다.</p>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+    \begin{align*}
+    {\epsilon_{\theta}(z_{t},c_{I},c_{p})} &amp; {= \epsilon_{\theta}(z_{t},\emptyset,\emptyset)} \\
+    {} &amp; {+\, s_{I}(\epsilon_{\theta}(z_{t},c_{I},\emptyset)-\epsilon_{\theta}(z_{t},\emptyset,\emptyset))} \\
+    {} &amp; {+\, s_{p}(\epsilon_{\theta}(z_{t},c_{I},c_{p})-\epsilon_{\theta}(z_{t},c_{I},\emptyset))}
+    \end{align*}
+    \end{split}\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(s_I\)</span>, <span class="math notranslate nohighlight">\(s_p\)</span>: 가이던스 웨이트</p></li>
+<li><p><span class="math notranslate nohighlight">\(c_I\)</span>: 이미지 컨디셔닝</p></li>
+<li><p><span class="math notranslate nohighlight">\(c_p\)</span>: 포즈 컨디셔닝</p></li>
+</ul>
+<p>→ 이미지 컨디셔닝이 있는 경우와 없는 경우의 노이즈 벡터 차이를 계산하고, 포즈 컨디셔닝이 있는 경우와 없는 경우의 노이즈 벡터 차이를 계산해서 이를 가이던스 웨이트를 통해 강도를 조정해서 반영</p>
+</li>
+<li><p><span class="math notranslate nohighlight">\(s_I\)</span>가 크면 입력 이미지에 높은 외관 충실도를 보장하고, <span class="math notranslate nohighlight">\(s_p\)</span>가 크면 입력 포즈에 대한 정렬을 보장한다.</p></li>
+<li><p>이중 classifier-free guidance는 포즈 및 이미지 가이드를 강화하는 것 외에도, 피사체별 모델 파인튜닝 후 하나의 입력 포즈에 대한 오버피팅을 방지한다.</p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<section id="implementation-details">
+<h2>5.1.  Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>입력 이미지 resolution: 512x512</p></li>
+<li><p>GPU: NVIDIA A100 2개</p></li>
+<li><p>첫 번째 훈련 단계</p>
+<ul>
+<li><p>전체 훈련 데이터셋 사용</p></li>
+<li><p>5 epoch</p></li>
+<li><p>5e-6 learning rate</p></li>
+<li><p>배치사이즈: 16 (4 gradient accumulation step)</p></li>
+<li><p>Dropout: 포즈 입력 5%, 이미지 입력 5%</p></li>
+</ul>
+</li>
+<li><p>두 번째 훈련 단계</p>
+<ul>
+<li><p>특정 샘플 프레임 사용</p></li>
+<li><p>500 step</p></li>
+<li><p>1e-5 learning rate</p></li>
+<li><p>Dropout 적용 X</p></li>
+</ul>
+</li>
+<li><p>VAE 디코더 파인튜닝</p>
+<ul>
+<li><p>1500 step</p></li>
+<li><p>5e-5 learning rate</p></li>
+</ul>
+</li>
+<li><p>추론 시에는 PNDM 샘플러 사용 (100step)</p></li>
+</ul>
+</section>
+<section id="dataset">
+<h2>5.2. Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>UBC Fashion 데이터셋 사용</p></li>
+<li><p>Split</p>
+<ul>
+<li><p>Train: 339개의 영상</p></li>
+<li><p>Test: 100개의 영상</p></li>
+</ul>
+</li>
+<li><p>각 비디오의 프레임 속도는 초당 30프레임이며 길이는 약 12초</p></li>
+<li><p>학습 중에는 학습 비디오로부터 랜덤으로 프레임 쌍을 샘플링 하였다.</p></li>
+<li><p>DensePose를 이용해서 포즈를 계산하였다.</p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="results">
+<h1>6. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h1>
+<section id="comparisons">
+<h2>6.1. Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>공개적으로 사용 가능한 두 가지 최신 비디오 합성 방법인 MRAA(Motion Representations for Articulated Animation)과 Thin-Plate Spline Motion Model(TPSMM)과 수치적 및 정성적인 비교를 하였다.</p></li>
+<li><p>제공된 훈련 스크립트와 권장 에폭 수를 사용하여 두 가지 모델을 UBC 패션 데이터셋을 이용해서 스크래치부터 학습하였다.</p></li>
+<li><p>평가를 위해서는 AVD 모드에서 제공된 테스트 스크립트를 사용하였다.</p></li>
+<li><p>PIDM과도 정성적인 비교를 하였다. PIDM의 경우 훈련 스크립트를 사용할 수 없어서 DeepFashion 데이터셋에 대해 학습된 체크포인트를 통해 비교하였다.</p></li>
+<li><p>100개의 디노이징 스텝을 사용하여 PIDM과 DreamPose를 실행하였다.</p></li>
+</ul>
+<section id="quantitative-analysis">
+<h3>6.1.1 Quantitative Analysis<a class="headerlink" href="#quantitative-analysis" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id8">
+<img alt="result 1" class="bg-primary mb-1" src="../../_images/071.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 653 </span><span class="caption-text">정량적 성능 비교</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>256 픽셀 해상도의 100개의 고유한 패션 동영상으로 구성된 UBC 패션 테스트 셋에 대해 모든 모델을 테스트 하였다.</p></li>
+<li><p>각 동영상에 대해 입력 프레임에서 최소 50프레임 이상 떨어져 있는 50개의 프레임을 추출하여 테스트하였다.</p></li>
+<li><p>MRAA와 TPSMM은 모두 driving video에서 추출된 feature에 의존하는 반면, DreamPose는 UV-포즈 시퀀스에만 의존한다는 점에 유의하라.</p></li>
+<li><p>그럼에도 불구하고 DreamPose 모델은 네 가지 정량적 지표 모두에서 두 가지 방법보다 정량적으로 우수한 성능을 보였다.</p></li>
+</ul>
+</section>
+<section id="qualitative-analysis">
+<h3>6.1.2 Qualitative Analysis<a class="headerlink" href="#qualitative-analysis" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id9">
+<img alt="result 2" class="bg-primary mb-1" src="../../_images/081.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 654 </span><span class="caption-text">정성적 성능 비교</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>MRAA와 TPSMM은 새로운 포즈를 취할 때 인물의  identity, 옷감 주름, 미세한 패턴이 손실되는 반면 DreamPose는 디테일을 정확하게 유지한다.</p></li>
+<li><p>포즈를 크게 변경하는 동안 MRAA는 팔 다리가 분리 될 수 있다.</p></li>
+<li><p>PIDM과의 비교</p>
+<figure class="align-default" id="id10">
+<img alt="result 3" class="bg-primary mb-1" src="../../_images/091.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 655 </span><span class="caption-text">PIDM과의 비교</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DreamPose는 얼굴의 identity와 의상 패턴 모두 더 충실도 높은 결과를 생성한다.</p></li>
+<li><p>PIDM은 사실적인 얼굴을 합성하지만, 원본 인물의 identity와 일치하지 않고, identity와 옷차림이 프레임마다 달랐다. → PIDM이 비디오 합성에서는 잘 동작하지 않는다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="ablation-studies">
+<h2>6.2. Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>아래 네 가지 변형에 대해 성능을 비교한다.</p>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{CLIP}}\)</span>: 듀얼 CLIP-VAE 인코더 대신에 사전 학습된 CLIP 이미지 인코더를 사용 → CLIP-VAE 인코더 효과 테스트</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{NO-VAE-FT}}\)</span>: VAE 디코더를 파인튜닝하지 않은 버전 → 디코더 파인튜닝 효과 테스트</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{1-pose}}\)</span>: 5개의 연결된 연속 포즈 대신 하나의 대상 포즈만 노이즈에 연결한 버전 → 연결된 5개의 프레임 효과 테스트</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{Ours}_{\text{Full}}\)</span>: 논문에서 제안한 모든 방법이 다 적용된 DreamPose</p></li>
+</ol>
+</li>
+</ul>
+<p><strong>Quantitative Comparison</strong></p>
+<figure class="align-default" id="id11">
+<img alt="result 4" class="bg-primary mb-1" src="../../_images/10.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 656 </span><span class="caption-text">Ablation Studies - 정량적 비교</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Qualitative Comparison</strong></p>
+<figure class="align-default" id="id12">
+<img alt="result 5" class="bg-primary mb-1" src="../../_images/11.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 657 </span><span class="caption-text">Ablation Studies - 정성적 비교</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기존의 스테이블 디퓨전에서는 인물의 identity에 대한 디테일을 보존할 수 없었다.</p></li>
+<li><p>텍스트 인코더를 CLIP 인코더로 교체한 결과 대부분의 이미지 디테일은 캡처할 수 있지만, 여전히 외형에 대한 정보 손실이 발생한다.</p></li>
+<li><p>VAE 디코더를 파인튜닝하면 디테일의 선명도가 크게 향상되고 입력 포즈에 대한 오버피팅이 발생하지 않는다.</p></li>
+<li><p>한 가지 포즈만 입력하면 팔과 머리카락 주변에서의 피사체의 형태가 눈에 띄게 깜박이는 현상이 나타났다.</p></li>
+</ul>
+</section>
+<section id="multiple-input-images">
+<h2>6.3. Multiple Input Images<a class="headerlink" href="#multiple-input-images" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>DreamPose는 피사체에 대한 입력 이미지를 여러 장 넣어서 파인튜닝할 수 있다.</p></li>
+<li><p>피사체의 입력 이미지를 추가하면 품질과 시점의 일관성이 향상된다.</p>
+<figure class="align-default" id="id13">
+<img alt="result 6" class="bg-primary mb-1" src="../../_images/12.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 658 </span><span class="caption-text">Multiple Input Images 결과</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="limitations-future-work">
+<h1><strong>7. Limitations &amp; Future Work</strong><a class="headerlink" href="#limitations-future-work" title="Permalink to this heading">#</a></h1>
+<ul>
+<li><p>실패 사례</p>
+<figure class="align-default" id="id14">
+<img alt="result 7" class="bg-primary mb-1" src="../../_images/13.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 659 </span><span class="caption-text">실패 사례 예시</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>드문 경우지만 팔다리가 옷 속으로 사라지거나(왼쪽), hallucinate feature가 보이거나(중간), 대상 포즈가 뒤를 향할 때 방향이 잘못 정렬 되는 경우(오른쪽)가 관찰된다.</p></li>
+</ul>
+</li>
+<li><p>또한 단순한 패턴의 옷에서 사실적인 결과를 생성하지만 일부 결과는 크고 복잡한 패턴에서 약간의 깜박임 동작을 보인다.</p></li>
+<li><p>다른 디퓨전 모델과 마찬가지로 파인튜닝 및 추론 시간이 GAN 또는 VAE에 비해 느리다.</p>
+<ul class="simple">
+<li><p>특정 피사체에 대한 모델 파인튜닝은 프레임당 18초의 렌더링 시간 외에 UNet의 경우 약 10분, VAE 디코더의 경우 약 20분이 소요된다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>8. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>스틸 패션 이미지 애니메이션을 위한 새로운 디퓨전 기반 방법인 DreamPose를 제안하였다.</p></li>
+<li><p>한 장의 이미지와 포즈 시퀀스가 주어졌을 때,  섬유, 패턴, 사람의 identity를 애니메이션 하는 사실적인 패션 동영상을 생성하는 방법을 증명하였다.</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DreaMoving.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DreaMoving</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="NeRF.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction"><strong>1. Introduction</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1. Diffusion models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#still-image-animation">2.2. Still Image Animation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fashion-image-synthesis">2.3. Fashion Image Synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-for-video-synthesis">2.4. Diffusion Models for Video Synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-mechanisms-for-diffusion-models">2.5. Conditioning Mechanisms for Diffusion Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method"><strong>4. Method</strong></a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview"><strong>4.1. Overview</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture">4.2. Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#split-clip-vae-encoder"><strong>4.2.1 Split CLIP-VAE Encoder</strong></a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#modified-unet">4.2.2 Modified UNet</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#finetuning">4.2.3 <strong>Finetuning</strong></a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pose-and-image-classifier-free-guidance">4.4. Pose and Image Classifier-Free Guidance</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.1.  Implementation Details</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">5.2. Dataset</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.1. Comparisons</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-analysis">6.1.1 Quantitative Analysis</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">6.1.2 Qualitative Analysis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.2. Ablation Studies</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multiple-input-images">6.3. Multiple Input Images</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-future-work"><strong>7. Limitations &amp; Future Work</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">8. Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/GIGAGAN.html b/docs/review/GIGAGAN.html
old mode 100644
new mode 100755
index dabc6780..def8ef05
--- a/docs/review/GIGAGAN.html
+++ b/docs/review/GIGAGAN.html
@@ -1,1054 +1,1074 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Scaling up GANs for Text-to-Image Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/GIGAGAN';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Consistency Models" href="consistency_models.html" />
-    <link rel="prev" title="Muse" href="Muse.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/GIGAGAN.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/GIGAGAN.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Scaling up GANs for Text-to-Image Synthesis</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">주요 질의응답</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Scaling up GANs for Text-to-Image Synthesis (CVPR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2303.05511">https://arxiv.org/abs/2303.05511</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/gigagan-pytorch">NON Official:</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on April. 14, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="scaling-up-gans-for-text-to-image-synthesis">
-<h1>Scaling up GANs for Text-to-Image Synthesis<a class="headerlink" href="#scaling-up-gans-for-text-to-image-synthesis" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>최근 이미지 생성 task의 모델 성능이 꾸준히 발전중</p></li>
-<li><p>기존에는 stylegan과 같은 GAN 모델이 주를 이뤘지만 최근 DALLE, Stable Diffusion등 <strong>Diffusion 기반</strong>의 모델 베이스로 추세가 급격하게 바뀌어버림</p></li>
-<li><p><em>“GAN 모델로 Diffusion의 성능을 잡는것은 정말 무리일까?”</em></p></li>
-<li><p>GigaGAN은</p>
-<ul class="simple">
-<li><p><strong>gan 기반의 모델</strong>로 1. <strong>속도점 이점</strong>과 <strong>2. 다양한 extra editing</strong>이 가능하다. (contribution)</p></li>
-</ul>
-<ol class="arabic simple">
-<li><p>속도적 이점</p>
-<ol class="arabic simple">
-<li><p>512px의 이미지를 0.13초만에 생성할 수 있다.</p></li>
-<li><p>16-megapixel(1600만 픽셀, 4k)의 이미지를 3.66초만에 생성할 수 있다.</p></li>
-</ol>
-</li>
-<li><p>다양한 활용성</p>
-<ol class="arabic simple">
-<li><p>latent space 상 에서의 image editing이 가능하다. (latent interpolation, style mixing …)</p></li>
-</ol>
-</li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>최근 Diffusion 기반의 모델들이 이미지 생성 분야에서 좋은 성능을 보여주고 있다.</p></li>
-<li><p>Diffusion은</p>
-<ul>
-<li><p>기본적으로 학습과 생성 시 iteration이 들어가는데 iteration은 <strong>학습의 안정성을 키워주는 장점</strong>이 있지만, <strong>computation cost가 매우 증가</strong>한다는 단점이 있다.</p></li>
-</ul>
-</li>
-<li><p>반면에 GAN은</p>
-<ul>
-<li><p>이미지 생성 시 single forward pass만 필요하므로 <strong>cost가 크지 않다는 장점</strong>이 있지만 single 혹은 multiple object의 생성에는 성능이 뛰어나지만 <strong>class가 명확히 정의되지않은 이미지 즉, open world image의 생성에는 어려움</strong>이 있다.</p></li>
-</ul>
-</li>
-<li><p><em><strong>“GAN을 더 develop한다면 Diffusion 모델을 넘어설 수 있을까?”</strong></em></p></li>
-<li><p>gigaGAN은?</p>
-<ol class="arabic simple">
-<li><p>속도적 장점</p>
-<ul>
-<li><p>0.13 s/img (512 size), 3.66s / img (4k)</p></li>
-</ul>
-</li>
-<li><p>latent space상에서의 editing 가능(<strong>style mixing, interpolation, prompt mixing</strong> like stylegan2)</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="methods">
-<h2>Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img02.png"><img alt="img0" class="bg-primary mb-1" src="../../_images/img02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 463 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Base Architecture</strong></p>
-<ul class="simple">
-<li><p>Base Model은 stylegan2로 선정</p></li>
-<li><p><span class="math notranslate nohighlight">\(G = M\times \tilde{G}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(M\)</span>(mapping network) : gaussian distribution에서의 noise를 좀더 disentangle된 w space로 mapping 시키는 network</p>
-<ul>
-<li><p>gigagan에서는 mapping network의 input으로 z와 함께 text condition(c)도 함께 사용</p></li>
-<li><p>output : <span class="math notranslate nohighlight">\(w\)</span> (vector)</p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(\tilde{G}\)</span> : 피라미드 구조의 convolution block들로 random constant 값으로부터 output image <span class="math notranslate nohighlight">\(X\)</span>를 생성하는 generator.</p>
-<ul>
-<li><p>이때 <span class="math notranslate nohighlight">\(w\)</span> vector는 각 layer마다 입력으로 들어가서 feature map의 분포를 scaling함으로써 style을 반영(modulation, demodulation in stylegan2)</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>Sample-adaptive kernel selection</strong></p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img15.png"><img alt="img1" class="bg-primary mb-1" src="../../_images/img15.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 464 </span><span class="caption-text">Sample Adaptive Kernel Selection</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>도입 배경</p>
-<ul class="simple">
-<li><p>단순히 convolution layer의 kernel size만 scaling up을 하면 학습이 안되는 현상</p></li>
-<li><p>GAN 기반 모델이 (1)<strong>text-condition</strong>의 (2)<strong>오픈 이미지</strong> 생성에 한계를 보이는 이유는 convolution network의 구조 때문이다.</p>
-<ul>
-<li><p>convolution 연산은 구조상 같은 레이어상에 있는 하나의 filter가 <strong>text condition 주입부터 이미지의 생성까지 모든 부분에 참여</strong>하는데 이 부분이 모델의 표현력을 떨어트릴 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>kernel selection method 적용</p></li>
-<li><p>레이어마다 <span class="math notranslate nohighlight">\(\{k_{i}\in R^{C_{in}\times C_{out}\times K\times K}\}\)</span>차원의 N개의 kernel set을 두고 style vector w의 affine layer를 거친 weight를 <strong>각 kernel값에 weighted summation한 값을 최종 filter로 사용</strong></p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img23.png"><img alt="img2" class="bg-primary mb-1" src="../../_images/img23.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 465 </span><span class="caption-text">Equation of kernel selection method affine layer</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>이 때 kernel weight 계산에 softmax를 사용하는데 얘는 differentiable하므로 gradient update 가능</p></li>
-<li><p>또, 실제로 N개의 kernel을 모두 filter로 사용할 때보다 computation cost가 굉장히 절약된다는 장점</p></li>
-</ul>
-<p><strong>Interleaving attention with convolution</strong></p>
-<ul>
-<li><p>도입 배경</p>
-<ul>
-<li><p>convolution filter는 receptive field 내부의 local feature 캡처에는 탁월하지만 field 외부의 부분은 학습할 수 없다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img33.png"><img alt="img3" class="bg-primary mb-1" src="../../_images/img33.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 466 </span><span class="caption-text">Receptive Field in Convolution Networks</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>이러한 한계점을 극복하기위해 <strong>attention layer</strong>(<span class="math notranslate nohighlight">\(g_{attn}\)</span>)를 추가</p></li>
-<li><p>단순히 stylegan2에 attention layer를 추가하면 학습에 이상이 생김</p>
-<ul>
-<li><p>원인은 attention layer에서 사용하는 <strong>dot product가 Lipschitz함수가 아니기 때문</strong></p>
-<ul>
-<li><p>lipschitz 함수란</p>
-<ul>
-<li><p>lipschitz 함수란, <strong>두 점 사이의 거리를 일정 비 이상으로 증가시키지 않는 함수</strong></p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img42.png"><img alt="img4" class="bg-primary mb-1" src="../../_images/img42.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 467 </span><span class="caption-text">lipschitz Function</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Lipschitz 함수를 만족하지 못함으로써 discriminator의 학습이 unstable</strong>하게 되고 때문에 학습에 실패한다.</p></li>
-</ul>
-</li>
-<li><p>Lipschitz 연속성을 만족시키기 위해 attention의 <strong>dot product를 L2-distance로 대체</strong></p></li>
-</ul>
-</li>
-</ul>
-<p><strong>Advanced setting</strong></p>
-<ul class="simple">
-<li><p>모델의 성능 향상을 위해 stylegan2과 같은 hyper parameter를 사용하고, L2 distance logit의 초기값을 unit normal distribution, scale down, …</p></li>
-<li><p><span class="math notranslate nohighlight">\(\tilde{G}\)</span>의 각 convolution block 마다 attentnion layer(self + cross)를 적용</p>
-<ul>
-<li><p>self attention: 이미지 자신의 global feature 학습용</p></li>
-<li><p>cross attention: 이미지에 text condition 주입용</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>Generator design</strong></p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img52.png"><img alt="img5" class="bg-primary mb-1" src="../../_images/img52.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 468 </span><span class="caption-text">GIGAGAN Architecture</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>text embedding</p>
-<ul>
-<li><p>pretrained model : <strong>clip</strong>(frozen; <span class="math notranslate nohighlight">\(R^{C\times 768}\)</span>, c is # of tokens)</p></li>
-<li><p>후에 text encoding에 추가적인 유연성을 위해 mapping network(<span class="math notranslate nohighlight">\(T\)</span>)를 추가</p></li>
-<li><p>text mapping network(<span class="math notranslate nohighlight">\(T\)</span>)의 output은 input size와 동일하게 <span class="math notranslate nohighlight">\(R^{C\times 768}\)</span></p></li>
-<li><p>이때 output <span class="math notranslate nohighlight">\(t\)</span>를 두 파트로 나눠 사용하는데</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(R^{C-1\times 768}\)</span> : word i index의 local feature를 catch</p></li>
-<li><p><span class="math notranslate nohighlight">\(R^{768}\)</span> : word를 global하게 embedding하는 vector</p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(t_{global}\)</span>은 latent mapping network(<span class="math notranslate nohighlight">\(M\)</span>)의 input으로 사용</p></li>
-<li><p><span class="math notranslate nohighlight">\(t_{local}\)</span>은 Generator의 input으로 각 block에 cross attention에 적용</p></li>
-</ul>
-</li>
-<li><p>Generator의 layer는 총 5개의 피라미드 구조</p></li>
-<li><p><span class="math notranslate nohighlight">\(\{x_{i}\}_{i=0}^{L-1} = \{x_{0},x_{1}, ..., x_{4}\}\)</span> ,  <span class="math notranslate nohighlight">\(\{S_{i}\}_{i=0}^{L-1} = \{64, 32, 16, 8, 4\}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(x_{0} = X\)</span>(output image)</p></li>
-</ul>
-<p><strong>Discriminator Design</strong></p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img62.png"><img alt="img6" class="bg-primary mb-1" src="../../_images/img62.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 469 </span><span class="caption-text">Discriminator of GIGAGAN</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>2 branch의 구조</p>
-<ol class="arabic simple">
-<li><p>text conditioning을 학습</p></li>
-<li><p>image generating을 학습</p></li>
-</ol>
-</li>
-<li><p><span class="math notranslate nohighlight">\(t_{D}\)</span>: text c로부터 pretrained text encoder(CLIP)를 통과시켜 추출한 embedding 값</p></li>
-<li><p>stylegan에서는 low res의 이미지들은 loss에 반영하지 않았지만, gigagan에서는 <strong>이미지의 low feature 표현의 개선</strong>을 위해 모든 scale의 이미지를 모두 loss에 반영</p></li>
-<li><p>이때 feature extractor <span class="math notranslate nohighlight">\(\Phi\)</span></p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\Phi_{i\rightarrow j}:R^{X_{i}\times X_{i}\times3}\rightarrow R^{X_{j}^{D}\times X_{j}^{D}\times C_{j}}\)</span></p></li>
-<li><p>self attention + conv(with stride=2)</p></li>
-<li><p>각 level에서는 <strong>이전 level에서의 feature map을 공유</strong>해서 사용하고 <strong>low resolution에서 계산이 일어나기때문에</strong> computation resource가 크지 않다.</p></li>
-</ul>
-</li>
-<li><p>multi scale의 loss 계산을 위해 <strong>ms-i/o loss</strong>(multi scale input, output adversarial loss)를 사용</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(V_{MS-I/O}(G, D) = \sum_{i=0}^{L-1}\sum_{j=i+1}^{L}V_{GAN}(G_{i}, D_{ij}) + V_{match}({G_{i}, D_{ij}})\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(V_{GAN}\)</span>: standard gan loss</p></li>
-<li><p><span class="math notranslate nohighlight">\(D_{ij}(x, c) = \psi _{j}(\phi_{i\rightarrow j}(x_{i}), t_{D}) + Conv_{1\times 1}(\phi_{i\rightarrow j}(x_{i}))\)</span>:</p>
-<ul>
-<li><p>image의 feature map <span class="math notranslate nohighlight">\(\phi(x)\)</span>에 <span class="math notranslate nohighlight">\(t_{D}\)</span>를 심은 값과 원본 <span class="math notranslate nohighlight">\(\phi(x)\)</span>의 합</p></li>
-<li><p>앞부분은 text conditioning model을 학습, 뒷부분은 unconditional image generator를 학습</p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(V_{match}\)</span>(Matching aware loss)</p>
-<ul>
-<li><p>앞의 gan loss는 생성된 이미지가 얼마나 리얼한지, 그리고 condition <span class="math notranslate nohighlight">\(c\)</span>에 얼마나 가까운지에 대한 loss</p></li>
-<li><p>하지만 학습 초반에는 condition <span class="math notranslate nohighlight">\(c\)</span>에 상관없이 이미지의 퀄리티로만 분류를 해버림</p></li>
-<li><p>그래서 discriminator 학습 과정에서 conditioning을 강제로 따르도록 하기위해 <span class="math notranslate nohighlight">\(c\)</span>를 랜덤으로 샘플링(<span class="math notranslate nohighlight">\(\hat{c}\)</span>)해 image의 fake pair로 지정</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img72.png"><img alt="img7" class="bg-primary mb-1" src="../../_images/img72.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 470 </span><span class="caption-text">Matching aware loss</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>CLIP contrastive loss</p>
-<ul>
-<li><p>pretrained CLIP을 사용해 contrastive learning</p>
-<ul>
-<li><p>contrastive learning</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img83.png"><img alt="img8" class="bg-primary mb-1" src="../../_images/img83.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 471 </span><span class="caption-text">constrastive learning</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>가까운 class는 가깝게(positive), 그 외 class는 멀게(negative)</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>condition <span class="math notranslate nohighlight">\(c_{0}\)</span>에 대해 생성된 이미지의 feature vector는 condition <span class="math notranslate nohighlight">\(c_{0}\)</span>의 feature 벡터와는 가깝게 (positive), 나머지 condition의 feature vector와는 멀게(negative) 학습되어야한다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img92.png"><img alt="img9" class="bg-primary mb-1" src="../../_images/img92.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 472 </span><span class="caption-text">CLIP Loss</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>Vision-aided adversarial loss</p>
-<ul class="simple">
-<li><p><a class="reference external" href="https://arxiv.org/abs/2112.09130">https://arxiv.org/abs/2112.09130</a></p></li>
-<li><p>stylegan에서 discriminator는 training data에 overfitting되는 경향이 있음</p></li>
-<li><p>이부분을 해결하기위해 생성된  image를 SOTA의 vision 모델을 사용해 뽑아낸 feature map을 Discriminator에 같이 추가해 real / fake를 분류</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>GAN based upsampler</strong></p>
-<ul class="simple">
-<li><p>gigagan은 upsampling 모델에 적용이 가능하다</p></li>
-<li><p>64x64의 이미지를 3번 downsampling + 6번 upsampling 모델을 통과시켜 1024x1024의 이미지를 생성할 수 있다.</p></li>
-<li><p>이때 upsampling 부분에 gigagan의 Generator 부분을 적용</p></li>
-<li><p>upsampling model에 적용할때에는 LPIPS loss를 추가하고, 이미지 생성과정중에 stylegan과 동일하게 gaussian noise를 추가해 real함을 추가</p></li>
-</ul>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>Training and evaluation details</p>
-<ol class="arabic">
-<li><p>Dataset : LAION2D-en, COYO-700m</p></li>
-<li><p>upsampler(128 → 1024) : Adobe’s internal Stock images</p></li>
-<li><p>Pretrained text  encoder : CLIP ViT-L/14</p></li>
-<li><p>CLIP score : OpenCLIP ViT-G/14</p></li>
-<li><p>machine : A100 gpu, etc.</p></li>
-<li><p><strong>각각의 method가 정말 효과가 있는가?</strong></p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img102.png"><img alt="img10" class="bg-primary mb-1" src="../../_images/img102.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 473 </span><span class="caption-text">Table1</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>단순 scaling up보다 위 method들을 추가하면 <strong>비교적 낮은 FiD와 CLIP score 수치를 보여준다</strong>.</p></li>
-</ul>
-</li>
-<li><p><strong>Text2Image 모델과 성능 비교</strong></p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img112.png"><img alt="img11" class="bg-primary mb-1" src="../../_images/img112.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 474 </span><span class="caption-text">Table2</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>FiD와 inference time을 보면 어느정도 균형을 이루며 경쟁력을 보이고 있다.</p></li>
-</ul>
-</li>
-<li><p>Distilled diffusion과 비교</p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img122.png"><img alt="img12" class="bg-primary mb-1" src="../../_images/img122.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 475 </span><span class="caption-text">Table3</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>diffutsion의 속도 개선을 위해  distilation한 모델</p></li>
-<li><p>수치적으로도 우위에 있고, inference time도 여전히 더 빠르다.</p></li>
-</ul>
-</li>
-<li><p>Upscaler</p>
-<p>!:::{figure-md}
-<img src="../../pics/GIGAGAN/img13.png" alt="img13" class="bg-primary mb-1" width="700px"></p>
-<p>Table4</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>
-<span class="p">:::{</span><span class="n">figure</span><span class="o">-</span><span class="n">md</span><span class="p">}</span> 
-<span class="o">&lt;</span><span class="n">img</span> <span class="n">src</span><span class="o">=</span><span class="s2">&quot;../../pics/GIGAGAN/img14.png&quot;</span> <span class="n">alt</span><span class="o">=</span><span class="s2">&quot;img14&quot;</span> <span class="n">class</span><span class="o">=</span><span class="s2">&quot;bg-primary mb-1&quot;</span> <span class="n">width</span><span class="o">=</span><span class="s2">&quot;700px&quot;</span><span class="o">&gt;</span>
-
-<span class="n">Super</span> <span class="n">Resolution</span>
-</pre></div>
-</div>
-</li>
-<li><p>그 외</p>
-<ul class="simple">
-<li><p>stylegan에 연구된 technique(t<strong>runcation trick, style mixing, latent interpolation</strong> 등) 적용 가능</p></li>
-</ul>
-</li>
-</ol>
-<ul>
-<li><p>truncation trick</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img151.png"><img alt="img15" class="bg-primary mb-1" src="../../_images/img151.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 476 </span><span class="caption-text">Truncation Trick</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>style mixing</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img16.png"><img alt="img16" class="bg-primary mb-1" src="../../_images/img16.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 477 </span><span class="caption-text">Style Mixing</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>latent interpolation</p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img17.png"><img alt="img17" class="bg-primary mb-1" src="../../_images/img17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 478 </span><span class="caption-text">Latent Interpolation</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p>Limitation</p>
-<ul>
-<li><p>score 상으로는 어느정도 좋은 성능을 보이지만 <strong>실제로 DALLE2 ,Imagen과 같은 SOTA diffusion 모델과 비교하면 develop이 필요함</strong></p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img18.png"><img alt="img18" class="bg-primary mb-1" src="../../_images/img18.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 479 </span><span class="caption-text">Failure Cases</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<hr class="docutils" />
-<section id="id1">
-<h2>주요 질의응답<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Muse.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Muse</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="consistency_models.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Consistency Models</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">주요 질의응답</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Scaling up GANs for Text-to-Image Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/GIGAGAN';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Consistency Models" href="consistency_models.html" />
+    <link rel="prev" title="Muse" href="Muse.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/GIGAGAN.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/GIGAGAN.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Scaling up GANs for Text-to-Image Synthesis</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">주요 질의응답</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Scaling up GANs for Text-to-Image Synthesis (CVPR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2303.05511">https://arxiv.org/abs/2303.05511</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/gigagan-pytorch">Unofficial implementation (lucidrains/gigagan-pytorch)</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on April 14, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="scaling-up-gans-for-text-to-image-synthesis">
+<h1>Scaling up GANs for Text-to-Image Synthesis<a class="headerlink" href="#scaling-up-gans-for-text-to-image-synthesis" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>최근 이미지 생성 task의 모델 성능이 꾸준히 발전중</p></li>
+<li><p>기존에는 stylegan과 같은 GAN 모델이 주를 이뤘지만 최근 DALLE, Stable Diffusion등 <strong>Diffusion 기반</strong>의 모델 베이스로 추세가 급격하게 바뀌어버림</p></li>
+<li><p><em>“GAN 모델로 Diffusion의 성능을 잡는것은 정말 무리일까?”</em></p></li>
+<li><p>GigaGAN은</p>
+<ul class="simple">
+<li><p><strong>gan 기반의 모델</strong>로 <strong>1. 속도적 이점</strong>과 <strong>2. 다양한 extra editing</strong>이 가능하다. (contribution)</p></li>
+</ul>
+<ol class="arabic simple">
+<li><p>속도적 이점</p>
+<ol class="arabic simple">
+<li><p>512px의 이미지를 0.13초만에 생성할 수 있다.</p></li>
+<li><p>16-megapixel(1600만 픽셀, 4k)의 이미지를 3.66초만에 생성할 수 있다.</p></li>
+</ol>
+</li>
+<li><p>다양한 활용성</p>
+<ol class="arabic simple">
+<li><p>latent space 상 에서의 image editing이 가능하다. (latent interpolation, style mixing …)</p></li>
+</ol>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>최근 Diffusion 기반의 모델들이 이미지 생성 분야에서 좋은 성능을 보여주고 있다.</p></li>
+<li><p>Diffusion은</p>
+<ul>
+<li><p>기본적으로 학습과 생성 시 iteration이 들어가는데 iteration은 <strong>학습의 안정성을 키워주는 장점</strong>이 있지만, <strong>computation cost가 매우 증가</strong>한다는 단점이 있다.</p></li>
+</ul>
+</li>
+<li><p>반면에 GAN은</p>
+<ul>
+<li><p>이미지 생성 시 single forward pass만 필요하므로 <strong>cost가 크지 않다는 장점</strong>이 있지만 single 혹은 multiple object의 생성에는 성능이 뛰어나지만 <strong>class가 명확히 정의되지않은 이미지 즉, open world image의 생성에는 어려움</strong>이 있다.</p></li>
+</ul>
+</li>
+<li><p><em><strong>“GAN을 더 develop한다면 Diffusion 모델을 넘어설 수 있을까?”</strong></em></p></li>
+<li><p>gigaGAN은?</p>
+<ol class="arabic simple">
+<li><p>속도적 장점</p>
+<ul>
+<li><p>0.13 s/img (512 size), 3.66s / img (4k)</p></li>
+</ul>
+</li>
+<li><p>latent space상에서의 editing 가능(<strong>style mixing, interpolation, prompt mixing</strong> like stylegan2)</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="methods">
+<h2>Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img02.png"><img alt="img0" class="bg-primary mb-1" src="../../_images/img02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 463 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Base Architecture</strong></p>
+<ul class="simple">
+<li><p>Base Model은 stylegan2로 선정</p></li>
+<li><p><span class="math notranslate nohighlight">\(G = M\times \tilde{G}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(M\)</span>(mapping network) : gaussian distribution에서의 noise를 좀더 disentangle된 w space로 mapping 시키는 network</p>
+<ul>
+<li><p>gigagan에서는 mapping network의 input으로 z와 함께 text condition(c)도 함께 사용</p></li>
+<li><p>output : <span class="math notranslate nohighlight">\(w\)</span> (vector)</p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(\tilde{G}\)</span> : 피라미드 구조의 convolution block들로 random constant 값으로부터 output image <span class="math notranslate nohighlight">\(X\)</span>를 생성하는 generator.</p>
+<ul>
+<li><p>이때 <span class="math notranslate nohighlight">\(w\)</span> vector는 각 layer마다 입력으로 들어가서 feature map의 분포를 scaling함으로써 style을 반영(modulation, demodulation in stylegan2)</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Sample-adaptive kernel selection</strong></p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img15.png"><img alt="img1" class="bg-primary mb-1" src="../../_images/img15.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 464 </span><span class="caption-text">Sample Adaptive Kernel Selection</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>도입 배경</p>
+<ul class="simple">
+<li><p>단순히 convolution layer의 kernel size만 scaling up을 하면 학습이 안되는 현상</p></li>
+<li><p>GAN 기반 모델이 (1)<strong>text-condition</strong>의 (2)<strong>오픈 이미지</strong> 생성에 한계를 보이는 이유는 convolution network의 구조 때문이다.</p>
+<ul>
+<li><p>convolution 연산은 구조상 같은 레이어상에 있는 하나의 filter가 <strong>text condition 주입부터 이미지의 생성까지 모든 부분에 참여</strong>하는데 이 부분이 모델의 표현력을 떨어트릴 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>kernel selection method 적용</p></li>
+<li><p>레이어마다 <span class="math notranslate nohighlight">\(\{k_{i}\in R^{C_{in}\times C_{out}\times K\times K}\}\)</span>차원의 N개의 kernel set을 두고 style vector w의 affine layer를 거친 weight를 <strong>각 kernel값에 weighted summation한 값을 최종 filter로 사용</strong></p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img23.png"><img alt="img2" class="bg-primary mb-1" src="../../_images/img23.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 465 </span><span class="caption-text">Equation of kernel selection method affine layer</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>이 때 kernel weight 계산에 softmax를 사용하는데 얘는 differentiable하므로 gradient update 가능</p></li>
+<li><p>또, 실제로 N개의 kernel을 모두 filter로 사용할 때보다 computation cost가 굉장히 절약된다는 장점</p></li>
+</ul>
+<p><strong>Interleaving attention with convolution</strong></p>
+<ul>
+<li><p>도입 배경</p>
+<ul>
+<li><p>convolution filter는 receptive field 내부의 local feature 캡처에는 탁월하지만 field 외부의 부분은 학습할 수 없다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img33.png"><img alt="img3" class="bg-primary mb-1" src="../../_images/img33.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 466 </span><span class="caption-text">Receptive Field in Convolution Networks</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>이러한 한계점을 극복하기위해 <strong>attention layer</strong>(<span class="math notranslate nohighlight">\(g_{attn}\)</span>)를 추가</p></li>
+<li><p>단순히 stylegan2에 attention layer를 추가하면 학습에 이상이 생김</p>
+<ul>
+<li><p>원인은 attention layer에서 사용하는 <strong>dot product가 Lipschitz함수가 아니기 때문</strong></p>
+<ul>
+<li><p>lipschitz 함수란</p>
+<ul>
+<li><p>lipschitz 함수란, <strong>두 점 사이의 거리를 일정 비 이상으로 증가시키지 않는 함수</strong></p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img42.png"><img alt="img4" class="bg-primary mb-1" src="../../_images/img42.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 467 </span><span class="caption-text">lipschitz Function</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Lipschitz 함수를 만족하지 못함으로써 discriminator의 학습이 unstable</strong>하게 되고 때문에 학습에 실패한다.</p></li>
+</ul>
+</li>
+<li><p>Lipschitz 연속성을 만족시키기 위해 attention의 <strong>dot product를 L2-distance로 대체</strong></p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Advanced setting</strong></p>
+<ul class="simple">
+<li><p>모델의 성능 향상을 위해 stylegan2와 같은 hyper parameter를 사용하고, L2 distance logit은 초기화 시 unit normal distribution에 맞도록 scale down, …</p></li>
+<li><p><span class="math notranslate nohighlight">\(\tilde{G}\)</span>의 각 convolution block 마다 attention layer(self + cross)를 적용</p>
+<ul>
+<li><p>self attention: 이미지 자신의 global feature 학습용</p></li>
+<li><p>cross attention: 이미지에 text condition 주입용</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Generator design</strong></p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img52.png"><img alt="img5" class="bg-primary mb-1" src="../../_images/img52.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 468 </span><span class="caption-text">GIGAGAN Architecture</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>text embedding</p>
+<ul>
+<li><p>pretrained model : <strong>clip</strong>(frozen; <span class="math notranslate nohighlight">\(R^{C\times 768}\)</span>, C is # of tokens)</p></li>
+<li><p>후에 text encoding에 추가적인 유연성을 위해 mapping network(<span class="math notranslate nohighlight">\(T\)</span>)를 추가</p></li>
+<li><p>text mapping network(<span class="math notranslate nohighlight">\(T\)</span>)의 output은 input size와 동일하게 <span class="math notranslate nohighlight">\(R^{C\times 768}\)</span></p></li>
+<li><p>이때 output <span class="math notranslate nohighlight">\(t\)</span>를 두 파트로 나눠 사용하는데</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(t_{local}\in R^{(C-1)\times 768}\)</span> : word i index의 local feature를 catch</p></li>
+<li><p><span class="math notranslate nohighlight">\(t_{global}\in R^{768}\)</span> : word를 global하게 embedding하는 vector</p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(t_{global}\)</span>은 latent mapping network(<span class="math notranslate nohighlight">\(M\)</span>)의 input으로 사용</p></li>
+<li><p><span class="math notranslate nohighlight">\(t_{local}\)</span>은 Generator의 input으로 각 block에 cross attention에 적용</p></li>
+</ul>
+</li>
+<li><p>Generator의 layer는 총 5개의 피라미드 구조</p></li>
+<li><p><span class="math notranslate nohighlight">\(\{x_{i}\}_{i=0}^{L-1} = \{x_{0},x_{1}, ..., x_{4}\}\)</span> ,  <span class="math notranslate nohighlight">\(\{S_{i}\}_{i=0}^{L-1} = \{64, 32, 16, 8, 4\}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(x_{0} = X\)</span>(output image)</p></li>
+</ul>
+<p><strong>Discriminator Design</strong></p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img62.png"><img alt="img6" class="bg-primary mb-1" src="../../_images/img62.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 469 </span><span class="caption-text">Discriminator of GIGAGAN</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>2 branch의 구조</p>
+<ol class="arabic simple">
+<li><p>text conditioning을 학습</p></li>
+<li><p>image generating을 학습</p></li>
+</ol>
+</li>
+<li><p><span class="math notranslate nohighlight">\(t_{D}\)</span>: text c로부터 pretrained text encoder(CLIP)를 통과시켜 추출한 embedding 값</p></li>
+<li><p>stylegan에서는 low res의 이미지들은 loss에 반영하지 않았지만, gigagan에서는 <strong>이미지의 low feature 표현의 개선</strong>을 위해 모든 scale의 이미지를 모두 loss에 반영</p></li>
+<li><p>이때 feature extractor <span class="math notranslate nohighlight">\(\Phi\)</span></p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\Phi_{i\rightarrow j}:R^{X_{i}\times X_{i}\times3}\rightarrow R^{X_{j}^{D}\times X_{j}^{D}\times C_{j}}\)</span></p></li>
+<li><p>self attention + conv(with stride=2)</p></li>
+<li><p>각 level에서는 <strong>이전 level에서의 feature map을 공유</strong>해서 사용하고 <strong>low resolution에서 계산이 일어나기때문에</strong> computation resource가 크지 않다.</p></li>
+</ul>
+</li>
+<li><p>multi scale의 loss 계산을 위해 <strong>ms-i/o loss</strong>(multi scale input, output adversarial loss)를 사용</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(V_{MS-I/O}(G, D) = \sum_{i=0}^{L-1}\sum_{j=i+1}^{L}V_{GAN}(G_{i}, D_{ij}) + V_{match}({G_{i}, D_{ij}})\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(V_{GAN}\)</span>: standard gan loss</p></li>
+<li><p><span class="math notranslate nohighlight">\(D_{ij}(x, c) = \psi _{j}(\phi_{i\rightarrow j}(x_{i}), t_{D}) + Conv_{1\times 1}(\phi_{i\rightarrow j}(x_{i}))\)</span>:</p>
+<ul>
+<li><p>image의 feature map <span class="math notranslate nohighlight">\(\phi(x)\)</span>에 <span class="math notranslate nohighlight">\(t_{D}\)</span>를 심은 값과 원본 <span class="math notranslate nohighlight">\(\phi(x)\)</span>의 합</p></li>
+<li><p>앞부분은 text conditioning model을 학습, 뒷부분은 unconditional image generator를 학습</p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(V_{match}\)</span>(Matching aware loss)</p>
+<ul>
+<li><p>앞의 gan loss는 생성된 이미지가 얼마나 리얼한지, 그리고 condition <span class="math notranslate nohighlight">\(c\)</span>에 얼마나 가까운지에 대한 loss</p></li>
+<li><p>하지만 학습 초반에는 condition <span class="math notranslate nohighlight">\(c\)</span>에 상관없이 이미지의 퀄리티로만 분류를 해버림</p></li>
+<li><p>그래서 discriminator 학습 과정에서 conditioning을 강제로 따르도록 하기위해 <span class="math notranslate nohighlight">\(c\)</span>를 랜덤으로 샘플링(<span class="math notranslate nohighlight">\(\hat{c}\)</span>)해 image의 fake pair로 지정</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img72.png"><img alt="img7" class="bg-primary mb-1" src="../../_images/img72.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 470 </span><span class="caption-text">Matching aware loss</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>CLIP contrastive loss</p>
+<ul>
+<li><p>pretrained CLIP을 사용해 contrastive learning</p>
+<ul>
+<li><p>contrastive learning</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img83.png"><img alt="img8" class="bg-primary mb-1" src="../../_images/img83.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 471 </span><span class="caption-text">contrastive learning</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>가까운 class는 가깝게(positive), 그 외 class는 멀게(negative)</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>condition <span class="math notranslate nohighlight">\(c_{0}\)</span>에 대해 생성된 이미지의 feature vector는 condition <span class="math notranslate nohighlight">\(c_{0}\)</span>의 feature 벡터와는 가깝게 (positive), 나머지 condition의 feature vector와는 멀게(negative) 학습되어야한다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img92.png"><img alt="img9" class="bg-primary mb-1" src="../../_images/img92.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 472 </span><span class="caption-text">CLIP Loss</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>Vision-aided adversarial loss</p>
+<ul class="simple">
+<li><p><a class="reference external" href="https://arxiv.org/abs/2112.09130">https://arxiv.org/abs/2112.09130</a></p></li>
+<li><p>stylegan에서 discriminator는 training data에 overfitting되는 경향이 있음</p></li>
+<li><p>이부분을 해결하기위해 생성된  image를 SOTA의 vision 모델을 사용해 뽑아낸 feature map을 Discriminator에 같이 추가해 real / fake를 분류</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>GAN based upsampler</strong></p>
+<ul class="simple">
+<li><p>gigagan은 upsampling 모델에 적용이 가능하다</p></li>
+<li><p>64x64의 이미지를 3번 downsampling + 6번 upsampling 모델을 통과시켜 512x512의 이미지를 생성할 수 있다.</p></li>
+<li><p>이때 upsampling 부분에 gigagan의 Generator 부분을 적용</p></li>
+<li><p>upsampling model에 적용할때에는 LPIPS loss를 추가하고, 이미지 생성과정중에 stylegan과 동일하게 gaussian noise를 추가해 real함을 추가</p></li>
+</ul>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>Training and evaluation details</p>
+<ol class="arabic">
+<li><p>Dataset : LAION2B-en, COYO-700M</p></li>
+<li><p>upsampler(128 → 1024) : Adobe’s internal Stock images</p></li>
+<li><p>Pretrained text  encoder : CLIP ViT-L/14</p></li>
+<li><p>CLIP score : OpenCLIP ViT-G/14</p></li>
+<li><p>machine : A100 gpu, etc.</p></li>
+<li><p><strong>각각의 method가 정말 효과가 있는가?</strong></p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img102.png"><img alt="img10" class="bg-primary mb-1" src="../../_images/img102.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 473 </span><span class="caption-text">Table1</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>단순 scaling up보다 위 method들을 추가하면 <strong>비교적 더 낮은 FID와 더 높은 CLIP score 수치를 보여준다</strong>.</p></li>
+</ul>
+</li>
+<li><p><strong>Text2Image 모델과 성능 비교</strong></p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img112.png"><img alt="img11" class="bg-primary mb-1" src="../../_images/img112.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 474 </span><span class="caption-text">Table2</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>FID와 inference time을 보면 어느정도 균형을 이루며 경쟁력을 보이고 있다.</p></li>
+</ul>
+</li>
+<li><p>Distilled diffusion과 비교</p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img122.png"><img alt="img12" class="bg-primary mb-1" src="../../_images/img122.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 475 </span><span class="caption-text">Table3</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>diffusion의 속도 개선을 위해 distillation한 모델</p></li>
+<li><p>수치적으로도 우위에 있고, inference time도 여전히 더 빠르다.</p></li>
+</ul>
+</li>
+<li><p>Upscaler</p>
+<figure class="align-default">
+<img alt="img13" class="bg-primary mb-1" src="../../pics/GIGAGAN/img13.png" style="width: 700px;" />
+<figcaption><p><span class="caption-text">Table4</span></p></figcaption>
+</figure>
+
+<figure class="align-default">
+<img alt="img14" class="bg-primary mb-1" src="../../pics/GIGAGAN/img14.png" style="width: 700px;" />
+<figcaption><p><span class="caption-text">Super Resolution</span></p></figcaption>
+</figure>
+
+</li>
+<li><p>그 외</p>
+<ul class="simple">
+<li><p>stylegan에 연구된 technique(<strong>truncation trick, style mixing, latent interpolation</strong> 등) 적용 가능</p></li>
+</ul>
+</li>
+</ol>
+<ul>
+<li><p>truncation trick</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img151.png"><img alt="img15" class="bg-primary mb-1" src="../../_images/img151.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 476 </span><span class="caption-text">Truncation Trick</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>style mixing</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img16.png"><img alt="img16" class="bg-primary mb-1" src="../../_images/img16.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 477 </span><span class="caption-text">Style Mixing</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>latent interpolation</p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img17.png"><img alt="img17" class="bg-primary mb-1" src="../../_images/img17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 478 </span><span class="caption-text">Latent Interpolation</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>Limitation</p>
+<ul>
+<li><p>score 상으로는 어느정도 좋은 성능을 보이지만 <strong>실제로 DALLE2, Imagen과 같은 SOTA diffusion 모델과 비교하면 develop이 필요함</strong></p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img18.png"><img alt="img18" class="bg-primary mb-1" src="../../_images/img18.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 479 </span><span class="caption-text">Failure Cases</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<hr class="docutils" />
+<section id="id1">
+<h2>주요 질의응답<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Muse.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Muse</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="consistency_models.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Consistency Models</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">주요 질의응답</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/GLIDE.html b/docs/review/GLIDE.html
old mode 100644
new mode 100755
index 5aa098d2..9b0f752d
--- a/docs/review/GLIDE.html
+++ b/docs/review/GLIDE.html
@@ -1,855 +1,875 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>GLIDE &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/GLIDE';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="BBDM" href="BBDM.html" />
-    <link rel="prev" title="Synthetic Data from Diffusion Models Improves ImageNet Classification" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/GLIDE.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/GLIDE.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>GLIDE</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1 Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#guided-diffusion">2.2 Guided Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.3 Classifier-free guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clip-guidance">2.4 CLIP guidance</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3. Training</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-conditional-diffusion-models">3.1 Text-Conditional Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning-for-classifier-free-guidance">3.2. Fine-tuning for classifier-free guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-inpainting">3.3. Image Inpainting</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#noised-clip-models">3.4. Noised CLIP models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models (ICML 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2112.10741">https://arxiv.org/abs/2112.10741</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sehwan Park</p></li>
-<li><p><strong>Last updated on Oct. 20, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="glide">
-<h1>GLIDE<a class="headerlink" href="#glide" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GLIDE 기법이 DALL-E보다 human-evaluator 평가가 더 우수하다고 한다.</p></li>
-<li><p>classifier-free Guidance vs CLIP-Guidance(classifier-free Guidance를 결국 사용.)</p></li>
-<li><p>powerful한 text-driven image editing이 가능.</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Natural language로 부터 realistic한 image를 만드는 많은 방법들이 생겨나고 있다. 하지만 text prompts에 정확히 대응하는 photorealistic한 image를 생성하기에는 어려움을 겪고 있다.</p>
-<p>Diffusion model이 DDPM, DDIM 논문을 통해 생성모델의 중심으로 떠오르며 unconditional한 image에 대해서는 SOTA를 찍었다고 한다. 자연스럽게 class-conditional한 image 생성에 대해서도 연구가 이루어졌는데, Diffusion models beat gans on image synthesis라는 논문에서 저자들은 noise한 image에 대해 class를 예측하는 classifier를 추가하여 sampling과정에서 label에 해당하는 이미지를 생성하도록 gradient를 control시키는 classifier guidance 방법을 소개한다. 이후, classifier없이 guidance를 줄 수 있는 classifier-free guidance 방법이 소개되었다.</p>
-<p>이 논문에서는 classifier-free guidance 방법과 기존 diffusion model을 활용하여 text-conditional image synthesis를 잘 수행했다고 보여준다. 추가적으로  pretrained CLIP 모델을 활용하여 CLIP guidance라는 방법을 제시하며 classifier-free guidance와 비교를 한다. 결과적으로는 classifier-free guidance가 더 좋은 성능을 보인다고 한다.</p>
-<p>text prompt를 zero-shot으로 생성하는데에 있어 좋은 성능을 보였으나, 복잡한 prompt에 대한 photorealistc한 image를 생성하는데는 어려움을 겪을 수 있다고 한다. 그래서 이 논문에서는 text-conditional image generation뿐만 아니라 기존 image를 text-prompt를 통해 편집할 수 있는 image impainting기능도 가능하도록 했다고 한다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide1.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 323 </span><span class="caption-text">GLIDE text to image</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide2.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 324 </span><span class="caption-text">GLIDE image impainting</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="background">
-<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<section id="diffusion-models">
-<h3>2.1 Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>DDPM</p></li>
-</ul>
-<p>DDPM에서는 임의의 time step t로 부터 noise가 껴있는 image <span class="math notranslate nohighlight">\(x_t\)</span>의 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>가 얼만큼인지 예측한다. 예측한 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>를 이용하여 noise가 일부 제거된 이전 step의 mean(<span class="math notranslate nohighlight">\(\mu_{\theta}(x_t)\)</span>)을 구할 수 있고 variance(<span class="math notranslate nohighlight">\(\sum_{\theta}(x_t)\)</span>)는 constant한 값으로 고정시킨다. DDPM에서 제시한 forward process와 reverse process는 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{\alpha_{t}}x_{t-1}, (1-\alpha_t)\mathcal{I})
-\]</div>
-<div class="math notranslate nohighlight">
-\[ 
-p_{\theta}(x_{t-1}|x_t) := \mathcal{N}(\mu_{\theta}(x_t), \sum_{\theta}(x_t))
-\]</div>
-<ul class="simple">
-<li><p>Score-based generative modeling through stochastic differential equations</p></li>
-</ul>
-<p>해당 논문에서는 결국 score를 구하는 것과 epsilon을 구하는 것이 결국 같은 방향성을 띤다라고 주장한다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide5.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide5.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 325 </span><span class="caption-text">Proof of proportional relationship to finding score and epsilon</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Improved-DDPM</p></li>
-</ul>
-<p><span class="math notranslate nohighlight">\(\sum_{\theta}\)</span>를 constant값으로 고정시킨 이전 DDPM과 달리 해당 논문에서는 <span class="math notranslate nohighlight">\(\sum_{\theta}\)</span> learnable parameter로 설정하여 더 적은 diffusion step만으로 더 좋은 quality의 sample을 만들어낼 수 있다고 제시한다.</p>
-</section>
-<section id="guided-diffusion">
-<h3>2.2 Guided Diffusion<a class="headerlink" href="#guided-diffusion" title="Permalink to this heading">#</a></h3>
-<p>Diffusion model beat GANS on Image Synthesis(Dharwial et al.)에서는 diffusion model을 통해 class-conditional한 image생성을 제시한다. 이 논문에서의 가장 핵심적인 기술이 classifier-guidance이다. noise한 image로부터 epsilon을 예측하는 model은 그대로 유지하되, 해당 noise image가 어떤 class에 속하는지 분류하는 별도의 classifier를 설정한다. 이 classifier의 score를 통해 class-conditional한 전체 과정의 score에게 guide를 주는 방법을 제시한다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide6.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide6.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 326 </span><span class="caption-text">Classifier guidance</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide10.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 327 </span><span class="caption-text">Classifier guidance</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="classifier-free-guidance">
-<h3>2.3 Classifier-free guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
-<p>classifier를 통해 class-conditional한 image생성을 하는 방법이 위에 소개되었는데, 이 방법은 noise한 image에 대해서 classifiy를 해야하므로 pretrained model을 사용할 수 없고 모델 규모가 너무 heavy해지는 등 몇몇 문제점을 가지고 있었다. 이 방법에 대한 개선점을 Classifier-Free Diffusion Guidance(Ho et al.)에서 Classifer-free guidance라는 기법으로 제시한다. 위의 score 식에서 약간의 변형을 통해 classifier 없이 단일 model만으로 guidance를 줄 수 있는 방법을 제시한다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide7.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide7.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 328 </span><span class="caption-text">Classifier-free guidance</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="clip-guidance">
-<h3>2.4 CLIP guidance<a class="headerlink" href="#clip-guidance" title="Permalink to this heading">#</a></h3>
-<p>CLIP은 텍스트와 이미지 사이의 joint representation을 학습할 수 있는 모델이다. Image encoder f(x)와 Text encoder g(c)로 이루어져 있다. (x,c) 즉 이미지와 이미지 캡션 쌍으로 이루어진 대규모 데이터를 이용해 contrastive learning을 진행시킨 모델이다. 같은 의미를 가진 positive pair에 대해서는 f(x) · g(c)(유사도)가 커지도록 negative pair에 대해서는 f(x) · g(c)가 작아지도록 하는 것이다. CLIP guidance에서는 classifier guidance에서 classifier대신에 pretrained CLIP모델을 사용한다. 따라서 guidance를 주는 방식도 classifier대신 CLIP모델을 통해 구한 noise한 image x와 주어진 text간의 유사도를 이용한다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide8.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide8.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 329 </span><span class="caption-text">CLIP</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide9.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide9.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 330 </span><span class="caption-text">CLIP guidance</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="training">
-<h2>3. Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h2>
-<p>실험에서 3.5 billion parameter의 text-conditional diffusion model을 64x64 resolution을 위해 사용했고 또다른 1.5 billion parameter의 text-conditional upsampling diffusion model을 256x256으로 resolution을 증가시키는데 사용하였다고 한다. 또한, CLIP guidance를 위해 noised 64x64 ViT-L CLIP model을 사용했다고 한다.</p>
-<section id="text-conditional-diffusion-models">
-<h3>3.1 Text-Conditional Diffusion Models<a class="headerlink" href="#text-conditional-diffusion-models" title="Permalink to this heading">#</a></h3>
-<p>Improved DDPM의 ADM model을 base로 text-conditioning을 추가하여 학습을 진행하였다. 주어진 noised image <span class="math notranslate nohighlight">\(x_t\)</span>로부터 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>을 예측하는 <span class="math notranslate nohighlight">\(p_{\theta}(x_{t-1}|x_t,c)\)</span>를 수행해야한다. text를 condition으로 주기 위해서 우선 주어진 text를 K개의 token으로 encoding한 후, Transformer model에 input값으로 넣어준다. Transformer output의 마지막 embedding token과 positional encoding을 통해 나온 time step embedding token을 연산하고자 하는 크기에 맞게 linear projection하여 더한 후, residual block을 거친 image와 AdaIN기법을 통해 residual block의 output을 도출한다. Transformer output의 마지막 layer는 연산하고자 하는 크기에 맞게 linear projection하여 residual block뒤에 붙는 attention block에 이용한다.</p>
-<p>학습 데이터셋은 DALL-E와 같은 데이터셋을 사용하였고 model architecture로는 기존 ADM model보다 더 scale up된 model과 1.2B parameters를 갖는 Transformer를 사용했다고 한다. 게다가 64x64 image를 256x256 image로 upsampling하는 Upsampler model도 학습시켰다고 한다. upsampler model은 Improved DDPM에서의 ImageNet Upsampler와 거의 비슷하다고 한다.</p>
-</section>
-<section id="fine-tuning-for-classifier-free-guidance">
-<h3>3.2. Fine-tuning for classifier-free guidance<a class="headerlink" href="#fine-tuning-for-classifier-free-guidance" title="Permalink to this heading">#</a></h3>
-<p>처음 training을 진행했을때는, text를 condition으로 준 conditional image generation에 맞춰 training을 진행했다고 한다. 이 후, unconditional image generation의 성능을 위해 데이터셋의 약 20%의 text condition에 empty sequence를 주고 training을 진행했다고 한다.</p>
-</section>
-<section id="image-inpainting">
-<h3>3.3. Image Inpainting<a class="headerlink" href="#image-inpainting" title="Permalink to this heading">#</a></h3>
-<p>이전 연구에서는, inpainting을 위해 diffusion model로 학습시키는 과정을 거치지 않았다. diffusion model로 sampling을 한 후, 알려진 영역에 대해서는 <span class="math notranslate nohighlight">\(q(x_t|x_0)\)</span>로 대체하는 방식을 사용했기에 model이 sampling을 하는 과정에서 전체 context를 참조할 수 없다는 단점이 있었다.</p>
-<p>이 논문에서는 fine-tuning과정에서 training example의 임의의 부분을 지운다음, 남은 부분은 모델에 추가적인 조건 정보로서 마스크 채널과 함께 입력되도록 설계하였다.</p>
-</section>
-<section id="noised-clip-models">
-<h3>3.4. Noised CLIP models<a class="headerlink" href="#noised-clip-models" title="Permalink to this heading">#</a></h3>
-<p>classifier guidance에 더 적합하게 훈련시키기 위해 clip guidance를 사용해서 classifier-free guidance와 비교했음을 위에서 언급했다. clip guidance를 사용하기 위해 저자들은 noise image에 대해 학습시킨 Noised CLIP models를 사용했음을 밝힌다. 위에서 언급했듯이 결과는 classifier-free guidance가 더 좋았다고 한다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide14.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 331 </span><span class="caption-text">comparison between CLIP guidance and classifier-free guidance</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="results">
-<h2>4. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide12.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 332 </span><span class="caption-text">Quantitative Results</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>논문에서는 classifier-free guidance와 CLIP guidance에 대해 Precision과 Recall, FID와 IS, CLIP score와 FID 간의 명확한 trade-off 를 관찰하고 있다고 언급한다.</p>
-<p>위의 (a)와 (b)에 대해서는 classifier-free guidance가 거의 최적으로 수행되었으며, classifier-free guidance가 훨씬 강력한 방법임을 보여주고 있다. 반면, (c)에서는 CLIP guidance가 CLIP 점수를 classifier-free guidance에 비해 상당히 향상시킬 수 있는 것으로 보인다. 저자들은 CLIP guidance가 주로 CLIP 모델의 평가에 따라 이미지를 생성하는 데 중점을 둘 수 있지만, 특정 prompt 또는 caption과 일치시키는 데 뛰어나지 않을 수 있다는 가설을 제시한다. 이 가설을 확인하기 위해 저자들은 인간 평가자를 활용한 실험을 진행하였고 인간들이 CLIP 점수와 다른 의견을 가지며, classifier-free guidance가 해당 prompt와 더 일치하는 더 높은 품질의 샘플을 생성한다고 판단했다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide13.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide13.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 333 </span><span class="caption-text">Zero-shot FID results</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Table1은 Unguided, CLIP guidance, Classifier-free guidance 기법을 각각 적용한 256x256 resolution image에 대해 human evaluation을 진행한 결과이다. Classifier-free guidance를 적용한 결과가 photorealism, caption 항목에 대해 압도적인 결과를 보임을 알 수 있다.</p>
-<p>Table2는 GLIDE와 다른 text-conditional image generation model들을 비교한 표이다. MS-COCO dataset에 대해 생성된 image의 FID score를 구하였다. GLIDE model이 MS-COCO에 대해 학습한 경험이 없음에도 불구하고 Zero-shot FID 부분을 보면 상당히 좋은 result를 보임을 알 수 있다.</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide15.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide15.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 334 </span><span class="caption-text">final results</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Synthetic Data from Diffusion Models Improves ImageNet Classification</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="BBDM.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">BBDM</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1 Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#guided-diffusion">2.2 Guided Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.3 Classifier-free guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clip-guidance">2.4 CLIP guidance</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3. Training</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-conditional-diffusion-models">3.1 Text-Conditional Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning-for-classifier-free-guidance">3.2. Fine-tuning for classifier-free guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-inpainting">3.3. Image Inpainting</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#noised-clip-models">3.4. Noised CLIP models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>GLIDE &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/GLIDE';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="BBDM" href="BBDM.html" />
+    <link rel="prev" title="Synthetic Data from Diffusion Models Improves ImageNet Classification" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/GLIDE.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/GLIDE.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>GLIDE</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1 Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#guided-diffusion">2.2 Guided Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.3 Classifier-free guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clip-guidance">2.4 CLIP guidance</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3. Training</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-conditional-diffusion-models">3.1 Text-Conditional Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning-for-classifier-free-guidance">3.2. Fine-tuning for classifier-free guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-inpainting">3.3. Image Inpainting</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#noised-clip-models">3.4. Noised CLIP models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models (ICML 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2112.10741">https://arxiv.org/abs/2112.10741</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sehwan Park</p></li>
+<li><p><strong>Last updated on Oct. 20, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="glide">
+<h1>GLIDE<a class="headerlink" href="#glide" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GLIDE 기법이 DALL-E보다 human-evaluator 평가가 더 우수하다고 한다.</p></li>
+<li><p>classifier-free Guidance vs CLIP-Guidance(classifier-free Guidance를 결국 사용.)</p></li>
+<li><p>powerful한 text-driven image editing이 가능.</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Natural language로 부터 realistic한 image를 만드는 많은 방법들이 생겨나고 있다. 하지만 text prompts에 정확히 대응하는 photorealistic한 image를 생성하기에는 어려움을 겪고 있다.</p>
+<p>Diffusion model이 DDPM, DDIM 논문을 통해 생성모델의 중심으로 떠오르며 unconditional한 image에 대해서는 SOTA를 찍었다고 한다. 자연스럽게 class-conditional한 image 생성에 대해서도 연구가 이루어졌는데, Diffusion models beat gans on image synthesis라는 논문에서 저자들은 noise한 image에 대해 class를 예측하는 classifier를 추가하여 sampling과정에서 label에 해당하는 이미지를 생성하도록 gradient를 control시키는 classifier guidance 방법을 소개한다. 이후, classifier없이 guidance를 줄 수 있는 classifier-free guidance 방법이 소개되었다.</p>
+<p>이 논문에서는 classifier-free guidance 방법과 기존 diffusion model을 활용하여 text-conditional image synthesis를 잘 수행했다고 보여준다. 추가적으로  pretrained CLIP 모델을 활용하여 CLIP guidance라는 방법을 제시하며 classifier-free guidance와 비교를 한다. 결과적으로는 classifier-free guidance가 더 좋은 성능을 보인다고 한다.</p>
+<p>text prompt를 zero-shot으로 생성하는데에 있어 좋은 성능을 보였으나, 복잡한 prompt에 대한 photorealistic한 image를 생성하는데는 어려움을 겪을 수 있다고 한다. 그래서 이 논문에서는 text-conditional image generation뿐만 아니라 기존 image를 text-prompt를 통해 편집할 수 있는 image inpainting기능도 가능하도록 했다고 한다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide1.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 323 </span><span class="caption-text">GLIDE text to image</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide2.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 324 </span><span class="caption-text">GLIDE image inpainting</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="background">
+<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<section id="diffusion-models">
+<h3>2.1 Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>DDPM</p></li>
+</ul>
+<p>DDPM에서는 임의의 time step t로 부터 noise가 껴있는 image <span class="math notranslate nohighlight">\(x_t\)</span>의 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>가 얼만큼인지 예측한다. 예측한 <span class="math notranslate nohighlight">\(\epsilon_t\)</span>를 이용하여 noise가 일부 제거된 이전 step의 mean(<span class="math notranslate nohighlight">\(\mu_{\theta}(x_t)\)</span>)을 구할 수 있고 variance(<span class="math notranslate nohighlight">\(\Sigma_{\theta}(x_t)\)</span>)는 constant한 값으로 고정시킨다. DDPM에서 제시한 forward process와 reverse process는 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{\alpha_{t}}x_{t-1}, (1-\alpha_t)\mathcal{I})
+\]</div>
+<div class="math notranslate nohighlight">
+\[ 
+p_{\theta}(x_{t-1}|x_t) := \mathcal{N}(\mu_{\theta}(x_t), \Sigma_{\theta}(x_t))
+\]</div>
+<ul class="simple">
+<li><p>Score-based generative modeling through stochastic differential equations</p></li>
+</ul>
+<p>해당 논문에서는 결국 score를 구하는 것과 epsilon을 구하는 것이 결국 같은 방향성을 띤다라고 주장한다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide5.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide5.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 325 </span><span class="caption-text">Proof of proportional relationship to finding score and epsilon</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Improved-DDPM</p></li>
+</ul>
+<p><span class="math notranslate nohighlight">\(\Sigma_{\theta}\)</span>를 constant값으로 고정시킨 이전 DDPM과 달리 해당 논문에서는 <span class="math notranslate nohighlight">\(\Sigma_{\theta}\)</span>를 learnable parameter로 설정하여 더 적은 diffusion step만으로 더 좋은 quality의 sample을 만들어낼 수 있다고 제시한다.</p>
+</section>
+<section id="guided-diffusion">
+<h3>2.2 Guided Diffusion<a class="headerlink" href="#guided-diffusion" title="Permalink to this heading">#</a></h3>
+<p>Diffusion Models Beat GANs on Image Synthesis(Dhariwal et al.)에서는 diffusion model을 통해 class-conditional한 image생성을 제시한다. 이 논문에서의 가장 핵심적인 기술이 classifier-guidance이다. noise한 image로부터 epsilon을 예측하는 model은 그대로 유지하되, 해당 noise image가 어떤 class에 속하는지 분류하는 별도의 classifier를 설정한다. 이 classifier의 score를 통해 class-conditional한 전체 과정의 score에게 guide를 주는 방법을 제시한다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide6.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 326 </span><span class="caption-text">Classifier guidance</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide10.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 327 </span><span class="caption-text">Classifier guidance</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="classifier-free-guidance">
+<h3>2.3 Classifier-free guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
+<p>classifier를 통해 class-conditional한 image생성을 하는 방법이 위에 소개되었는데, 이 방법은 noise한 image에 대해서 classify를 해야하므로 pretrained model을 사용할 수 없고 모델 규모가 너무 heavy해지는 등 몇몇 문제점을 가지고 있었다. 이 방법에 대한 개선점을 Classifier-Free Diffusion Guidance(Ho et al.)에서 Classifier-free guidance라는 기법으로 제시한다. 위의 score 식에서 약간의 변형을 통해 classifier 없이 단일 model만으로 guidance를 줄 수 있는 방법을 제시한다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide7.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide7.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 328 </span><span class="caption-text">Classifier-free guidance</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="clip-guidance">
+<h3>2.4 CLIP guidance<a class="headerlink" href="#clip-guidance" title="Permalink to this heading">#</a></h3>
+<p>CLIP은 텍스트와 이미지 사이의 joint representation을 학습할 수 있는 모델이다. Image encoder f(x)와 Text encoder g(c)로 이루어져 있다. (x,c) 즉 이미지와 이미지 캡션 쌍으로 이루어진 대규모 데이터를 이용해 contrastive learning을 진행시킨 모델이다. 같은 의미를 가진 positive pair에 대해서는 f(x) · g(c)(유사도)가 커지도록 negative pair에 대해서는 f(x) · g(c)가 작아지도록 하는 것이다. CLIP guidance에서는 classifier guidance에서 classifier대신에 pretrained CLIP모델을 사용한다. 따라서 guidance를 주는 방식도 classifier대신 CLIP모델을 통해 구한 noise한 image x와 주어진 text간의 유사도를 이용한다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide8.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 329 </span><span class="caption-text">CLIP</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide9.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide9.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 330 </span><span class="caption-text">CLIP guidance</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="training">
+<h2>3. Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h2>
+<p>실험에서 3.5 billion parameter의 text-conditional diffusion model을 64x64 resolution을 위해 사용했고 또다른 1.5 billion parameter의 text-conditional upsampling diffusion model을 256x256으로 resolution을 증가시키는데 사용하였다고 한다. 또한, CLIP guidance를 위해 noised 64x64 ViT-L CLIP model을 사용했다고 한다.</p>
+<section id="text-conditional-diffusion-models">
+<h3>3.1 Text-Conditional Diffusion Models<a class="headerlink" href="#text-conditional-diffusion-models" title="Permalink to this heading">#</a></h3>
+<p>Improved DDPM의 ADM model을 base로 text-conditioning을 추가하여 학습을 진행하였다. 주어진 noised image <span class="math notranslate nohighlight">\(x_t\)</span>로부터 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>을 예측하는 <span class="math notranslate nohighlight">\(p_{\theta}(x_{t-1}|x_t,c)\)</span>를 수행해야한다. text를 condition으로 주기 위해서 우선 주어진 text를 K개의 token으로 encoding한 후, Transformer model에 input값으로 넣어준다. Transformer output의 마지막 embedding token과 positional encoding을 통해 나온 time step embedding token을 연산하고자 하는 크기에 맞게 linear projection하여 더한 후, residual block을 거친 image와 AdaIN기법을 통해 residual block의 output을 도출한다. Transformer output의 마지막 layer는 연산하고자 하는 크기에 맞게 linear projection하여 residual block뒤에 붙는 attention block에 이용한다.</p>
+<p>학습 데이터셋은 DALL-E와 같은 데이터셋을 사용하였고 model architecture로는 기존  ADM model보다 더 scale up된 model과 1.2B parameters를 갖는 Transformer를 사용했다고 한다. 게다가 64x64 image를 256x256 image로 upsampling하는 Upsampler model도 학습시켰다고 한다. upsampler model은 Improved DDPM에서의 ImageNet Upsampler와 거의 비슷하다고 한다.</p>
+</section>
+<section id="fine-tuning-for-classifier-free-guidance">
+<h3>3.2. Fine-tuning for classifier-free guidance<a class="headerlink" href="#fine-tuning-for-classifier-free-guidance" title="Permalink to this heading">#</a></h3>
+<p>처음 training을 진행했을때는, text를 condition으로 준 conditional image generation에 맞춰 training을 진행했다고 한다. 이 후, unconditional image generation의 성능을 위해 데이터셋의 약 20%의 text condition에 empty sequence를 주고 training을 진행했다고 한다.</p>
+</section>
+<section id="image-inpainting">
+<h3>3.3. Image Inpainting<a class="headerlink" href="#image-inpainting" title="Permalink to this heading">#</a></h3>
+<p>이전 연구에서는, inpainting을 위해 diffusion model로 학습시키는 과정을 거치지 않았다. diffusion model로 sampling을 한 후, 알려진 영역에 대해서는 <span class="math notranslate nohighlight">\(q(x_t|x_0)\)</span>로 대체하는 방식을 사용했기에 model이 sampling을 하는 과정에서 전체 context를 참조할 수 없다는 단점이 있었다.</p>
+<p>이 논문에서는 fine-tuning과정에서 training example의 임의의 부분을 지운다음, 남은 부분은 모델에 추가적인 조건 정보로서 마스크 채널과 함께 입력되도록 설계하였다.</p>
+</section>
+<section id="noised-clip-models">
+<h3>3.4. Noised CLIP models<a class="headerlink" href="#noised-clip-models" title="Permalink to this heading">#</a></h3>
+<p>classifier guidance에 더 적합하게 훈련시키기 위해 clip guidance를 사용해서 classifier-free guidance와 비교했음을 위에서 언급했다. clip guidance를 사용하기 위해 저자들은 noise image에 대해 학습시킨 Noised CLIP models를 사용했음을 밝힌다. 위에서 언급했듯이 결과는 classifier-free guidance가 더 좋았다고 한다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide14.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 331 </span><span class="caption-text">comparison between CLIP guidance and classifier-free guidance</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="results">
+<h2>4. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide12.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 332 </span><span class="caption-text">Quantitative Results</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>논문에서는 classifier-free guidance와 CLIP guidance에 대해 Precision과 Recall, FID와 IS, CLIP score와 FID 간의 명확한 trade-off 를 관찰하고 있다고 언급한다.</p>
+<p>위의 (a)와 (b)에 대해서는 classifier-free guidance가 거의 최적으로 수행되었으며, classifier-free guidance가 훨씬 강력한 방법임을 보여주고 있다. 반면, (c)에서는 CLIP guidance가 CLIP 점수를 classifier-free guidance에 비해 상당히 향상시킬 수 있는 것으로 보인다. 저자들은 CLIP guidance가 주로 CLIP 모델의 평가에 따라 이미지를 생성하는 데 중점을 둘 수 있지만, 특정 prompt 또는 caption과 일치시키는 데 뛰어나지 않을 수 있다는 가설을 제시한다. 이 가설을 확인하기 위해 저자들은 인간 평가자를 활용한 실험을 진행하였고 인간들이 CLIP 점수와 다른 의견을 가지며, classifier-free guidance가 해당 prompt와 더 일치하는 더 높은 품질의 샘플을 생성한다고 판단했다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide13.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide13.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 333 </span><span class="caption-text">Zero-shot FID results</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Table1은 Unguided, CLIP guidance, Classifier-free guidance 기법을 각각 적용한 256x256 resolution image에 대해 human evaluation을 진행한 결과이다. Classifier-free guidance를 적용한 결과가 photorealism, caption 항목에 대해 압도적인 결과를 보임을 알 수 있다.</p>
+<p>Table2는 GLIDE와 다른 text-conditional image generation model들을 비교한 표이다. MS-COCO dataset에 대해 생성된 image의 FID score를 구하였다. GLIDE model이 MS-COCO에 대해 학습한 경험이 없음에도 불구하고 Zero-shot FID 부분을 보면 상당히 좋은 result를 보임을 알 수 있다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/glide15.png"><img alt="GLIDE_1" class="bg-primary mb-1" src="../../_images/glide15.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 334 </span><span class="caption-text">final results</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Synthetic Data from Diffusion Models Improves ImageNet Classification</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="BBDM.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">BBDM</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.1 Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#guided-diffusion">2.2 Guided Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.3 Classifier-free guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#clip-guidance">2.4 CLIP guidance</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3. Training</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-conditional-diffusion-models">3.1 Text-Conditional Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning-for-classifier-free-guidance">3.2. Fine-tuning for classifier-free guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-inpainting">3.3. Image Inpainting</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#noised-clip-models">3.4. Noised CLIP models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">4. Results</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/HyperDreamBooth.html b/docs/review/HyperDreamBooth.html
old mode 100644
new mode 100755
index a196a63c..ec747510
--- a/docs/review/HyperDreamBooth.html
+++ b/docs/review/HyperDreamBooth.html
@@ -1,842 +1,862 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>HyperDreamBooth &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/HyperDreamBooth';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="CM3leon" href="CM3leon.html" />
-    <link rel="prev" title="IP-Adapter" href="IP_Adapter.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/HyperDreamBooth.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/HyperDreamBooth.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>HyperDreamBooth</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lightweight-dreambooth-lidb">Lightweight DreamBooth (LiDB)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hypernetwork">HyperNetwork</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rank-relaxed-fast-finetuning">Rank-Relaxed Fast Finetuning</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#follow-ups">Follow-ups</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2307.06949.pdf">https://arxiv.org/pdf/2307.06949.pdf</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Hyoungseo Cho</p></li>
-<li><p><strong>Last updated on Oct. 10, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="hyperdreambooth">
-<h1>HyperDreamBooth<a class="headerlink" href="#hyperdreambooth" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Personalization 는 Generative AI 분야에서 떠오르고 있는 주제입니다. 이는 high-fidelity와 identity를 유지한 상태로 다양한 맥락과 스타일을 생성할 수 있도록 합니다. 본 논문은 <a class="reference external" href="https://pseudo-lab.github.io/text-to-image-generation-feat-diffusion/docs/review/dreambooth.html">Dreambooth</a> 를 기반으로 진행되었기 때문에 <a class="reference external" href="https://pseudo-lab.github.io/text-to-image-generation-feat-diffusion/docs/review/dreambooth.html">Dreambooth</a> 논문을 먼저 읽어 보시기를 추천드립니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_01.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 294 </span><span class="caption-text">HyperDreamBooth</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="contribution">
-<h2>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h2>
-<p>본 논문의 Contribution은 크게 3가지로 볼 수 있습니다. Lightweight DreamBooth (LiDB), New HyperNetwork architecture 그리고 rank-relaxed finetuning 입니다. 위 3가지 방법을 활용하여 기존 DreamBooth의 핵심 능력을 유지하면서 크기를 줄이고 속도를 높일 수 있었습니다.</p>
-</section>
-<section id="related-work">
-<h2>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<p><strong>Text-to-image Models</strong> <br/>
-본 논문에서는 Stable Diffusion 모델을 활용하여 HyperDreamBooth를 구현했지만, 이 부분은 다른 텍스트-이미지 모델 (Imagen, DALL-E2 등) 도 적용이 가능합니다.</p>
-<p><strong>Personalization of Generative Models</strong> <br/>
-Generative Adversarial Network 기반의 기술들은 fidelity가 떨어지거나 다양한 문맥을 제공하지 못하는 문제가 있습니다. 이에 따라 HyperNetwork를 도입한 연구를 진행했습니다.</p>
-<p><strong>T2I Personalization via Finetuning</strong> <br/>
-다음으로, text-to-image personalization을 위한 Finetuning에 대한 연구가 있습니다. CustomDiffusion, SVDiff, LoRA, StyleDrop, DreamArtist 등의 예시가 있습니다. 하지만 이는 속도 측면에서 느리다는 단점을 가지고 있습니다.</p>
-<p>이러한 관련 연구들을 볼 때, HyperDreamBooth는 속도와 효율성 측면에서 큰 발전을 이루었다고 볼 수 있습니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_02.png"><img alt="hyperdreambooth_02" class="bg-primary mb-1" src="../../_images/hyperdreambooth_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 295 </span><span class="caption-text">HyperDreamBooth Training and Fast Fine-Tuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="prelimiaries">
-<h2>Preliminaries<a class="headerlink" href="#prelimiaries" title="Permalink to this heading">#</a></h2>
-<p><strong>Latent Diffusion Models (LDM)</strong> <br/>
-본 논문에서는 Stable Diffusion 모델을 활용하여 HyperDreamBooth를 구현했지만, 이 부분은 다른 텍스트-이미지 모델 (Imagen, DALL-E2 등) 도 적용이 가능합니다.</p>
-<p><strong>DreamBooth</strong> <br/>
-이전에 나온 DreamBooth는 특정 주제의 이미지를 생성하기 위해 T2I denoising 네트워크를 finetuning하는 전략을 활용했습니다. 이 방법은 HyperDreamBooth의 영감원 중 하나로 활용되었습니다.</p>
-<p><strong>Low Rank Adaptation (LoRA)</strong> <br/>
-LoRA는 모델의 가중치를 낮은 랭크의 행렬로 근사화하여 모델의 크기와 복잡성을 줄이는 방법입니다. 본 논문에서는 이 LoRA 기술을 활용하여 더 빠르고 효율적인 personalization이 가능하도록 합니다.</p>
-</section>
-<section id="method">
-<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<p>위에서 살펴 본 Contribution의 내용을 자세히 살펴보도록 하겠습니다.</p>
-<section id="lightweight-dreambooth-lidb">
-<h3>Lightweight DreamBooth (LiDB)<a class="headerlink" href="#lightweight-dreambooth-lidb" title="Permalink to this heading">#</a></h3>
-<p>HyperdreamBooth 의 핵심 기술 중 하나인 Lightweight DreamBooth, 줄여서 LiDB에 대해 설명드리겠습니다. LiDB는 rank-1 LoRA residuals의 가중치 공간을 더 세분화하는 것이 핵심 아이디어입니다. 분해 과정에서 rank-1 LoRA weight-space 내에서 random orthogonal basis를 활용하여 decompose 합니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_03.png"><img alt="hyperdreambooth_03" class="bg-primary mb-1" src="../../_images/hyperdreambooth_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 296 </span><span class="caption-text">Lightweight DreamBooth</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이 접근 방식은 LoRA의 A와 B 행렬을 각각 두 개의 행렬로 분해하는 것으로도 이해할 수 있습니다. 더 구체적으로 살펴보면, A 행렬은 <span class="math notranslate nohighlight">\(A_{aux}\)</span> 와 <span class="math notranslate nohighlight">\(A_{train}\)</span> 으로 분해되며, B 행렬은 <span class="math notranslate nohighlight">\(B_{aux}\)</span> 와 <span class="math notranslate nohighlight">\(B_{train}\)</span> 으로 분해할 수 있습니다. 여기서 <span class="math notranslate nohighlight">\(aux\)</span> 레이어는 행별로 직교하는 벡터로 무작위 초기화되고 freeze 되어 있으며, <span class="math notranslate nohighlight">\(train\)</span> 레이어는 학습되는 가중치입니다. 따라서 LiDB 선형 레이어의 weight-residual은 다음과 같이 표현할 수 있습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\Delta W_x = A_{aux} A_{train} B_{train} B_{aux}
-\]</div>
-<p>여기서 <span class="math notranslate nohighlight">\(aux\)</span> 레이어는 experimentally fix 되었으며 이 과정을 통해 trainable parameter 개수는 약 30K개, 사이즈는 약 120KB로 경량화 할 수 있습니다. 이렇게 작은 크기와 변수만으로 fidelity, editability, style 그리고 diversity 등을 유지할 수 있다는 것이 포인트입니다.</p>
-</section>
-<section id="hypernetwork">
-<h3>HyperNetwork<a class="headerlink" href="#hypernetwork" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_04.png"><img alt="hyperdreambooth_04" class="bg-primary mb-1" src="../../_images/hyperdreambooth_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 297 </span><span class="caption-text">HyperNetwork Architecture</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>다음은 HyperNetwork 입니다. 본 논문에서는 사전에 훈련된 T2I 모델을 빠르게 personalization 하기 위해 HyperNetwork를 제안합니다. 여기서 <span class="math notranslate nohighlight">\(\tilde{\theta}\)</span> 는 모든 LiDB residual 행렬을 나타내며, 각 T2I 모델의 cross-attention 및 self-attention 레이어에 대한 <span class="math notranslate nohighlight">\(A_{train}\)</span> 및 <span class="math notranslate nohighlight">\(B_{train}\)</span> 입니다. 이 핵심 아이디어는 주어진 이미지 x를 입력으로 받고, 이 이미지를 사용하여 LiDB의 low-rank residual인 <span class="math notranslate nohighlight">\(\hat{\theta}\)</span> 을 예측하는 HyperNetwork <span class="math notranslate nohighlight">\(H_{\eta}\)</span> 를 도입하는 것입니다. HyperNetwork는 도메인 특화 이미지 데이터셋에서 훈련되며, 일반적인 확산 노이즈 제거 손실과 가중치 공간 손실을 가지고 있습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L(x) = \alpha \left\| D_{\hat{\theta}}  (x + {\epsilon} , c) - x \right\|_{2}^{2} + \beta \left\|\hat{\theta}  -  {\theta} \right\|_{2}^{2}
-\]</div>
-<p>여기서 <span class="math notranslate nohighlight">\(x\)</span> 는 reference image를 의미합니다. HyperDreamBooth의 목표는 주어진 참조 이미지 x를 기반으로 해당 이미지와 유사한 새로운 이미지를 생성하는 것입니다. <span class="math notranslate nohighlight">\(\theta\)</span> 는 <span class="math notranslate nohighlight">\(x\)</span> 에 대한 pre-optimized 된 가중치 parameters입니다. 이러한 가중치는 HyperDreamBooth 모델을 personalization 하기 위해 이미지 <span class="math notranslate nohighlight">\(x\)</span> 와 관련된 텍스트와 함께 조정됩니다. <span class="math notranslate nohighlight">\(D_{\theta}\)</span> 는 diffusion model을 나타냅니다. 이 모델은 이미지 <span class="math notranslate nohighlight">\(x + \epsilon\)</span> 및 Supervisory Text Prompt <span class="math notranslate nohighlight">\(c\)</span> 로 조건이 설정된 상태에서 사용됩니다. 이 모델은 이미지 생성 및 개인화에 사용됩니다. <span class="math notranslate nohighlight">\(\alpha\)</span> 와 <span class="math notranslate nohighlight">\(\beta\)</span> 는 상대적인 loss의 가중치를 제어하기 위한 hyperparameters 입니다. 이러한 hyperparameters 는 각 loss 항목의 중요성을 조절하는 데 사용됩니다.</p>
-<p><strong>Supervisory Text Prompt</strong> <br/>
-Supervisory Text Prompt는 이미지 생성을 지원하기 위한 텍스트 입력입니다. 주어진 텍스트 프롬프트는 이미지 생성에 대한 지시사항 또는 조건을 제공합니다. HyperDreamBooth에서는 “a [V] face” 와 같은 텍스트 프롬프트를 사용하여 개인화된 이미지를 생성합니다. [V] 는 드물지만 다양한 의미 수정을 삽입할 수 있는 역할을 합니다.</p>
-<p><strong>HyperNetwork Architecture</strong> <br/>
-HyperNetwork는 HyperDreamBooth에서 사용되는 모델로, 개인화된 이미지 생성을 위한 가중치를 예측하는 역할을 합니다. HyperNetwork는 보통 다른 신경망 구조로 구성되며, 주어진 이미지를 입력으로 받아서 T2I 모델의 가중치를 예측합니다. 이러한 개인화된 이미지 생성을 위한 핵심 구성 요소 중 하나입니다. 여기서 예측한 가중치를 이후 Stable Diffusion 모델의 가중치에 더하여 개인화를 실행합니다.</p>
-<p><strong>Iterative Prediction</strong> <br/>
-HyperDreamBooth에서 사용되는 HyperNetwork는 반복적 예측을 수행합니다. 이것은 HyperNetwork가 초기 예측을 한 후에도 추가 반복적인 예측 단계를 통해 결과를 개선하려고 시도하는 것을 의미합니다. 초기 HyperNetwork 예측은 방향성이 올바르고 대상 얼굴과 유사한 semantic 특성을 생성하지만 미세한 세부 정보를 충분히 잡아내지 못할 수 있습니다. 따라서 반복적인 예측을 통해 초기 예측을 fine-tuning하고 더 나은 이미지를 생성합니다. 이 때에 image encoding은 단 한 번만 수행되며, 추출된 특징 f는 반복적인 예측 과정에서 사용됩니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_05.png"><img alt="hyperdreambooth_05" class="bg-primary mb-1" src="../../_images/hyperdreambooth_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 298 </span><span class="caption-text">HyperNetwork + Fast Finetuning</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="rank-relaxed-fast-finetuning">
-<h3>Rank-Relaxed Fast Finetuning<a class="headerlink" href="#rank-relaxed-fast-finetuning" title="Permalink to this heading">#</a></h3>
-<p>초기 HyperNetwork를 실행하고 나면 semantic 속성과 방향성에 대해서 올바르게 생성이 되지만 세부적인 detail은 잘 잡아내지 못합니다. 이를 위해 마지막으로 fast finetuning 단계를 제안합니다. 이 단계를 통해, DreamBooth보다 훨씬 빠르지만 강한 subject fidelity, editability 그리고 style diversity를 동일하게 유지할 수 있습니다.
-먼저 HyperNetwork를 사용하여 개인화된 diffusion model 가중치를 예측합니다. 이후 diffusion model의 가중치를 초기화된 이미지 x와 함께 주어진 텍스트 지시어 c에 대한 diffusion noise loss <span class="math notranslate nohighlight">\(L(x)\)</span> 를 최소화하도록 조정합니다. 여기서 주요한 점은 <em><strong>rank-relaxed</strong></em> 의 개념입니다. 이것은 초기 모델의 rank(주로 1)를 완화하여 더 높은 rank로 LoRA 모델을 fine tuning 하는 것을 의미합니다. 구체적으로, HyperNetwork의 예측된 가중치를 모델의 전체 가중치에 추가하고 더 높은 rank로 LoRA fine tuning을 수행합니다. 이를 통해 모델은 주체의 고주파수 세부 사항을 더 잘 근사화할 수 있으며 이로 인해 다른 낮은 rank로 제한된 업데이트보다 더 높은 주제 충실도를 달성할 수 있습니다. 이러한 rank-relaxed의 개념은 HyperDreamBooth를 다른 방식보다 더 우수하게 만드는 요인입니다. 여기서도 동일한 Supervisory Text Prompt “a [V] face” 를 사용하는데 이 프롬프트는 이미지 개인화를 지원하며 모델이 얼굴에 관련된 다양한 특성과 스타일을 캡처하는 데 도움이 됩니다. 그리고 HyperNetwork의 초기화된 가중치를 고려할 때, fast finetuning 단계를 40번의 반복으로 완료할 수 있습니다. 이는 DreamBooth 및 LoRA DreamBooth와 비교했을 때 25배 빠른 속도라는 것을 의미합니다.</p>
-</section>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>본 HyperDreamBooth는 Stable Diffusion v1.5 을 활용하여 구현했습니다. 이 모델에서는 Stable Diffusion v1.5의 다양한 요소 중 하나인 diffusion UNet의 cross and self-attention 레이어에 대한 LoRA 가중치를 예측합니다. 또한 텍스트 정보를 활용하기 위해 CLIP 텍스트 인코더도 예측합니다. 이미지 생성 모델을 개인화하기 위해 시각화에 사용되는 모든 얼굴 이미지는 SFHQ(Synthetic Face Headquarters) 데이터셋을 활용했습니다. 모델을 훈련시키기 위해 CelebA-HQ 데이터셋에서 15,000개의 실제 얼굴 이미지가 활용되었습니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_06.png"><img alt="hyperdreambooth_06" class="bg-primary mb-1" src="../../_images/hyperdreambooth_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 299 </span><span class="caption-text">Result Gallery</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>왼쪽 위에서 오른쪽 아래로 “인스타그램 셀카 [V] 얼굴”, “Pixar 캐릭터 [V] 얼굴”, “bark skin의 [V] 얼굴”, “록 스타 [V] 얼굴”, 가장 오른쪽: ” 전문적인 [V] 얼굴 촬영” 프롬프트를 활용했습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_07.png"><img alt="hyperdreambooth_07" class="bg-primary mb-1" src="../../_images/hyperdreambooth_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 300 </span><span class="caption-text">Qualitative Comparison</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_08.png"><img alt="hyperdreambooth_08" class="bg-primary mb-1" src="../../_images/hyperdreambooth_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 301 </span><span class="caption-text">Comparisons Table</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="comparisons">
-<h2>Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h2>
-<p>Hyperdreambooth, DreamBooth 그리고 Textual Inversion의 무작위 생성된 샘플을 비교한 이미지와 표입니다. 정량적 평가를 위해 DINO와 같은 지표를 활용했습니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_09.png"><img alt="hyperdreambooth_09" class="bg-primary mb-1" src="../../_images/hyperdreambooth_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 302 </span><span class="caption-text">Comparisons with DreamBooth</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 표는 DreamBooth와 비교하는 부분입니다. DreamBooth의 hyperparameter를 다르게 조정하여 비교했습니다. 그 결과 학습률을 증가시키고 반복 횟수(iterations)를 감소시키면 결과의 저하가 있었습니다. DreamBooth-Agg-1은 400번의 반복을 시행하고, DreamBooth-Agg-2는 일반적인 Dreambooth의 1200번 대신 40번의 반복을 사용했습니다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_10.png"><img alt="hyperdreambooth_10" class="bg-primary mb-1" src="../../_images/hyperdreambooth_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 303 </span><span class="caption-text">HyperNetwork Ablation</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 부분은 여러 가지 구성 요소로 나누어 실험한 표입니다. 실험 중에는 하이퍼네트워크를 사용하지 않는 경우, 하이퍼네트워크 예측만 사용하고 fast-finetuning을 사용하지 않은 경우, 반복 예측 없이 전체 방법을 1번만 사용한 경우를 비교합니다. 결과적으로 전체 방법이 모든 신뢰성 지표에서 가장 우수한 결과를 달성한다는 것을 보여주고 있습니다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_11.png"><img alt="hyperdreambooth_11" class="bg-primary mb-1" src="../../_images/hyperdreambooth_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 304 </span><span class="caption-text">User Study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>얼굴 인식 메트릭 이 특정 시나리오에서 상대적으로 약하다고 합니다. 얼굴 인식 네트워크가 실제 이미지에만 훈련되어 있고 다양한 스타일에서 동일한 사람을 인식하도록 훈련되어 있지 않기 때문이라고 주장하며 이를 보완하기 위해 user study를 진행했습니다. 여기서도 HyperDreamBooth, DreamBooth, Textual Inversion을 비교하고 사용자들의 평가를 받았습니다.</p>
-</section>
-<section id="follow-ups">
-<h2>Follow-ups<a class="headerlink" href="#follow-ups" title="Permalink to this heading">#</a></h2>
-<p>하지만 여전히 follow-ups가 존재합니다. 먼저 <strong>semantic directional error</strong> 라고 하는 초기 예측에서 잘못된 시맨틱 정보가 나올 수 있는 에러입니다. 잘못된 눈 색깔이나 헤어 타입, 성별 등이 나올 수 있습니다. 다음으로 <strong>incorrect subject detail capture</strong> 라는 오류가 있습니다. 다음은 <strong>underfitting</strong> 입니다. Fast finetuning 단계에서 identity는 지켜지더라도 유사하지 않은 샘플이 생성될 수 있습니다. 다음으로 HyperNetwork와 fast-finetuning 모두 일부 스타일에 대해 낮은 editability 가 나올 수 있습니다. 이러한 문제점은 빛, 포즈 등으로 인해 OOD인 샘플에서 나타날 수 있습니다.</p>
-</section>
-<section id="conclusion">
-<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<p>본 연구에서는 HyperDreamBooth라는 새로운 방법을 소개했습니다. 이 방법은 텍스트에서 이미지로 변환하는 diffusion model을 빠르고 가벼운 방식으로 개인화하는 것을 목표로 합니다. HyperDreamBooth는 HyperNetwork라는 구성 요소를 활용하여 diffusion model의 가벼운 파라미터인 LiDB(Lightweight DreamBooth)파라미터를 생성하며, 이어서 DreamBooth 및 기타 최적화 기반 개인화 작업에 비해 크기와 속도를 상당히 줄이면서 fast rank-relaxed fine tuning을 수행합니다. 이를 통해 모델의 무결성을 유지하면서 다양한 스타일과 의미적 수정이 적용된 다양한 고품질 이미지를 생성할 수 있음을 입증하였습니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="IP_Adapter.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">IP-Adapter</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="CM3leon.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">CM3leon</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Prelimiaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lightweight-dreambooth-lidb">Lightweight DreamBooth (LiDB)</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hypernetwork">HyperNetwork</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rank-relaxed-fast-finetuning">Rank-Relaxed Fast Finetuning</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#follow-ups">Follow-ups</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>HyperDreamBooth &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/HyperDreamBooth';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="CM3leon" href="CM3leon.html" />
+    <link rel="prev" title="IP-Adapter" href="IP_Adapter.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/HyperDreamBooth.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/HyperDreamBooth.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>HyperDreamBooth</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lightweight-dreambooth-lidb">Lightweight DreamBooth (LiDB)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hypernetwork">HyperNetwork</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rank-relaxed-fast-finetuning">Rank-Relaxed Fast Finetuning</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#follow-ups">Follow-ups</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2307.06949.pdf">https://arxiv.org/pdf/2307.06949.pdf</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Hyoungseo Cho</p></li>
+<li><p><strong>Last updated on Oct. 10, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="hyperdreambooth">
+<h1>HyperDreamBooth<a class="headerlink" href="#hyperdreambooth" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Personalization 는 Generative AI 분야에서 떠오르고 있는 주제입니다. 이는 high-fidelity와 identity를 유지한 상태로 다양한 맥락과 스타일을 생성할 수 있도록 합니다. 본 논문은 <a class="reference external" href="https://pseudo-lab.github.io/text-to-image-generation-feat-diffusion/docs/review/dreambooth.html">Dreambooth</a> 를 기반으로 진행되었기 때문에 <a class="reference external" href="https://pseudo-lab.github.io/text-to-image-generation-feat-diffusion/docs/review/dreambooth.html">Dreambooth</a> 논문을 먼저 읽어 보시기를 추천드립니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_01.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 294 </span><span class="caption-text">HyperDreamBooth</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="contribution">
+<h2>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h2>
+<p>본 논문의 Contribution은 크게 3가지로 볼 수 있습니다. Lightweight DreamBooth (LiDB), New HyperNetwork architecture 그리고 rank-relaxed finetuning 입니다. 위 3가지 방법을 활용하여 기존 DreamBooth의 핵심 능력을 유지하면서 크기를 줄이고 속도를 높일 수 있었습니다.</p>
+</section>
+<section id="related-work">
+<h2>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<p><strong>Text-to-image Models</strong> <br/>
+본 논문에서는 Stable Diffusion 모델을 활용하여 HyperDreamBooth를 구현했지만, 이 부분은 다른 텍스트-이미지 모델 (Imagen, DALL-E2 등) 도 적용이 가능합니다.</p>
+<p><strong>Personalization of Generative Models</strong> <br/>
+Generative Adversarial Network 기반의 기술들은 fidelity가 떨어지거나 다양한 문맥을 제공하지 못하는 문제가 있습니다. 이에 따라 HyperNetwork를 도입한 연구를 진행했습니다.</p>
+<p><strong>T2I Personalization via Finetuning</strong> <br/>
+다음으로, text-to-image personalization을 위한 Finetuning에 대한 연구가 있습니다. CustomDiffusion, SVDiff, LoRA, StyleDrop, DreamArtist 등의 예시가 있습니다. 하지만 이는 속도 측면에서 느리다는 단점을 가지고 있습니다.</p>
+<p>이러한 관련 연구들을 볼 때, HyperDreamBooth는 속도와 효율성 측면에서 큰 발전을 이루었다고 볼 수 있습니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_02.png"><img alt="hyperdreambooth_02" class="bg-primary mb-1" src="../../_images/hyperdreambooth_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 295 </span><span class="caption-text">HyperDreamBooth Training and Fast Fine-Tuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="prelimiaries">
+<h2>Preliminaries<a class="headerlink" href="#prelimiaries" title="Permalink to this heading">#</a></h2>
+<p><strong>Latent Diffusion Models (LDM)</strong> <br/>
+본 논문에서는 Stable Diffusion 모델을 활용하여 HyperDreamBooth를 구현했지만, 이 부분은 다른 텍스트-이미지 모델 (Imagen, DALL-E2 등) 도 적용이 가능합니다.</p>
+<p><strong>DreamBooth</strong> <br/>
+이전에 나온 DreamBooth는 특정 주제의 이미지를 생성하기 위해 T2I denoising 네트워크를 finetuning하는 전략을 활용했습니다. 이 방법은 HyperDreamBooth의 영감원 중 하나로 활용되었습니다.</p>
+<p><strong>Low Rank Adaptation (LoRA)</strong> <br/>
+LoRA는 모델의 가중치를 낮은 랭크의 행렬로 근사화하여 모델의 크기와 복잡성을 줄이는 방법입니다. 본 논문에서는 이 LoRA 기술을 활용하여 더 빠르고 효율적인 personalization이 가능하도록 합니다.</p>
+</section>
+<section id="method">
+<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<p>위에서 살펴 본 Contribution의 내용을 자세히 살펴보도록 하겠습니다.</p>
+<section id="lightweight-dreambooth-lidb">
+<h3>Lightweight DreamBooth (LiDB)<a class="headerlink" href="#lightweight-dreambooth-lidb" title="Permalink to this heading">#</a></h3>
+<p>HyperDreamBooth 의 핵심 기술 중 하나인 Lightweight DreamBooth, 줄여서 LiDB에 대해 설명드리겠습니다. LiDB는 rank-1 LoRA residuals의 가중치 공간을 더 세분화하는 것이 핵심 아이디어입니다. 분해 과정에서 rank-1 LoRA weight-space 내에서 random orthogonal basis를 활용하여 decompose 합니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_03.png"><img alt="hyperdreambooth_03" class="bg-primary mb-1" src="../../_images/hyperdreambooth_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 296 </span><span class="caption-text">Lightweight DreamBooth</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이 접근 방식은 LoRA의 A와 B 행렬을 각각 두 개의 행렬로 분해하는 것으로도 이해할 수 있습니다. 더 구체적으로 살펴보면, A 행렬은 <span class="math notranslate nohighlight">\(A_{aux}\)</span> 와 <span class="math notranslate nohighlight">\(A_{train}\)</span> 으로 분해되며, B 행렬은 <span class="math notranslate nohighlight">\(B_{aux}\)</span> 와 <span class="math notranslate nohighlight">\(B_{train}\)</span> 으로 분해할 수 있습니다. 여기서 <span class="math notranslate nohighlight">\(aux\)</span> 레이어는 행별로 직교하는 벡터로 무작위 초기화되고 freeze 되어 있으며, <span class="math notranslate nohighlight">\(train\)</span> 레이어는 학습되는 가중치입니다. 따라서 LiDB 선형 레이어의 weight-residual은 다음과 같이 표현할 수 있습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\Delta W_x = A_{aux} A_{train} B_{train} B_{aux}
+\]</div>
+<p>여기서 <span class="math notranslate nohighlight">\(aux\)</span> 레이어는 experimentally fix 되었으며 이 과정을 통해 trainable parameter 개수는 약 30K개, 사이즈는 약 120KB로 경량화 할 수 있습니다. 이렇게 작은 크기와 변수만으로 fidelity, editability, style 그리고 diversity 등을 유지할 수 있다는 것이 포인트입니다.</p>
+</section>
+<section id="hypernetwork">
+<h3>HyperNetwork<a class="headerlink" href="#hypernetwork" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_04.png"><img alt="hyperdreambooth_04" class="bg-primary mb-1" src="../../_images/hyperdreambooth_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 297 </span><span class="caption-text">HyperNetwork Architecture</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>다음은 HyperNetwork입니다. 본 논문에서는 사전에 훈련된 T2I 모델을 빠르게 personalization 하기 위해 HyperNetwork를 제안합니다. 여기서 <span class="math notranslate nohighlight">\(\tilde{\theta}\)</span> 는 모든 LiDB residual 행렬을 나타내며, 각 T2I 모델의 cross-attention 및 self-attention 레이어에 대한 <span class="math notranslate nohighlight">\(A_{train}\)</span> 및 <span class="math notranslate nohighlight">\(B_{train}\)</span> 입니다. 핵심 아이디어는 주어진 이미지 x를 입력으로 받고, 이 이미지를 사용하여 LiDB의 low-rank residual인 <span class="math notranslate nohighlight">\(\hat{\theta}\)</span> 을 예측하는 HyperNetwork <span class="math notranslate nohighlight">\(H_{\eta}\)</span> 를 도입하는 것입니다. HyperNetwork는 도메인 특화 이미지 데이터셋에서 훈련되며, 일반적인 확산 노이즈 제거 손실과 가중치 공간 손실을 가지고 있습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L(x) = \alpha \left\| D_{\hat{\theta}}  (x + {\epsilon} , c) - x \right\|_{2}^{2} + \beta \left\|\hat{\theta}  -  {\theta} \right\|_{2}^{2}
+\]</div>
+<p>여기서 <span class="math notranslate nohighlight">\(x\)</span> 는 reference image를 의미합니다. HyperDreamBooth의 목표는 주어진 참조 이미지 x를 기반으로 해당 이미지와 유사한 새로운 이미지를 생성하는 것입니다. <span class="math notranslate nohighlight">\(\theta\)</span> 는 <span class="math notranslate nohighlight">\(x\)</span> 에 대한 pre-optimized 된 가중치 parameters입니다. 이러한 가중치는 HyperDreamBooth 모델을 personalization 하기 위해 이미지 <span class="math notranslate nohighlight">\(x\)</span> 와 관련된 텍스트와 함께 조정됩니다. <span class="math notranslate nohighlight">\(D_{\hat{\theta}}\)</span> 는 diffusion model을 나타냅니다. 이 모델은 이미지 <span class="math notranslate nohighlight">\(x + \epsilon\)</span> 및 Supervisory Text Prompt <span class="math notranslate nohighlight">\(c\)</span> 로 조건이 설정된 상태에서 사용됩니다. 이 모델은 이미지 생성 및 개인화에 사용됩니다. <span class="math notranslate nohighlight">\(\alpha\)</span> 와 <span class="math notranslate nohighlight">\(\beta\)</span> 는 상대적인 loss의 가중치를 제어하기 위한 hyperparameters 입니다. 이러한 hyperparameters 는 각 loss 항목의 중요성을 조절하는 데 사용됩니다.</p>
+<p><strong>Supervisory Text Prompt</strong> <br/>
+Supervisory Text Prompt는 이미지 생성을 지원하기 위한 텍스트 입력입니다. 주어진 텍스트 프롬프트는 이미지 생성에 대한 지시사항 또는 조건을 제공합니다. HyperDreamBooth에서는 “a [V] face” 와 같은 텍스트 프롬프트를 사용하여 개인화된 이미지를 생성합니다. [V] 는 드물지만 다양한 의미 수정을 삽입할 수 있는 역할을 합니다.</p>
+<p><strong>HyperNetwork Architecture</strong> <br/>
+HyperNetwork는 HyperDreamBooth에서 사용되는 모델로, 개인화된 이미지 생성을 위한 가중치를 예측하는 역할을 합니다. HyperNetwork는 보통 다른 신경망 구조로 구성되며, 주어진 이미지를 입력으로 받아서 T2I 모델의 가중치를 예측합니다. 이러한 개인화된 이미지 생성을 위한 핵심 구성 요소 중 하나입니다. 여기서 예측한 가중치를 이후 Stable Diffusion 모델의 가중치에 더하여 개인화를 실행합니다.</p>
+<p><strong>Iterative Prediction</strong> <br/>
+HyperDreamBooth에서 사용되는 HyperNetwork는 반복적 예측을 수행합니다. 이것은 HyperNetwork가 초기 예측을 한 후에도 추가 반복적인 예측 단계를 통해 결과를 개선하려고 시도하는 것을 의미합니다. 초기 HyperNetwork 예측은 방향성이 올바르고 대상의 얼굴과 유사한 semantic 특성을 생성하지만 미세한 세부 정보를 충분히 잡아내지 못할 수 있습니다. 따라서 반복적인 예측을 통해 초기 예측을 fine-tuning하고 더 나은 이미지를 생성합니다. 이 때에 image encoding은 단 한 번만 수행되며, 추출된 특징 f는 반복적인 예측 과정에서 사용됩니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_05.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 298 </span><span class="caption-text">HyperNetwork + Fast Finetuning</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="rank-relaxed-fast-finetuning">
+<h3>Rank-Relaxed Fast Finetuning<a class="headerlink" href="#rank-relaxed-fast-finetuning" title="Permalink to this heading">#</a></h3>
+<p>초기 HyperNetwork를 실행하고 나면 semantic 속성과 방향성에 대해서 올바르게 생성이 되지만 세부적인 detail은 잘 잡아내지 못합니다. 이를 위해 마지막으로 fast finetuning 단계를 제안합니다. 이 단계를 통해, DreamBooth보다 훨씬 빠르지만 강한 subject fidelity, editability 그리고 style diversity를 동일하게 유지할 수 있습니다.
+먼저 HyperNetwork를 사용하여 개인화된 diffusion model 가중치를 예측합니다. 이후 diffusion model의 가중치를 초기화된 이미지 x와 함께 주어진 텍스트 지시어 c에 대한 diffusion noise loss <span class="math notranslate nohighlight">\(L(x)\)</span> 를 최소화하도록 조정합니다. 여기서 주요한 점은 <em><strong>rank-relaxed</strong></em> 의 개념입니다. 이것은 초기 모델의 rank(주로 1)를 완화하여 더 높은 rank로 LoRA 모델을 fine tuning 하는 것을 의미합니다. 구체적으로, HyperNetwork가 예측한 가중치를 모델의 전체 가중치에 추가하고 더 높은 rank로 LoRA fine tuning을 수행합니다. 이를 통해 모델은 주체의 고주파수 세부 사항을 더 잘 근사화할 수 있으며 이로 인해 다른 낮은 rank로 제한된 업데이트보다 더 높은 주제 충실도를 달성할 수 있습니다. 이러한 rank-relaxed의 개념은 HyperDreamBooth를 다른 방식보다 더 우수하게 만드는 요인입니다. 여기서도 동일한 Supervisory Text Prompt “a [V] face” 를 사용하는데 이 프롬프트는 이미지 개인화를 지원하며 모델이 얼굴에 관련된 다양한 특성과 스타일을 캡처하는 데 도움이 됩니다. 그리고 HyperNetwork의 초기화된 가중치를 고려할 때, fast finetuning 단계를 40번의 반복으로 완료할 수 있습니다. 이는 DreamBooth 및 LoRA DreamBooth와 비교했을 때 25배 빠른 속도라는 것을 의미합니다.</p>
+</section>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>본 HyperDreamBooth는 Stable Diffusion v1.5 을 활용하여 구현했습니다. 이 모델에서는 Stable Diffusion v1.5의 다양한 요소 중 하나인 diffusion UNet의 cross and self-attention 레이어에 대한 LoRA 가중치를 예측합니다. 또한 텍스트 정보를 활용하기 위해 CLIP 텍스트 인코더에 대한 LoRA 가중치도 함께 예측합니다. 이미지 생성 모델을 개인화하기 위해 시각화에 사용되는 모든 얼굴 이미지는 SFHQ(Synthetic Face Headquarters) 데이터셋을 활용했습니다. 모델을 훈련시키기 위해 CelebA-HQ 데이터셋에서 15,000개의 실제 얼굴 이미지가 활용되었습니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_06.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 299 </span><span class="caption-text">Result Gallery</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>왼쪽 위에서 오른쪽 아래로 “인스타그램 셀카 [V] 얼굴”, “Pixar 캐릭터 [V] 얼굴”, “bark skin의 [V] 얼굴”, “록 스타 [V] 얼굴”, 가장 오른쪽: “전문적인 [V] 얼굴 촬영” 프롬프트를 활용했습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_07.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 300 </span><span class="caption-text">Qualitative Comparison</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_08.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 301 </span><span class="caption-text">Comparisons Table</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="comparisons">
+<h2>Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h2>
+<p>HyperDreamBooth, DreamBooth 그리고 Textual Inversion의 무작위 생성된 샘플을 비교한 이미지와 표입니다. 정량적 평가를 위해 DINO와 같은 지표를 활용했습니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_09.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 302 </span><span class="caption-text">Comparisons with DreamBooth</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 표는 DreamBooth와 비교하는 부분입니다. DreamBooth의 hyperparameter를 다르게 조정하여 비교했습니다. 그 결과 학습률을 증가시키고 반복 횟수(iterations)를 감소시키면 결과의 저하가 있었습니다. DreamBooth-Agg-1은 400번의 반복을 시행하고, DreamBooth-Agg-2는 일반적인 Dreambooth의 1200번 대신 40번의 반복을 사용했습니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_10.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 303 </span><span class="caption-text">HyperNetwork Ablation</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 부분은 여러 가지 구성 요소로 나누어 실험한 표입니다. 실험 중에는 하이퍼네트워크를 사용하지 않는 경우, 하이퍼네트워크 예측만 사용하고 fast-finetuning을 사용하지 않은 경우, 반복 예측 없이 전체 방법을 1번만 사용한 경우를 비교합니다. 결과적으로 전체 방법이 모든 신뢰성 지표에서 가장 우수한 결과를 달성한다는 것을 보여주고 있습니다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/hyperdreambooth_11.png"><img alt="hyperdreambooth_01" class="bg-primary mb-1" src="../../_images/hyperdreambooth_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 304 </span><span class="caption-text">User Study</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>얼굴 인식 메트릭이 특정 시나리오에서 상대적으로 약하다고 합니다. 얼굴 인식 네트워크가 실제 이미지에만 훈련되어 있고 다양한 스타일에서 동일한 사람을 인식하도록 훈련되어 있지 않기 때문이라고 주장하며 이를 보완하기 위해 user study를 진행했습니다. 여기서도 HyperDreamBooth, DreamBooth, Textual Inversion을 비교하고 사용자들의 평가를 받았습니다.</p>
+</section>
+<section id="follow-ups">
+<h2>Follow-ups<a class="headerlink" href="#follow-ups" title="Permalink to this heading">#</a></h2>
+<p>하지만 여전히 follow-ups가 존재합니다. 먼저 <strong>semantic directional error</strong> 라고 하는 초기 예측에서 잘못된 시맨틱 정보가 나올 수 있는 에러입니다. 잘못된 눈 색깔이나 헤어 타입, 성별 등이 나올 수 있습니다. 다음으로 <strong>incorrect subject detail capture</strong> 라는 오류가 있습니다. 다음은 <strong>underfitting</strong> 입니다. Fast finetuning 단계에서 identity는 지켜지더라도 유사하지 않은 샘플이 생성될 수 있습니다. 다음으로 HyperNetwork와 fast-finetuning 모두 일부 스타일에 대해 낮은 editability 가 나올 수 있습니다. 이러한 문제점은 빛, 포즈 등으로 인해 OOD인 샘플에서 나타날 수 있습니다.</p>
+</section>
+<section id="conclusion">
+<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<p>본 연구에서는 HyperDreamBooth라는 새로운 방법을 소개했습니다. 이 방법은 텍스트에서 이미지로 변환하는 diffusion model을 빠르고 가벼운 방식으로 개인화하는 것을 목표로 합니다. HyperDreamBooth는 HyperNetwork라는 구성 요소를 활용하여 diffusion model의 가벼운 파라미터인 LiDB(Lightweight DreamBooth)파라미터를 생성하며, 이어서 DreamBooth 및 기타 최적화 기반 개인화 작업에 비해 크기와 속도를 상당히 줄이면서 fast rank-relaxed fine tuning을 수행합니다. 이를 통해 모델의 무결성을 유지하면서 다양한 스타일과 의미적 수정이 적용된 다양한 고품질 이미지를 생성할 수 있음을 입증하였습니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="IP_Adapter.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">IP-Adapter</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="CM3leon.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">CM3leon</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prelimiaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lightweight-dreambooth-lidb">Lightweight DreamBooth (LiDB)</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hypernetwork">HyperNetwork</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rank-relaxed-fast-finetuning">Rank-Relaxed Fast Finetuning</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">Comparisons</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#follow-ups">Follow-ups</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/I-DDPM.html b/docs/review/I-DDPM.html
old mode 100644
new mode 100755
index c3e91cf5..158a9237
--- a/docs/review/I-DDPM.html
+++ b/docs/review/I-DDPM.html
@@ -1,946 +1,966 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>I-DDPM &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/I-DDPM';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="StyO" href="StyO.html" />
-    <link rel="prev" title="LoRA" href="LoRA.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/I-DDPM.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/I-DDPM.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>I-DDPM</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probabilistic-models">2. Denoising Diffusion Probabilistic Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-the-log-likelihood">3. Improving the Log-likelihood</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improcing-sampling-speed">4. Improving Sampling Speed</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-gans">5. Comparison to GANs</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size">6. Scaling Model Size</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Improved Denoising Diffusion Probabilistic Models (ICML 2021)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2102.09672">https://arxiv.org/abs/2102.09672</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="i-ddpm">
-<h1>I-DDPM<a class="headerlink" href="#i-ddpm" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>DDPM을 약간 수정함으로써 High Quality를 유지하고, Log Likelihood수치도 개선할 수 있는 향상된 모델을 제안</p></li>
-<li><p>Sampling시 Base 보다 더 적은 Step으로 비슷한 퀄리티의 결과를 낼 수 있는 방법을 제안</p></li>
-<li><p>Model의 Scale과 Diffusion Step에 따른 Sample Quality와 Likelihood 수치간의 관계를 연구</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>최근 DDPM(Ho et al.) 모델은 Generate 분야에서 High Quality의 이미지를 생성해내는 수준까지 왔다.</p></li>
-<li><p>하지만, Image의 Quality에 반해 log-likelihood 수치는 다른 generative 모델에 비해 현저히 떨어졌다. (e.g. VAE)</p></li>
-<li><p>또 DDPM이 Diversity가 낮은 Dataset(CIFAR-10, LSUN)에서는 잘 동작했지만, High Diversity Dataset에서의 동작은 증명되지 못했다.</p></li>
-<li><p>I-DDPM에서는</p>
-<ol class="arabic simple">
-<li><p>Log-Likelihood 수치 개선</p></li>
-<li><p>ImageNet같은 Diversity가 높은 Dataset에서도 잘 동작</p></li>
-<li><p>Reverse Process에서의 Loss Term 개선</p></li>
-</ol>
-<p>한 모델을 제안하였다.</p>
-</li>
-<li><p>추가로 연구 과정 중, I-DDPM이 Base (DDPM) 모델에 비해 훨씬 더 적은 Step으로 비슷한 Quality를 내는 것을 확인</p></li>
-</ul>
-<p><strong>Log-Likelihood 값이 중요한 이유</strong></p>
-<ul class="simple">
-<li><p>기존 연구들에서 Loglikelihood 수치와 Sample의 Quality간의 연관성을 보이는 연구들이 많았다.</p>
-<ul>
-<li><p><em>Data의 Distribution에 대해 Model이 학습한 정도를 수치화한 느낌</em></p></li>
-</ul>
-</li>
-<li><p>수치가 좋아지면 Sample Quality도 따라 증가하는 경향을 보였다.</p></li>
-<li><p>따라서 DDPM에서도 LogLikelihood 수치를 개선한다면 Sample Quality도 따라서 더 증가할 가능성이 있지 않을까?</p></li>
-<li><p><a class="reference external" href="https://angeloyeo.github.io/2020/07/17/MLE.html">https://angeloyeo.github.io/2020/07/17/MLE.html</a></p></li>
-</ul>
-</section>
-<section id="denoising-diffusion-probabilistic-models">
-<h2>2. Denoising Diffusion Probabilistic Models<a class="headerlink" href="#denoising-diffusion-probabilistic-models" title="Permalink to this heading">#</a></h2>
-<p><strong>DDPM</strong></p>
-<ul>
-<li><p>Process</p>
-<ul>
-<li><p>Forward Process</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img03.png"><img alt="I-DDPM_00" class="bg-primary mb-1" src="../../_images/img03.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 185 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Reverse Process</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img19.png"><img alt="I-DDPM_01" class="bg-primary mb-1" src="../../_images/img19.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 186 </span><span class="caption-text">Equation 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>Forward Process에서 입힌 Noise를 Neural Model의 Reverse Process로 예측하도록 학습하는 형태</p></li>
-<li><p>이 때 Noising &amp; Denoising에 관한 (Hyper) Parameter로 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 사용</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\({B_{t}}\)</span> : time step 에 따른 noising할 정도</p></li>
-<li><p><span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> : Reverse Step에서 Denoising을 위한 Parameter로 아래와같이 정의</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img24.png"><img alt="I-DDPM_02" class="bg-primary mb-1" src="../../_images/img24.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 187 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>하지만 DDPM에서는 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 대신 <span class="math notranslate nohighlight">\({B_{t}}\)</span>를 사용해도 비슷한 수치를 보여서 <span class="math notranslate nohighlight">\({B_{t}}\)</span> (constant)로 고정</p></li>
-</ul>
-</section>
-<section id="improving-the-log-likelihood">
-<h2>3. Improving the Log-likelihood<a class="headerlink" href="#improving-the-log-likelihood" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>위의 문장 (<span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 대신 <span class="math notranslate nohighlight">\({B_{t}}\)</span>를 사용)에서 의문점</p>
-<ul>
-<li><p>사실 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>는 정 반대의 역할을 하는 Parameter인데 왜 비슷한 결과를 보였고, 결국 같은 값으로 Fix를 하는게 맞을까?</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img34.png"><img alt="I-DDPM_03" class="bg-primary mb-1" src="../../_images/img34.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 188 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Diffusion Step간 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>의 차이를 비교해보면 Diffusion Step이 커질수록 두개의 값은 거의 동일해진다. (Figure.1)</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img43.png"><img alt="I-DDPM_04" class="bg-primary mb-1" src="../../_images/img43.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 189 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>하지만 Figure.2를 보면 모델의 성능은 대부분 Step 초반에 결정되는데, Step 초반에는 두 값의 차이가 큰 것을 확인할 수 있다.</p>
-<ul class="simple">
-<li><p><em>Model의 성능이 결정되는 부분 = Loss 가 급격하게 떨어지는 부분</em></p></li>
-</ul>
-<p>⇒ 따라서, <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 동일한 값으로 두고 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 Non Trainable Parameter로 두는것은 설계의 Miss</p>
-</li>
-<li><p>하지만, <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 자체를 학습하기에는 값의 범위가 너무 작아서 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>의 Interpolation 값을 Predict하도록 설계</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img53.png"><img alt="I-DDPM_05" class="bg-primary mb-1" src="../../_images/img53.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 190 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Hybrid Loss</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(L_{hybrid} = L_{simple} + \lambda L_{vlb}\)</span></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Noise Schedule</p>
-<ul>
-<li><p>DDPM의 경우 High Resolution 이미지에대해 잘 동작하지만, Low-Resolution (e.g. 32x32, 64x64)의 이미지에 대해서는 잘 동작하지 않는것을 확인</p></li>
-<li><p>Noise Scheduling에서 Linear mode의 Limitation이 있음을 지적</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img63.png"><img alt="I-DDPM_06" class="bg-primary mb-1" src="../../_images/img63.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 191 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Step이 거듭될수록 Linear schedule(상단)의 이미지가 너무 빠르게 Noisy해짐</p></li>
-<li><p>추가로 Reverse Process의 20%를 Skip해도 성능에 큰 영향이 없음을 확인</p></li>
-</ul>
-<p>⇒ 결국 Linear mode를 사용하면 특정 Step 이후의 Noise는 학습에 의미있는 영향을 미치지 못한다.</p>
-<ul>
-<li><p>I-DDPM에서는 이러한 scheduling Equation을 새로 정의</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img73.png"><img alt="I-DDPM_07" class="bg-primary mb-1" src="../../_images/img73.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 192 </span><span class="caption-text">Equation 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>새로 정의한 식은 중간 단계에서는 Noise가 강하게 입혀지지만 0과 T 부근에서는 비교적 덜 Noisy해짐</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img84.png"><img alt="I-DDPM_08" class="bg-primary mb-1" src="../../_images/img84.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 193 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>Gradient Noise</p>
-<ul>
-<li><p>Model을 <span class="math notranslate nohighlight">\(L_{vlb}\)</span>를 Direct로 최적화하도록 설계하면 Best</p></li>
-<li><p>하지만 아래 이미지와같이 Loss 자체가 unstable해서 직접 최적화에는 어려움이 있음</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img93.png"><img alt="I-DDPM_09" class="bg-primary mb-1" src="../../_images/img93.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 194 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>따라서 <span class="math notranslate nohighlight">\(L_{vlb}\)</span>의 Variance를 줄이기위해(=stable) Importance Sampling 기법을 도입</p></li>
-<li><p>위 Fig.2에서 보면 학습 말기는 Loss의 변화에 큰 영향이 없으므로 확률적으로 학습 초반의 데이터를 좀더 sampling해서 학습하도록 설계</p></li>
-<li><p>실제로 적용해본 결과 <span class="math notranslate nohighlight">\(L_{hybrid}\)</span>보다 더 낮은 Loss 를 보임</p></li>
-<li><p><span class="math notranslate nohighlight">\(L_{hybrid}\)</span>에 Importance Sampling을 적용하면?</p>
-<ul class="simple">
-<li><p>적용 전보다 좋지 않은 결과를 보인다..</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<p><strong>Result</strong></p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img103.png"><img alt="I-DDPM_10" class="bg-primary mb-1" src="../../_images/img103.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 195 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img113.png"><img alt="I-DDPM_11" class="bg-primary mb-1" src="../../_images/img113.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 196 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DDPM에서 다소 취약했던 ImageNet 64x64와 CIFAR-10 데이터를 기준</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(L_{vlb}\)</span>의 경우 Importance sampling을 적용한 결과</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img123.png"><img alt="I-DDPM_12" class="bg-primary mb-1" src="../../_images/img123.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 197 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Convolution 모델이나 Diffusion 모델중에서는 뛰어나지만, Fully Transformer 모델에 비해서는 다소 부족한 면이 있음</p></li>
-</ul>
-</section>
-<section id="improcing-sampling-speed">
-<h2>4. Improving Sampling Speed<a class="headerlink" href="#improcing-sampling-speed" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Sampling Speed를 높이기 위한 방법을 제안</p>
-<ul>
-<li><p>Training 시에는 전체 Step(1, … , T)을 학습</p></li>
-<li><p>Sampling 시에는 몇몇 Step만 Sampling</p></li>
-</ul>
-</li>
-<li><p>결과는?</p></li>
-</ul>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img131.png"><img alt="I-DDPM_13" class="bg-primary mb-1" src="../../_images/img131.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 198 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img141.png"><img alt="I-DDPM_14" class="bg-primary mb-1" src="../../_images/img141.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 199 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>⇒ 100 Step만 가도 Full Model과 비슷한 FiD값을 보임</p>
-</section>
-<section id="comparison-to-gans">
-<h2>5. Comparison to GANs<a class="headerlink" href="#comparison-to-gans" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>Class Conditional Generation + P&amp;R Metric으로 GAN 모델(BigGAN)과 성능을 비교</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img152.png"><img alt="I-DDPM_15" class="bg-primary mb-1" src="../../_images/img152.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 200 </span><span class="caption-text">Figure 7</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Big-GAN Deep 모델보다 생성 타겟에 대한 FiD 수치나 Recall metric에서 더 뛰어난 성능을 보임</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="scaling-model-size">
-<h2>6. Scaling Model Size<a class="headerlink" href="#scaling-model-size" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>다양한 Capacity를 가진 모델의 FiD와 NLL 값을 비교</p></li>
-</ul>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img161.png"><img alt="I-DDPM_16" class="bg-primary mb-1" src="../../_images/img161.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 201 </span><span class="caption-text">Figure 8</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img171.png"><img alt="I-DDPM_17" class="bg-primary mb-1" src="../../_images/img171.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 202 </span><span class="caption-text">Figure 9</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>⇒ 모델의 크기와 학습량 모두 Step에 어느정도 비례함</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="LoRA.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">LoRA</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="StyO.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">StyO</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probabilistic-models">2. Denoising Diffusion Probabilistic Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-the-log-likelihood">3. Improving the Log-likelihood</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improcing-sampling-speed">4. Improving Sampling Speed</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-gans">5. Comparison to GANs</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size">6. Scaling Model Size</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>I-DDPM &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/I-DDPM';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="StyO" href="StyO.html" />
+    <link rel="prev" title="LoRA" href="LoRA.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/I-DDPM.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/I-DDPM.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>I-DDPM</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probabilistic-models">2. Denoising Diffusion Probabilistic Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-the-log-likelihood">3. Improving the Log-likelihood</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improcing-sampling-speed">4. Improving Sampling Speed</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-gans">5. Comparison to GANs</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size">6. Scaling Model Size</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Improved Denoising Diffusion Probabilistic Models (ICML 2021)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2102.09672">https://arxiv.org/abs/2102.09672</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="i-ddpm">
+<h1>I-DDPM<a class="headerlink" href="#i-ddpm" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>DDPM을 약간 수정함으로써 High Quality를 유지하고, Log Likelihood수치도 개선할 수 있는 향상된 모델을 제안</p></li>
+<li><p>Sampling시 Base 보다 더 적은 Step으로 비슷한 퀄리티의 결과를 낼 수 있는 방법을 제안</p></li>
+<li><p>Model의 Scale과 Diffusion Step에 따른 Sample Quality와 Likelihood 수치간의 관계를 연구</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>최근 DDPM(Ho et al.) 모델은 Generate 분야에서 High Quality의 이미지를 생성해내는 수준까지 왔다.</p></li>
+<li><p>하지만, Image의 Quality에 반해 log-likelihood 수치는 다른 generative 모델에비해 현저히 떨어졌다. (e.g. VAE)</p></li>
+<li><p>또 DDPM이 Diversity가 낮은 Dataset(CIFAR-10, LSUN)에서는 잘 동작했지만, High Diversity Dataset에서의 동작은 증명되지 못했다.</p></li>
+<li><p>I-DDPM에서는</p>
+<ol class="arabic simple">
+<li><p>Log-Likelihood 수치 개선</p></li>
+<li><p>ImageNet같은 Diversity가 높은 Dataset에서도 잘 동작</p></li>
+<li><p>Reverse Process에서의 Loss Term 개선</p></li>
+</ol>
+<p>한 모델을 제안하였다.</p>
+</li>
+<li><p>추가로 연구 과정 중, I-DDPM이 Base (DDPM) 모델에 비해 훨씬 더 적은 Step으로 비슷한 Quality를 내는 것을 확인</p></li>
+</ul>
+<p><strong>Log-Likelihood 값이 중요한 이유</strong></p>
+<ul class="simple">
+<li><p>기존 연구들에서 Loglikelihood 수치와 Sample의 Quality간의 연관성을 보이는 연구들이 많았다.</p>
+<ul>
+<li><p><em>Data의 Distribution에 대해 Model이 학습한 정도를 수치화한 느낌</em></p></li>
+</ul>
+</li>
+<li><p>수치가 좋아지면 Sample Quality도 따라 증가하는 경향을 보였다.</p></li>
+<li><p>따라서 DDPM에서도 LogLikelihood 수치를 개선한다면 Sample Quality도 따라서 더 증가할 가능성이 있지 않을까?</p></li>
+<li><p><a class="reference external" href="https://angeloyeo.github.io/2020/07/17/MLE.html">https://angeloyeo.github.io/2020/07/17/MLE.html</a></p></li>
+</ul>
+</section>
+<section id="denoising-diffusion-probabilistic-models">
+<h2>2. Denoising Diffusion Probabilistic Models<a class="headerlink" href="#denoising-diffusion-probabilistic-models" title="Permalink to this heading">#</a></h2>
+<p><strong>DDPM</strong></p>
+<ul>
+<li><p>Process</p>
+<ul>
+<li><p>Forward Process</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img03.png"><img alt="I-DDPM_00" class="bg-primary mb-1" src="../../_images/img03.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 185 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Reverse Process</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img19.png"><img alt="I-DDPM_01" class="bg-primary mb-1" src="../../_images/img19.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 186 </span><span class="caption-text">Equation 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>Forward Process에서 입힌 Noise를 Neural Model의 Reverse Process로 예측하도록 학습하는 형태</p></li>
+<li><p>이 때 Noising &amp; Denoising에 관한 (Hyper) Parameter로 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 사용</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\({B_{t}}\)</span> : time step 에 따른 noising할 정도</p></li>
+<li><p><span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> : Reverse Step에서 Denoising을 위한 Parameter로 아래와같이 정의</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img24.png"><img alt="I-DDPM_02" class="bg-primary mb-1" src="../../_images/img24.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 187 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>하지만 DDPM에서는 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 대신 <span class="math notranslate nohighlight">\({B_{t}}\)</span>를 사용해도 비슷한 수치를 보여서 <span class="math notranslate nohighlight">\({B_{t}}\)</span> (constant)로 고정</p></li>
+</ul>
+</section>
+<section id="improving-the-log-likelihood">
+<h2>3. Improving the Log-likelihood<a class="headerlink" href="#improving-the-log-likelihood" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>위의 문장 (<span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 대신 <span class="math notranslate nohighlight">\({B_{t}}\)</span>를 사용)에서 의문점</p>
+<ul>
+<li><p>사실 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>는 정 반대의 역할을 하는 Parameter인데 왜 비슷한 결과를 보였고, 결국 같은 값으로 Fix를 하는게 맞을까?</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img34.png"><img alt="I-DDPM_03" class="bg-primary mb-1" src="../../_images/img34.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 188 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Diffusion Step간 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>의 차이를 비교해보면 Diffusion Step이 커질수록 두개의 값은 거의 동일해진다. (Figure.1)</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img43.png"><img alt="I-DDPM_04" class="bg-primary mb-1" src="../../_images/img43.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 189 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>하지만 Figure.2를 보면 모델의 성능은 대부분 Step 초반에 결정되는데, Step 초반에는 두 값의 차이가 큰 것을 확인할 수 있다.</p>
+<ul class="simple">
+<li><p><em>Model의 성능이 결정되는 부분 = Loss 가 급격하게 떨어지는 부분</em></p></li>
+</ul>
+<p>⇒ 따라서, <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 동일한 값으로 두고 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>를 Non Trainable Parameter로 두는것은 설계의 Miss</p>
+</li>
+<li><p>하지만, <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span> 자체를 학습하기에는 값의 범위가 너무 작아서 <span class="math notranslate nohighlight">\({B_{t}}\)</span>와 <span class="math notranslate nohighlight">\(\tilde{B_{t}}\)</span>의 Interpolation 값을 Predict하도록 설계</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img53.png"><img alt="I-DDPM_05" class="bg-primary mb-1" src="../../_images/img53.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 190 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Hybrid Loss</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(L_{hybrid} = L_{simple} + \lambda L_{vlb}\)</span></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Noise Schedule</p>
+<ul>
+<li><p>DDPM의 경우 High Resolution 이미지에대해 잘 동작하지만, Low-Resolution (e.g. 32x32, 64x64)의 이미지에 대해서는 잘 동작하지 않는것을 확인</p></li>
+<li><p>Noise Scheduling에서 Linear mode의 Limitation이 있음을 지적</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img63.png"><img alt="I-DDPM_06" class="bg-primary mb-1" src="../../_images/img63.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 191 </span><span class="caption-text">Equation 4</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Step이 거듭될수록 Linear schedule(상단)의 이미지가 너무 빠르게 Noisy해짐</p></li>
+<li><p>추가로 Reverse Process의 20%를 Skip해도 성능에 큰 영향이 없음을 확인</p></li>
+</ul>
+<p>⇒ 결국 Linear mode를 사용하면 특정 Step 이후의 Noise는 학습에 의미있는 영향을 미치지 못한다.</p>
+<ul>
+<li><p>I-DDPM에서는 이러한 scheduling Equation을 새로 정의</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img73.png"><img alt="I-DDPM_07" class="bg-primary mb-1" src="../../_images/img73.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 192 </span><span class="caption-text">Equation 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>새로 정의한 식은 중간 단계에서는 Noise가 강하게 입혀지지만 0과 T 부근에서는 비교적 덜 Noisy해짐</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img84.png"><img alt="I-DDPM_08" class="bg-primary mb-1" src="../../_images/img84.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 193 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>Gradient Noise</p>
+<ul>
+<li><p>Model을 <span class="math notranslate nohighlight">\(L_{vlb}\)</span>를 Direct로 최적화하도록 설계하면 Best</p></li>
+<li><p>하지만 아래 이미지와같이 Loss 자체가 unstable해서 직접 최적화에는 어려움이 있음</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img93.png"><img alt="I-DDPM_09" class="bg-primary mb-1" src="../../_images/img93.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 194 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>따라서 <span class="math notranslate nohighlight">\(L_{vlb}\)</span>의 Variance를 줄이기위해(=stable) Importance Sampling 기법을 도입</p></li>
+<li><p>위 Fig.2에서 보면 학습 말기는 Loss의 변화에 큰 영향이 없으므로 확률적으로 학습 초반의 데이터를 좀더 sampling해서 학습하도록 설계</p></li>
+<li><p>실제로 적용해본 결과 <span class="math notranslate nohighlight">\(L_{hybrid}\)</span>보다 더 낮은 Loss 를 보임</p></li>
+<li><p><span class="math notranslate nohighlight">\(L_{hybrid}\)</span>에 Importance Sampling을 적용하면?</p>
+<ul class="simple">
+<li><p>적용 전보다 좋지 않은 결과를 보인다..</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<p><strong>Result</strong></p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img103.png"><img alt="I-DDPM_10" class="bg-primary mb-1" src="../../_images/img103.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 195 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img113.png"><img alt="I-DDPM_11" class="bg-primary mb-1" src="../../_images/img113.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 196 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DDPM에서 다소 취약했던 ImageNet 64x64와 CIFAR-10 데이터를 기준</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(L_{vlb}\)</span>의 경우 Importance sampling을 적용한 결과</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img123.png"><img alt="I-DDPM_12" class="bg-primary mb-1" src="../../_images/img123.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 197 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Convolution 모델이나 Diffusion 모델중에서는 뛰어나지만, Fully Transformer 모델에 비해서는 다소 부족한 면이 있음</p></li>
+</ul>
+</section>
+<section id="improcing-sampling-speed">
+<h2>4. Improving Sampling Speed<a class="headerlink" href="#improcing-sampling-speed" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Sampling Speed를 높이기 위한 방법을 제안</p>
+<ul>
+<li><p>Training 시에는 전체 Step(1, … , T)을 학습</p></li>
+<li><p>Sampling 시에는 몇몇 Step만 Sampling</p></li>
+</ul>
+</li>
+<li><p>결과는?</p></li>
+</ul>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img131.png"><img alt="I-DDPM_13" class="bg-primary mb-1" src="../../_images/img131.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 198 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img141.png"><img alt="I-DDPM_14" class="bg-primary mb-1" src="../../_images/img141.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 199 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>⇒ 100 Step만 가도 Full Model과 비슷한 FID값을 보임</p>
+</section>
+<section id="comparison-to-gans">
+<h2>5. Comparison to GANs<a class="headerlink" href="#comparison-to-gans" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>Class Conditional Generation + P&amp;R Metric으로 GAN 모델(BigGAN)과 성능을 비교</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img152.png"><img alt="I-DDPM_15" class="bg-primary mb-1" src="../../_images/img152.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 200 </span><span class="caption-text">Figure 7</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Big-GAN Deep 모델보다 생성 타겟에 대한 FID 수치나 Recall metric에서 더 뛰어난 성능을 보임</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="scaling-model-size">
+<h2>6. Scaling Model Size<a class="headerlink" href="#scaling-model-size" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>다양한 Capacity를 가진 모델의 FID와 NLL 값을 비교</p></li>
+</ul>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img161.png"><img alt="I-DDPM_16" class="bg-primary mb-1" src="../../_images/img161.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 201 </span><span class="caption-text">Figure 8</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img171.png"><img alt="I-DDPM_17" class="bg-primary mb-1" src="../../_images/img171.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 202 </span><span class="caption-text">Figure 9</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>⇒ 모델의 크기와 학습량 모두 Step에 어느정도 비례함</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="LoRA.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">LoRA</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="StyO.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">StyO</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#denoising-diffusion-probabilistic-models">2. Denoising Diffusion Probabilistic Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-the-log-likelihood">3. Improving the Log-likelihood</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improcing-sampling-speed">4. Improving Sampling Speed</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-gans">5. Comparison to GANs</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#scaling-model-size">6. Scaling Model Size</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/IP_Adapter.html b/docs/review/IP_Adapter.html
old mode 100644
new mode 100755
index 904473bc..ac5a8df7
--- a/docs/review/IP_Adapter.html
+++ b/docs/review/IP_Adapter.html
@@ -1,1089 +1,1109 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>IP-Adapter &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/IP_Adapter';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="HyperDreamBooth" href="HyperDreamBooth.html" />
-    <link rel="prev" title="T2I-Adapter" href="t2i_adapter.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/IP_Adapter.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/IP_Adapter.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>IP-Adapter</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">IP-Adapter</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">Related Works</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adapters-for-large-models">Adapters for Large Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">Preliminaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-prompt-adapter">Image Prompt Adapter</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">Experimental Setup</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-existing-methods">Comparison with Existing Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#more-results">More Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2308.06721">https://arxiv.org/abs/2308.06721</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/tencent-ailab/IP-Adapter">tencent-ailab/IP-Adapter</a></p></li>
-<li><p>Project Page : <a class="reference external" href="https://ip-adapter.github.io">https://ip-adapter.github.io</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
-<li><p><strong>Last updated on Sep. 21, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="ip-adapter">
-<h1>IP-Adapter<a class="headerlink" href="#ip-adapter" title="Permalink to this heading">#</a></h1>
-<blockquote>
-<div><p>📌 문제상황 <br />
-text-to-image diffusion model(T2I diffusion model)이 생성하는 이미지 품질은 훌륭하지만 text prompt를 통해 원하는 형태의 이미지를 생성하는 것이 어렵다. 복잡한 prompt engineering을 시도하거나, image prompt를 활용할 수도 있지만 사전학습된 모델을 fine-tuning하게 되면 많은 리소스가 필요할 뿐만 아니라 해당 방식은 범용성, 호환성도 떨어진다. <br />
-<br />
-📌 해결방안 <br />
-<strong>cross-attention을 text features와 image features로 decoupling한다.</strong> 기존 학습된 diffusion model은 text feature에 맞춰 학습된 상태이므로 기존 layer에 image feature를 넣게 되면 image feature와 text feature를 align을 수행하게 되므로 기존 cross-attention layer 하나를 통해  image-feature와 text-feature를 결합하는 것은 적절하지 않다. <br />
-<br />
-📌  논문의 강점</p>
-<ul class="simple">
-<li><p>어떤 모델 구조에도 활용가능하다.</p></li>
-<li><p>적은 수의 파라미터(22M)만 추가적으로 학습하므로 가볍다.</p></li>
-<li><p>기존 controllable tools에 덧붙여 쓸 수도 있다.</p></li>
-</ul>
-</div></blockquote>
-<figure class="align-default" id="id1">
-<a class="mb-1 reference internal image-reference" href="../../_images/main.jpeg"><img alt="IP-Adapter를 활용한 이미지 합성" class="mb-1" src="../../_images/main.jpeg" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 268 </span><span class="caption-text">IP-Adapter를 활용한 이미지 합성 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p><strong>:image prompt의 필요성과 기존 연구에서 image prompt를 사용해 이미지를 생성하려는 시도의 종류와 장단점을 말한다.</strong></p>
-<p>복잡한 scene이나 concept을 입력할때 이미지 형태로 입력하는 것이 간편하고 효과적이다.
-image prompt + text prompt(“an image is worth a thousand words”)</p>
-<figure class="align-default" id="id2">
-<a class="mb-1 reference internal image-reference" href="../../_images/IMG_4891.png"><img alt="카페" class="mb-1" src="../../_images/IMG_4891.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 269 </span><span class="caption-text">“내츄럴 풍으로 카페를 꾸미고 여러 식물을 두어 장식하고 싶어. 내가 좋아하는 식물은 스노우 사파이어, 호야, 자미오쿨카스등 이고, 의자와 테이블은 원목을 선호해.”</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DALL-E2는 처음으로 image prompt를 지원한 모델으로, T2I prior model이 image embedding을 조건으로 이미지를 생성하도록 했다. 하지만 기존 대부분의 T2I 모델은 주로 text를 조건으로 이미지를 생성하는 방식이었다. 예를 들어 stable diffusion(SD) 모델의 경우 CLIP text encoder로 부터 text embedding을 뽑아내 사용했다.</p>
-<p>본 논문에서는 “image prompt를 기존 T2I 모델에서 사용할 수 있는지”, image prompt를 사용한 T2I 이미지 생성을 단순한 방식으로 가능케 한다.</p>
-<figure class="align-default" id="id3">
-<a class="mb-1 reference internal image-reference" href="../../_images/image.png"><img alt="비교를 위한 DALL-E2(unCLIP) 구조" class="mb-1" src="../../_images/image.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 270 </span><span class="caption-text">비교를 위한 DALL-E2(unCLIP) 구조 <a class="reference external" href="https://arxiv.org/abs/2204.06125">출처</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id4">
-<a class="mb-1 reference internal image-reference" href="../../_images/image11.png"><img alt="비교를 위한 Stable Diffusion의 구조" class="mb-1" src="../../_images/image11.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 271 </span><span class="caption-text">비교를 위한 Stable Diffusion의 구조 구조 <a class="reference external" href="https://arxiv.org/abs/2112.10752">출처</a></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>SD Image Variations와 Stable UnCLIP과 같은 기존 연구에서 image prompt를 사용한 이미지 생성을 위해 text-conditioned diffusion models을 image embedding을 사용해 직접 fine-tuning하려는 시도를 했다. 하지만 많은 양의 컴퓨터 리소스 사용과 기존 T2I 생성능력 저하, 재사용성 저하라는 단점이 있었다. 또한 해당 방식은 ControlNet과 같은 기존 structural control tools과 호환되지 않았다. 이는 downstream application에 치명적이다.</p>
-<p>이를 피하기 위해 diffusion model 자체를 fine-tuning하지 않고 text encoder를 image encoder로 교체하는 방식도 있었지만 text prompt를 지원할 수 없게 되고 이미지 품질이 충분하지 않다는 단점이 있었다.</p>
-<p>최근에는 T2I base model을 건드리지 않고 추가적인 네트워크를 이용해 image prompt를 지원하는 연구들이 있었다. ControlNet, T2I-Adapter와 같은 연구들은 대부분 sketch, depth map, segmenation map 등의 추가적인 입력을 활용했다. 또한 T2I-Adapter나 Uni-ControlNet 같이reference image를 입력해 style 이나 concept을 전달하려는 시도도 있었다. 이런 흐름의 연구들은 CLIP image encoder에서 image embedding을 추출하여 추가 trainable network에 새로운 feature들을 mapping하여 text feature와 융합하고자 했다. 기존 text feature대신 text feature+image feature를 디퓨전 모델 내 UNet 구조에 넣어 prompt에 넣은 이미지에 적합한(faithful) 이미지를 생성하고자 했다. 이런 연구들을 통해 image prompt의 가능성을 볼수 있었지만 그 충실도가 충분하지 않았다. 또한 이미지 품질이 fine-tuning된 image prompt model보다 나빴다.</p>
-<figure class="align-default" id="id5">
-<a class="mb-1 reference internal image-reference" href="../../_images/compare_table.png"><img alt="기존 모델과 IP-Adapter 비교" class="mb-1" src="../../_images/compare_table.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 272 </span><span class="caption-text">기존 모델과 IP-Adapter 비교 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>image prompt를 지원하는 기존 방식</strong></p>
-<ul class="simple">
-<li><p>input image embedding to T2I model</p></li>
-<li><p>base model fine-tuning</p></li>
-<li><p>text encoder → image encoder</p></li>
-<li><p>additional network</p></li>
-</ul>
-<p>본 논문에서는 앞서 언급한 문제점의 원인을 T2I model내의 cross-attention이라고 보고 있다. <strong>사전학습된 cross-attention에서 key, value projection weights은 text feature에 맞게 훈련되어 있는 상태이다.</strong>
-결과적으로 image feature와 text feature가 cross-attention layer에서 합쳐지는데 이때 image-specific 특성들이 무시되어 reference image에 아주 충실한 이미지를 생성하지 못하고 coarse-grained controllable generation(e.g., image style)만 달성 가능해진다.</p>
-<p>마지막으로 앞선 연구의 문제점들을 극복한 효과적인 image prompt adapter, IP-Adapter를 제안한다. 특히 IP-Adapter의 경우 decoupled cross-attention mechanism을 사용해 text feature와 image feature를 분리한다. image feature를 위해 base model내 모든 UNet cross-attention layer에 cross-attention layer 를 추가하여 훈련단계에서는 적은 수의 파라미터(22M)만 훈련한다. 본 논문에서 제안하는 IP-Adapter는 매우 가볍고 효과적이다. 또한 일반화 능력(generalization capability)가 높고 text prompt와도 잘 어울린다(compatible).</p>
-<p><strong>IP-Adapter에서 제안하는 방식</strong></p>
-<ul class="simple">
-<li><p>additional cross-attention layer in UNet of diffusion model</p></li>
-<li><p>reusable and flexible (base + IP-Adapter + ControlNet가능)</p></li>
-<li><p>multimodal compatibility (image prompt + text prompt)</p></li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-works">
-<h1>Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h1>
-<section id="text-to-image-diffusion-models">
-<h2>Text-to-Image Diffusion Models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h2>
-<p>large T2I model은 크게 autoregressive models, diffusion models 두 부류로 나눌 수 있다. DALLE, CogView, Make-A-Scene과 같은 초기 연구들은 autoregressive model 이었다. autoregressive model은 VQ-VAE와 같은 image tokenizer를 사용해 image들을 token화 하여 autoregressive transformer에 text token을 이용해 image token을 예측하게 하는 방식으로 학습했다. 하지만 autoregressive model은 파라미터 수가 많고 고화질 이미지를 생성하기 위해 많은 리소스가 필요했다.</p>
-<p>최근에는 diffusion models(DM)이 등장하여 T2I 생성모델의 state-of-the-art를 달성했다. 이전에 GLIDE는 cascaded diffusion 구조를 통해 64x64 → 256x256 이미지를 생성했다.
-DALL-E2의 경우, text prompt로부터 image embedding을 생성하도록 학습된 prior model을 활용하고, diffusion model은 이 image embedding을 조건으로 이미지를 생성했다. 따라서 DALL-E2는 text prompt뿐 아니라 image prompt를 통한 이미지 생성도 지원한다. text 이해도를 높이기 위해 Imagen은 거대 transformer language model인 T5를 도입했다. Re-Imagen의 경우 드물거나 학습한 적 없는 entity에 대한 이미지의 충실도(fidelity)를 개선했다.
-SD는 latent diffusion model로 pixel space가 아닌 latent space상에서 동작하게 하여 diffusion model만 사용하여 고품질의 이미지를 생성할 수 있게 했다. text 일치도(alignment)를 높이기 위해 eDiff-I의 경우 T2I diffusion model과 유사한 디자인을 채택하여 T5 text, CLIP text embedding, CLIP image embedding등 멀티모달 조건을 활용했다. Versatile Diffusion은 unified multi-flow diffusion framework를 이용해 T2I, I2T, 등 다양한 생성방식을 하나의 모델로 가능하게 했다. controllable image 생성 면에서는 Composer가 image embedding을 활용한 joint fine-tuning을 시도했었다. RAPHAEL은 mixture of experts(MoEs) 전략을 사용해 T2I model의 이미지 품질을 향상시켰다.</p>
-<p>DALL-E2는 image prompt를 통해 해당 풍의 이미지들을 생성할 수 있다는 점에서 매력적이다. 또한 image prompt를 T2I model에서 지원하고자 하는 연구들이 있다. SD Image Variants model은 변경한 SD를 fine-tuning하여 text feature를 CLIP image encoder의 image embedding으로 교체할 수 있게 했다. Stable unCLIP 또한 SD를 fine-tuning하여 time embedding에 image embedding을 추가했다. 기존 모델을 fine-tuning하는 방식은 고품질의 이미지를 생성 할 수 있다는 장점이 있지만 비교적 training cost가 높으며 기존 tools(e.g.,ControlNet)과 호환되지 않는다는 단점이 있다.</p>
-</section>
-<section id="adapters-for-large-models">
-<h2>Adapters for Large Models<a class="headerlink" href="#adapters-for-large-models" title="Permalink to this heading">#</a></h2>
-<p>거대한 사전학습된 모델 전체를 fine-tuning하는 것은 비효율적이다. 이 대안으로 떠오르는 것이 adapter를 사용하는 것인데, 기존 모델은 freeze시켜 학습하는 파라미터 수를 줄일 수 있다. adapter는 NLP에서 오랫동안 사용되던 방식이다. 최근에는 LLM의 vision-language 이해를 위해 adapter를 사용하고 있다.</p>
-<p>T2I model의 최근 인기로 인해 adapter들도 여기에 추가적인 control을 주는 방향으로 사용되고 있다. ControlNet(아래 사진 참고)의 경우 사전학습된 T2I diffusion model에 task-specific한 입력
-(e.g.,canny edge)을 추가적으로 넣기위해 adapter를 사용할 수 있다는 것을 보여주었다. 유사한 시기에 T2I-Adapter(아래 사진 참고)도 등장했는데 보다 간단하고 가벼운 형태로 색이나 구조적인 면에서
-fine-grained control을 주고자 했다. fine-tuning에 사용되는 비용을 줄이기 위해 Uni-ControlNet은 multi-scale condition injection을 사용했다.</p>
-<p>structural control외에 이미지 집합을 통해 content나 style을 조절하고자 한 연구도 있다. ControlNet Shuffle의 경우 이미지들을 recompose하도록 학습하여 사용자가 제공한 이미지들을 바탕으로 이미지를 생성 할 수 있었다. 또한 ControlNet Reference-only의 경우, 학습없이 SD에 feature injection을 통해 이미지를 변형했다. T2I-Adapter의 최근 버전의 경우, CLIP image encoder로 부터 reference image의 image feature를 text feature에 더해줌으로써 style adapter로서의 역할도 가능하다. Uni-ControlNet(아래 사진 참고)의 global control adapter 또한 CLIP image encoder로 부터 추출한 image embedding을 작은 네트워크를 통해 condition embedding으로 projection하여 사용한다. SeeCoder(아래 사진 참고)는 기존 text encoder를 semantic context encoder로 교체하여 image variants를 생성하고자 했다.</p>
-<p><strong>ControlNet</strong></p>
-<figure class="align-default" id="id6">
-<a class="mb-1 reference internal image-reference" href="../../_images/image21.png"><img alt="비교를 위한 ControlNet의 작동 방식" class="mb-1" src="../../_images/image21.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 273 </span><span class="caption-text">비교를 위한 ControlNet의 작동 방식 <a class="reference external" href="https://arxiv.org/abs/2302.05543">출처</a></span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id7">
-<a class="mb-1 reference internal image-reference" href="../../_images/image31.png"><img alt="" class="mb-1" src="../../_images/image31.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 274 </span><span class="caption-text">ControlNet preprocessor <a class="reference external" href="https://github.com/pytorch/pytorch/blob/main/torch/nn/modules/pixelshuffle.py">ContentShuffleDetector</a></span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Uni-ControlNet</strong></p>
-<figure class="align-default" id="id8">
-<a class="mb-1 reference internal image-reference" href="../../_images/image41.png"><img alt="Uni-ControlNet Architecture" class="mb-1" src="../../_images/image41.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 275 </span><span class="caption-text">Uni-ControlNet Architecture <a class="reference external" href="https://arxiv.org/abs/2305.16322">출처</a></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>T2I-Adapter</strong></p>
-<figure class="align-default" id="id9">
-<a class="mb-1 reference internal image-reference" href="../../_images/image61.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image61.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 276 </span><span class="caption-text">비교를 위한 T2I Adapter의 작동 방식 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a> <a class="reference external" href="https://pytorch.org/docs/stable/generated/torch.nn.PixelUnshuffle.html">PixelUnshuffle</a></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="mb-1 reference internal image-reference" href="../../_images/image5.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image5.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 277 </span><span class="caption-text">T2I Adapter의 장점 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="mb-1 reference internal image-reference" href="../../_images/image71.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image71.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 278 </span><span class="caption-text">T2I-adapter의 결과는 기존 모델의 각 층의 feature map 크기가 맞도록 더해짐 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a></span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>SeeCoder</strong></p>
-<figure class="align-default" id="id12">
-<a class="mb-1 reference internal image-reference" href="../../_images/image81.png"><img alt="SeeCoder Architecture" class="mb-1" src="../../_images/image81.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 279 </span><span class="caption-text">SeeCoder Architecture <a class="reference external" href="https://arxiv.org/abs/2305.16223">출처</a></span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="method">
-<h1>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
-<section id="preliminaries">
-<h2>Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
-<blockquote>
-<div><p>📌 생성모델의 일종인 diffusion model의 이미지 생성단계</p>
-<ol class="arabic simple">
-<li><p><strong>diffusion process (forward process)</strong> <br />
-T step의 fixed Markov chain을 통해 데이터에 gaussian noise를 점차 추가.</p></li>
-<li><p><strong>denoising process</strong> <br />
-gaussian noise로 부터 learnable model을 통해 sample을 생성.</p></li>
-</ol>
-</div></blockquote>
-<p>일반적으로 noise 예측을 위한 diffusion model(<span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>)의 training objective는 아래와 같이 단순한 variant of variational bound 로 표현된다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{\text{simple}}=\Bbb E_{x_0, \epsilon\sim \mathcal N(0,I),c,t}\|\epsilon-\epsilon_\theta(x_t, \mathbf c,t)\|^2 \tag{1}
-\]</div>
-<p><span class="math notranslate nohighlight">\(x_0\)</span> 는 real data, <span class="math notranslate nohighlight">\(\mathbf c\)</span> 는 추가조건, <span class="math notranslate nohighlight">\(t\)</span> 는 time step을 말하며 <span class="math notranslate nohighlight">\([0,T]\)</span> 내에 속한다.  <span class="math notranslate nohighlight">\(x_t=\alpha_t x_0+\sigma_t\epsilon\)</span>은 step t에 해당하는 noisy data를 말하고, <span class="math notranslate nohighlight">\(\alpha_t, \sigma_t\)</span>는 diffusion process를 결정하는 predefined function이다. <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>가 한번 학습되고 나면 랜덤 노이즈로부터 이미지를 반복적으로 생성할 수 있다. 일반적으로 생성 속도를 높이기 위해 DDIM, PNDM, DPM-solver와 같은 fast sampler를 inference시 사용한다.</p>
-<p>conditional diffusion model에서 classifier guidance를 통해 이미지 정확도(fidelity)와 다양성(sample diversity)를 밸런싱할 수 있다. 이는 따로 학습된 classifier의 gradient를 활용하는데, classifier를 따로 학습하는 번거로움을 지우기 위해 classifier-free guidance를 사용하기도 한다. 이런 접근에서 conditional, unconditional diffusion models는 학습시 랜덤하게 조건 <span class="math notranslate nohighlight">\(c\)</span> 를 배제하여 합동 학습(joint training)된다. sampling단계 에서는 conditional model과 unconditional model의 prediction을 모두 이용하여 noise를 계산한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat \epsilon_\theta(x_t,\mathbf c,t)=\mathcal w \epsilon_\theta(x_t,\mathbf c, t)+(1-\mathcal w)\epsilon_\theta(x_t,t) \tag{2}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\mathcal w\)</span>은 guidance scale 혹은 guidance weight로 불리는데 condition <span class="math notranslate nohighlight">\(c\)</span>의 영향력을 조절하기 위한 상수값이다. T2I diffusion model의 경우 image-text 일치성을 높이는데 classifier-free guidance가 큰 역할을 한다.</p>
-<p>본 논문에서는 open-source SD에 IP-Adapter를 덧붙여 실험을 진행했다. SD는 latent diffusion model로 frozen CLIP text encoder로 뽑아낸 text feature를 condition으로 사용한다. diffusion model은 Unet에 attention layer가 추가된 형태이다. Imagen과 같은 pixel-based diffusion model과 비교해 SD는 사전학습된 auto-encoder model을 활용해 latent space에서 동작하므로 효율적이다.</p>
-</section>
-<section id="image-prompt-adapter">
-<h2>Image Prompt Adapter<a class="headerlink" href="#image-prompt-adapter" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id13">
-<a class="mb-1 reference internal image-reference" href="../../_images/image9.png"><img alt="IP-Adapter의 동작방식" class="mb-1" src="../../_images/image9.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 280 </span><span class="caption-text">IP-Adapter의 동작방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id14">
-<a class="mb-1 reference internal image-reference" href="../../_images/image11.png"><img alt="비교를 위한 Stable Diffusion의 구조" class="mb-1" src="../../_images/image11.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 281 </span><span class="caption-text">비교를 위한 Stable Diffusion의 구조 <a class="reference external" href="https://arxiv.org/abs/2112.10752">출처</a></span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Image Encoder</strong></p>
-<p>pretrained CLIP image encoder를 사용해 image prompt에서 image feature를 뽑아냈다. CLIP은 multimodal model로 거대 image-text pair 데이터셋으로 contrastive learning시킨 모델이다. CLIP image encoder를 통해 global image embedding을 얻었다. 이는 image로 부터 풍부한 내용(content)와 스타일을 담은 image caption과 잘 조정되어(well-aligned) 있다. 학습단계에서 CLIP image encoder는 frozen되어 학습되지 않는다.</p>
-<p><strong>Decoupled Cross-Attention</strong></p>
-<p>image feature는 사전학습된 UNet에 decoupled cross-attention을 통해 결합된다. 초기 SD model에서는 CLIP text encoder를 통해 뽑아낸 text feature를 UNet의 cross-attention layer에 넣었다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf Z'=\text{Attention}(\mathbf{Q,K,V})=\text{Softmax}(\frac{\mathbf {QK}^T}{\sqrt{d}})\mathbf V, \tag{3}
-\]</div>
-<p>query feature는 <span class="math notranslate nohighlight">\(Z\)</span>, text feature는 <span class="math notranslate nohighlight">\(c_t\)</span>, cross-attention의 결과는 <span class="math notranslate nohighlight">\(Z'\)</span>이고, <span class="math notranslate nohighlight">\(\mathbf{Q=ZW_q, K=c_t W_k, V=c_t W_v}\)</span>는 attention 연산의 각각 query, key, value 행렬이다. <span class="math notranslate nohighlight">\(\mathbf{W_q, W_k, W_v}\)</span>는 linear projection layers의 학습가능한 weight matrices다.</p>
-<p>image feature를 이미지 생성에 반영하는 직관적인 방법은 cross-attention시 text feature+image feature로 결합(concatenate)하여 처리하는 것이다. 하지만 이 방법은 충분하지 않다는 것을 발견하여 decoupled cross-attention을 제안한다. 이는 cross-attention 에서 image feature와 text feature를 따로 처리하는 것이다. 구체적으로는 기존 cross-attention layer가 존재하던 곳에 새로운 cross-attention layer를 추가하여 image feature를 처리하도록 했다. image feature <span class="math notranslate nohighlight">\(c_i\)</span>가 주어질때 새로운 attention layer의 결과는 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf Z''=\text{Attention}(\mathbf{Q,K',V'})=\text{Softmax}(\frac{\mathbf{Q(K')}^T}{\sqrt{d}})\mathbf V', \tag{4}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\mathbf{Q=ZW_q}\)</span>, <span class="math notranslate nohighlight">\(\mathbf{K'=c_i W'_k}\)</span> , <span class="math notranslate nohighlight">\(\mathbf{V'=c_i W'_v}\)</span> 는 image feature를 위한 query, key, value 행렬이다. 여기서 핵심은 text cross-attention과 image cross-attention에서 동일한 query를 사용했다는 점이다. 결과적으로는 각 cross-attention layer 마다 2개의 파라미터 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span> 를 추가하게 된다. 수렴속도를 높이기 위해 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span>는 <span class="math notranslate nohighlight">\(\mathbf{W_k,W_v}\)</span>로 초기화했다. 그러면 두 cross-attention layer의 결과를 더함으로써 최종 결과를 구할 수 있다. decoupled cross-attention의 최종적인 형태는 다음과 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf Z^\text{new}=\text{Softmax}(\frac{\mathbf {QK}^T}{\sqrt{d}})\mathbf V + \text{Softmax}(\frac{\mathbf {Q(K')}^T}{\sqrt{d}})\mathbf V' \tag{5}
-\]</div>
-<div class="math notranslate nohighlight">
-\[ \text{where} \space \mathbf{Q=ZW}_q,\space \mathbf{K=c}_t\mathbf W_k,\space \mathbf{V=c}_t\mathbf W_v,\space \mathbf{K'=c}_i\mathbf W'_k, \space \mathbf{V'=c}_i\mathbf W'_v
-\]</div>
-<p>사전학습한 UNet은 freeze시키고 훈련을 진행하므로 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span> <strong>만</strong> 학습된다.</p>
-<p><strong>Training and Inference</strong></p>
-<p>학습시 IP-Adapter만 최적화하고 기존 사전학습된 diffusion model은 고정한다. IP-Adapter는 image-text pair dataset으로 학습시키며 original SD와 동일한 objective를 사용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{\text{simple}}=\Bbb E_{x_0, \epsilon\sim \mathcal N(0,I),c_t,c_i,t}\|\epsilon-\epsilon_\theta(x_t,\mathbf {c_t,c_i},t)\|^2 \tag{6}
-\]</div>
-<p>또 random하게 image condition을 drop하여 inference 단계에서 classifier-free guidance를 사용할 수 있도록 한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat \epsilon_\theta(x_t,\mathbf {c_t,c_i},t)=\mathcal w \epsilon_\theta(x_t,\mathbf {c_t,c_i}, t)+(1-\mathcal w)\epsilon_\theta(x_t,t) \tag{7}
-\]</div>
-<p>image condition이 drop되면 CLIP image embedding은 0으로 처리했다. text cross-attention과 image cross-attention이 분리되어 있으므로 inference시 image condition의 가중치(<span class="math notranslate nohighlight">\(\lambda\)</span>)도 조절할 수 있다. <span class="math notranslate nohighlight">\(\lambda\)</span> 가 0이 되면 기존 T2I 모델이 된다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbf Z^\text{new}=\text{Attention}(\mathbf {Q,K,V})+ \lambda\cdot\text{Attention}(\mathbf {Q,K',V'})\tag{8}
-\]</div>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="experiments">
-<h1>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
-<section id="experimental-setup">
-<h2>Experimental Setup<a class="headerlink" href="#experimental-setup" title="Permalink to this heading">#</a></h2>
-<div class="pst-scrollable-table-container"><table class="table">
-<thead>
-<tr class="row-odd"><th class="head"><p>항목</p></th>
-<th class="head text-center"><p>값</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>base model</p></td>
-<td class="text-center"><p>SD v1.5</p></td>
-</tr>
-<tr class="row-odd"><td><p>image encoder</p></td>
-<td class="text-center"><p>OpenCLIP ViT-H/14</p></td>
-</tr>
-<tr class="row-even"><td><p>resolution</p></td>
-<td class="text-center"><p>512x512 (resized and center crop)</p></td>
-</tr>
-<tr class="row-odd"><td><p>optimizer</p></td>
-<td class="text-center"><p>AdamW</p></td>
-</tr>
-<tr class="row-even"><td><p>learning rate</p></td>
-<td class="text-center"><p>0.0001</p></td>
-</tr>
-<tr class="row-odd"><td><p>weight decay</p></td>
-<td class="text-center"><p>0.01</p></td>
-</tr>
-<tr class="row-even"><td><p>libraries</p></td>
-<td class="text-center"><p>Hugging Face diffusers, DeepSpeed ZeRO-2</p></td>
-</tr>
-<tr class="row-odd"><td><p>GPU</p></td>
-<td class="text-center"><p>8 V100</p></td>
-</tr>
-<tr class="row-even"><td><p>training step</p></td>
-<td class="text-center"><p>1M</p></td>
-</tr>
-<tr class="row-odd"><td><p>batch size</p></td>
-<td class="text-center"><p>8 per GPU</p></td>
-</tr>
-<tr class="row-even"><td><p>classifier-free guidance</p></td>
-<td class="text-center"><p>0.05</p></td>
-</tr>
-<tr class="row-odd"><td><p>training data</p></td>
-<td class="text-center"><p>LAION-2B, COYO-700M</p></td>
-</tr>
-<tr class="row-even"><td><p>sampler for inference</p></td>
-<td class="text-center"><p>DDIM (50steps)</p></td>
-</tr>
-<tr class="row-odd"><td><p>guidance scale</p></td>
-<td class="text-center"><p>7.5</p></td>
-</tr>
-<tr class="row-even"><td><p><span class="math notranslate nohighlight">\(\lambda\)</span></p></td>
-<td class="text-center"><p>1.0 for only image prompt</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-</section>
-<section id="comparison-with-existing-methods">
-<h2>Comparison with Existing Methods<a class="headerlink" href="#comparison-with-existing-methods" title="Permalink to this heading">#</a></h2>
-<p><strong>Quantitative Comparison</strong></p>
-<figure class="align-default" id="id15">
-<a class="mb-1 reference internal image-reference" href="../../_images/image10.png"><img alt="실험결과" class="mb-1" src="../../_images/image10.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 282 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Qualitative Comparison</strong></p>
-<figure class="align-default" id="id16">
-<a class="mb-1 reference internal image-reference" href="../../_images/image111.png"><img alt="실험결과" class="mb-1" src="../../_images/image111.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 283 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>(실험 결과를 보고 IP-Adapter를 활용해 생성한 이미지가 reference와 지나치게 유사하다는 생각이 들었습니다. 몇몇은 그냥 좌우반전을 한 것처럼 느껴졌습니다. 흔히 GAN에서 말하는 Mode Collapse와 같은 현상이 아닌가 싶어 다양성이 낮아보이는 결과가 의아했으나, conclusion에서 이 단점을 언급합니다.)</p>
-</section>
-<section id="more-results">
-<h2>More Results<a class="headerlink" href="#more-results" title="Permalink to this heading">#</a></h2>
-<p><strong>Generalizable to Custom Models</strong></p>
-<figure class="align-default" id="id17">
-<a class="mb-1 reference internal image-reference" href="../../_images/image131.png"><img alt="실험결과" class="mb-1" src="../../_images/image131.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 284 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Structure Control</strong></p>
-<figure class="align-default" id="id18">
-<a class="mb-1 reference internal image-reference" href="../../_images/image14.png"><img alt="실험결과" class="mb-1" src="../../_images/image14.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 285 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Image-to-Image Inpainting</strong></p>
-<figure class="align-default" id="id19">
-<a class="mb-1 reference internal image-reference" href="../../_images/image15.png"><img alt="실험결과" class="mb-1" src="../../_images/image15.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 286 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Multimodal Prompts</strong></p>
-<figure class="align-default" id="id20">
-<a class="mb-1 reference internal image-reference" href="../../_images/image16.png"><img alt="실험결과" class="mb-1" src="../../_images/image16.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 287 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ablation-study">
-<h2>Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
-<p><strong>Importance of Decoupled Cross-Attention</strong></p>
-<figure class="align-default" id="id21">
-<a class="mb-1 reference internal image-reference" href="../../_images/image17.png"><img alt="실험결과" class="mb-1" src="../../_images/image17.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 288 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Comparison of Fine-grained Features and Global Features</strong></p>
-<figure class="align-default" id="id22">
-<a class="mb-1 reference internal image-reference" href="../../_images/image181.png"><img alt="실험결과" class="mb-1" src="../../_images/image181.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 289 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>IP-Adapter는 CLIP image encoder로 부터 추출한 global image embedding를 사용하기 때문에 reference image의 일부 특성을 잃어버릴 수 있다. 따라서 fine-grained feature를 위한 IP-Adapter를 디자인했다. 첫번째로 CLIP image encoder에서 penultimate layer에서 grid feature를 뽑아낸다. 이후 작은 query network를 이용해 feature를 학습한다. grid feature로 부터 정보를 뽑아내기 위해 lightweight transformer를 사용해 learnable 16 token들을 정의한다. 이 token feature들을 query network의 cross-attention layer에 입력으로 넣어준다.</p>
-<p>두 adapter의 생성 결과를 비교하면 finer-grained feature를 이용하면 보다 image prompt와 가까운 결과를 얻을 수 있다. finer-grained feature는 spatial structure information을 학습하여 생성된 이미지의 diversity를 낮추는 결과를 초래할 수 있으나 추가적인 조건(text prompt, structure map)을 활용하면 다양한 이미지를 만들 수 있다. 예를 들어 위의 그림과 같이 사진+pose를 통해 이미지를 생성 할 수 있다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<p>본 연구에서는 사전 학습된 T2I diffusion model에 image prompt capability를 달성하기 위해 IP-Adapter를 제안한다. IP-Adapter의 핵심 디자인은 decoupled cross-attention으로 image feature를 분리하여 cross-attention을 수행한다. 고작 22M parameter가 추가된 IP-Adapter는 qualitative, quantitative experimental results 모두에서 비등하거나 나은 성능을 보인다. 또한 IP-Adapter는 확장성이 좋아 한번 훈련된 뒤, 다른 custom model, structural controllable tools에 곧바로 덧붙여 사용할 수도 있다. 더욱 중요한 점은 image prompt를 text prompt와 더해 멀티모달 이미지 생성을 가능케한다는 점이다.</p>
-<p>IP-Adapter는 효과적이지만 reference image와 content, style이 유사한 이미지만 생성할 수 있다는 단점이 있을 수 있다. 때문에 Textual Inversion이나 DreamBooth와 같이 특정 이미지 집합 풍의 이미지를 생성하지는 못한다. 미래에 consistency를 향상시킨 더 강력한 Image prompt adapter를 개발하는 것이 목표다.</p>
-<p><strong>Textual Inversion</strong></p>
-<figure class="align-default" id="id23">
-<a class="mb-1 reference internal image-reference" href="../../_images/image191.png"><img alt="실험결과" class="mb-1" src="../../_images/image191.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 290 </span><span class="caption-text">Textual Inversion 동작방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id24">
-<a class="mb-1 reference internal image-reference" href="../../_images/image20.png"><img alt="실험결과" class="mb-1" src="../../_images/image20.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 291 </span><span class="caption-text">Textual Inversion 실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>DreamBooth</strong></p>
-<figure class="align-default" id="id25">
-<a class="mb-1 reference internal image-reference" href="../../_images/image211.png"><img alt="실험결과" class="mb-1" src="../../_images/image211.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 292 </span><span class="caption-text">DreamBooth 동작 방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id26">
-<a class="mb-1 reference internal image-reference" href="../../_images/image22.png"><img alt="실험결과" class="mb-1" src="../../_images/image22.png" style="width: 40%;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 293 </span><span class="caption-text">DreamBooth 실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="t2i_adapter.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">T2I-Adapter</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="HyperDreamBooth.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">HyperDreamBooth</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">IP-Adapter</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">Related Works</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adapters-for-large-models">Adapters for Large Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">Preliminaries</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-prompt-adapter">Image Prompt Adapter</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">Experimental Setup</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-existing-methods">Comparison with Existing Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#more-results">More Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>IP-Adapter &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/IP_Adapter';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="HyperDreamBooth" href="HyperDreamBooth.html" />
+    <link rel="prev" title="T2I-Adapter" href="t2i_adapter.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/IP_Adapter.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/IP_Adapter.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>IP-Adapter</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">IP-Adapter</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">Related Works</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adapters-for-large-models">Adapters for Large Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-prompt-adapter">Image Prompt Adapter</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">Experimental Setup</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-existing-methods">Comparison with Existing Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#more-results">More Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2308.06721">https://arxiv.org/abs/2308.06721</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/tencent-ailab/IP-Adapter">tencent-ailab/IP-Adapter</a></p></li>
+<li><p>Project Page : <a class="reference external" href="https://ip-adapter.github.io">https://ip-adapter.github.io</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
+<li><p><strong>Last updated on Sep. 21, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="ip-adapter">
+<h1>IP-Adapter<a class="headerlink" href="#ip-adapter" title="Permalink to this heading">#</a></h1>
+<blockquote>
+<div><p>📌 문제상황 <br />
+text-to-image diffusion model(T2I diffusion model)이 생성하는 이미지 품질은 훌륭하지만 text prompt를 통해 원하는 형태의 이미지를 생성하는 것이 어렵다. 복잡한 prompt engineering을 시도하거나, image prompt를 활용할 수도 있지만 사전학습된 모델을 fine-tuning하게 되면 많은 리소스가 필요할 뿐만 아니라 해당 방식은 범용성, 호환성도 떨어진다. <br />
+<br />
+📌 해결방안 <br />
+<strong>cross-attention을 text features와 image features로 decoupling한다.</strong> 기존 학습된 diffusion model은 text feature에 맞춰 학습된 상태이므로 기존 layer에 image feature를 넣게 되면 image feature와 text feature의 align을 수행하게 된다. 따라서 기존 cross-attention layer 하나를 통해 image-feature와 text-feature를 결합하는 것은 적절하지 않다. <br />
+<br />
+📌  논문의 강점</p>
+<ul class="simple">
+<li><p>어떤 모델 구조에도 활용가능하다.</p></li>
+<li><p>적은 수의 파라미터(22M)만 추가적으로 학습하므로 가볍다.</p></li>
+<li><p>기존 controllable tools에 덧붙여 쓸 수도 있다.</p></li>
+</ul>
+</div></blockquote>
+<figure class="align-default" id="id1">
+<a class="mb-1 reference internal image-reference" href="../../_images/main.jpeg"><img alt="IP-Adapter를 활용한 이미지 합성" class="mb-1" src="../../_images/main.jpeg" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 268 </span><span class="caption-text">IP-Adapter를 활용한 이미지 합성 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p><strong>:image prompt의 필요성과 기존 연구에서 image prompt를 사용해 이미지를 생성하려는 시도의 종류와 장단점을 말한다.</strong></p>
+<p>복잡한 scene이나 concept을 입력할때 이미지 형태로 입력하는 것이 간편하고 효과적이다.
+image prompt + text prompt(“an image is worth a thousand words”)</p>
+<figure class="align-default" id="id2">
+<a class="mb-1 reference internal image-reference" href="../../_images/IMG_4891.png"><img alt="카페" class="mb-1" src="../../_images/IMG_4891.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 269 </span><span class="caption-text">“내츄럴 풍으로 카페를 꾸미고 여러 식물을 두어 장식하고 싶어. 내가 좋아하는 식물은 스노우 사파이어, 호야, 자미오쿨카스등 이고, 의자와 테이블은 원목을 선호해.”</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DALL-E2는 처음으로 image prompt를 지원한 모델로, T2I prior model이 image embedding을 조건으로 이미지를 생성하도록 했다. 하지만 기존 대부분의 T2I 모델은 주로 text를 조건으로 이미지를 생성하는 방식이었다. 예를 들어 stable diffusion(SD) 모델의 경우 CLIP text encoder로부터 text embedding을 뽑아내 사용했다.</p>
+<p>본 논문에서는 “image prompt를 기존 T2I 모델에서 사용할 수 있는가”라는 질문에서 출발하여, image prompt를 사용한 T2I 이미지 생성을 단순한 방식으로 가능케 한다.</p>
+<figure class="align-default" id="id3">
+<a class="mb-1 reference internal image-reference" href="../../_images/image.png"><img alt="비교를 위한 DALL-E2(unCLIP) 구조" class="mb-1" src="../../_images/image.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 270 </span><span class="caption-text">비교를 위한 DALL-E2(unCLIP) 구조 <a class="reference external" href="https://arxiv.org/abs/2204.06125">출처</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<a class="mb-1 reference internal image-reference" href="../../_images/image11.png"><img alt="비교를 위한 Stable Diffusion의 구조" class="mb-1" src="../../_images/image11.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 271 </span><span class="caption-text">비교를 위한 Stable Diffusion의 구조 <a class="reference external" href="https://arxiv.org/abs/2112.10752">출처</a></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>SD Image Variations와 Stable UnCLIP과 같은 기존 연구에서 image prompt를 사용한 이미지 생성을 위해 text-conditioned diffusion models을 image embedding을 사용해 직접 fine-tuning하려는 시도를 했다. 하지만 많은 양의 컴퓨터 리소스 사용과 기존 T2I 생성능력 저하, 재사용성 저하라는 단점이 있었다. 또한 해당 방식은 ControlNet과 같은 기존 structural control tools과 호환되지 않았다. 이는 downstream application에 치명적이다.</p>
+<p>이를 피하기 위해 diffusion model 자체를 fine-tuning하지 않고 text encoder를 image encoder로 교체하는 방식도 있었지만 text prompt를 지원할 수 없게 되고 이미지 품질이 충분하지 않다는 단점이 있었다.</p>
+<p>최근에는 T2I base model을 건드리지 않고 추가적인 네트워크를 이용해 image prompt를 지원하는 연구들이 있었다. ControlNet, T2I-Adapter와 같은 연구들은 대부분 sketch, depth map, segmentation map 등의 추가적인 입력을 활용했다. 또한 T2I-Adapter나 Uni-ControlNet 같이 reference image를 입력해 style이나 concept을 전달하려는 시도도 있었다. 이런 흐름의 연구들은 CLIP image encoder에서 image embedding을 추출하여 추가 trainable network에 새로운 feature들을 mapping하여 text feature와 융합하고자 했다. 기존 text feature 대신 text feature+image feature를 디퓨전 모델 내 UNet 구조에 넣어 prompt에 넣은 이미지에 적합한(faithful) 이미지를 생성하고자 했다. 이런 연구들을 통해 image prompt의 가능성을 볼 수 있었지만 그 충실도가 충분하지 않았다. 또한 이미지 품질이 fine-tuning된 image prompt model보다 나빴다.</p>
+<figure class="align-default" id="id5">
+<a class="mb-1 reference internal image-reference" href="../../_images/compare_table.png"><img alt="기존 모델과 IP-Adapter 비교" class="mb-1" src="../../_images/compare_table.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 272 </span><span class="caption-text">기존 모델과 IP-Adapter 비교 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>image prompt를 지원하는 기존 방식</strong></p>
+<ul class="simple">
+<li><p>input image embedding to T2I model</p></li>
+<li><p>base model fine-tuning</p></li>
+<li><p>text encoder → image encoder</p></li>
+<li><p>additional network</p></li>
+</ul>
+<p>본 논문에서는 앞서 언급한 문제점의 원인을 T2I model내의 cross-attention이라고 보고 있다. <strong>사전학습된 cross-attention에서 key, value projection weights은 text feature에 맞게 훈련되어 있는 상태이다.</strong>
+결과적으로 image feature와 text feature가 cross-attention layer에서 합쳐지는데 이때 image-specific 특성들이 무시되어 reference image에 아주 충실한 이미지를 생성하지 못하고 coarse-grained controllable generation(e.g., image style)만 달성 가능해진다.</p>
+<p>마지막으로 앞선 연구의 문제점들을 극복한 효과적인 image prompt adapter, IP-Adapter를 제안한다. 특히 IP-Adapter의 경우 decoupled cross-attention mechanism을 사용해 text feature와 image feature를 분리한다. image feature를 위해 base model내 모든 UNet cross-attention layer에 cross-attention layer 를 추가하여 훈련단계에서는 적은 수의 파라미터(22M)만 훈련한다. 본 논문에서 제안하는 IP-Adapter는 매우 가볍고 효과적이다. 또한 일반화 능력(generalization capability)가 높고 text prompt와도 잘 어울린다(compatible).</p>
+<p><strong>IP-Adapter에서 제안하는 방식</strong></p>
+<ul class="simple">
+<li><p>additional cross-attention layer in UNet of diffusion model</p></li>
+<li><p>reusable and flexible (base + IP-Adapter + ControlNet가능)</p></li>
+<li><p>multimodal compatibility (image prompt + text prompt)</p></li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-works">
+<h1>Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h1>
+<section id="text-to-image-diffusion-models">
+<h2>Text-to-Image Diffusion Models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h2>
+<p>large T2I model은 크게 autoregressive models, diffusion models 두 부류로 나눌 수 있다. DALLE, CogView, Make-A-Scene과 같은 초기 연구들은 autoregressive model 이었다. autoregressive model은 VQ-VAE와 같은 image tokenizer를 사용해 image들을 token화 하여 autoregressive transformer에 text token을 이용해 image token을 예측하게 하는 방식으로 학습했다. 하지만 autoregressive model은 파라미터 수가 많고 고화질 이미지를 생성하기 위해 많은 리소스가 필요했다.</p>
+<p>최근에는 diffusion models(DM)이 등장하여 T2I 생성모델의 state-of-the-art를 달성했다. 이전에 GLIDE는 cascaded diffusion 구조를 통해 64x64 → 256x256 이미지를 생성했다.
+DALL-E2의 경우, image embedding을 조건으로 하는 diffusion model을 사용하며, text prompt로부터 image embedding을 생성하도록 prior model을 학습했다. DALL-E2는 text prompt뿐만 아니라 image prompt를 통한 이미지 생성도 지원한다. text 이해도를 높이기 위해 Imagen은 거대 transformer language model인 T5를 도입했다. Re-Imagen의 경우 검색된(retrieved) 정보를 활용해 드물거나 학습한 적 없는 entity에 대한 이미지의 충실도(fidelity)를 개선했다.
+SD는 latent diffusion model로 pixel space가 아닌 latent space상에서 동작하게 하여 diffusion model만 사용하여 고품질의 이미지를 생성할 수 있게 했다. text 일치도(alignment)를 높이기 위해 eDiff-I의 경우 T2I diffusion model과 유사한 디자인을 채택하여 T5 text, CLIP text embedding, CLIP image embedding등 멀티모달 조건을 활용했다. Versatile Diffusion은 unified multi-flow diffusion framework를 이용해 T2I, I2T, 등 다양한 생성방식을 하나의 모델로 가능하게 했다. controllable image 생성 면에서는 Composer가 image embedding을 활용한 joint fine-tuning을 시도했었다. RAPHAEL은 mixture of experts(MoEs) 전략을 사용해 T2I model의 이미지 품질을 향상시켰다.</p>
+<p>DALL-E2는 image prompt를 통해 해당 풍의 이미지들을 생성할 수 있다는 점에서 매력적이다. 또한 image prompt를 T2I model에서 지원하고자 하는 연구들이 있다. SD Image Variations model은 변경한 SD를 fine-tuning하여 text feature를 CLIP image encoder의 image embedding으로 교체할 수 있게 했다. Stable unCLIP 또한 SD를 fine-tuning하여 time embedding에 image embedding을 추가했다. 기존 모델을 fine-tuning하는 방식은 고품질의 이미지를 생성 할 수 있다는 장점이 있지만 비교적 training cost가 높으며 기존 tools(e.g., ControlNet)과 호환되지 않는다는 단점이 있다.</p>
+</section>
+<section id="adapters-for-large-models">
+<h2>Adapters for Large Models<a class="headerlink" href="#adapters-for-large-models" title="Permalink to this heading">#</a></h2>
+<p>거대한 사전학습된 모델 전체를 fine-tuning하는 것은 비효율적이다. 이 대안으로 떠오르는 것이 adapter를 사용하는 것인데, 기존 모델은 freeze시켜 학습하는 파라미터 수를 줄일 수 있다. adapter는 NLP에서 오랫동안 사용되던 방식이다. 최근에는 LLM의 vision-language 이해를 위해 adapter를 사용하고 있다.</p>
+<p>T2I model의 최근 인기로 인해 adapter들도 여기에 추가적인 control을 주는 방향으로 사용되고 있다. ControlNet(아래 사진 참고)의 경우 사전학습된 T2I diffusion model에 task-specific한 입력
+(e.g.,canny edge)을 추가적으로 넣기위해 adapter를 사용할 수 있다는 것을 보여주었다. 유사한 시기에 T2I-Adapter(아래 사진 참고)도 등장했는데 보다 간단하고 가벼운 형태로 색이나 구조적인 면에서
+fine-grained control을 주고자 했다. fine-tuning에 사용되는 비용을 줄이기 위해 Uni-ControlNet은 multi-scale condition injection을 사용했다.</p>
+<p>structural control외에 이미지 집합을 통해 content나 style을 조절하고자 한 연구도 있다. ControlNet Shuffle의 경우 이미지들을 recompose하도록 학습하여 사용자가 제공한 이미지들을 바탕으로 이미지를 생성 할 수 있었다. 또한 ControlNet Reference-only의 경우, 학습없이 SD에 feature injection을 통해 이미지를 변형했다. T2I-Adapter의 최근 버전의 경우, CLIP image encoder로 부터 reference image의 image feature를 text feature에 더해줌으로써 style adapter로서의 역할도 가능하다. Uni-ControlNet(아래 사진 참고)의 global control adapter 또한 CLIP image encoder로 부터 추출한 image embedding을 작은 네트워크를 통해 condition embedding으로 projection하여 사용한다. SeeCoder(아래 사진 참고)는 기존 text encoder를 semantic context encoder로 교체하여 image variants를 생성하고자 했다.</p>
+<p><strong>ControlNet</strong></p>
+<figure class="align-default" id="id6">
+<a class="mb-1 reference internal image-reference" href="../../_images/image21.png"><img alt="비교를 위한 ControlNet의 작동 방식" class="mb-1" src="../../_images/image21.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 273 </span><span class="caption-text">비교를 위한 ControlNet의 작동 방식 <a class="reference external" href="https://arxiv.org/abs/2302.05543">출처</a></span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id7">
+<a class="mb-1 reference internal image-reference" href="../../_images/image31.png"><img alt="" class="mb-1" src="../../_images/image31.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 274 </span><span class="caption-text">ControlNet preprocessor <a class="reference external" href="https://github.com/pytorch/pytorch/blob/main/torch/nn/modules/pixelshuffle.py">ContentShuffleDetector</a></span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Uni-ControlNet</strong></p>
+<figure class="align-default" id="id8">
+<a class="mb-1 reference internal image-reference" href="../../_images/image41.png"><img alt="Uni-ControlNet Architecture" class="mb-1" src="../../_images/image41.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 275 </span><span class="caption-text">Uni-ControlNet Architecture <a class="reference external" href="https://arxiv.org/abs/2305.16322">출처</a></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>T2I-Adapter</strong></p>
+<figure class="align-default" id="id9">
+<a class="mb-1 reference internal image-reference" href="../../_images/image61.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image61.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 276 </span><span class="caption-text">비교를 위한 T2I Adapter의 작동 방식 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a> <a class="reference external" href="https://pytorch.org/docs/stable/generated/torch.nn.PixelUnshuffle.html">PixelUnshuffle</a></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="mb-1 reference internal image-reference" href="../../_images/image5.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image5.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 277 </span><span class="caption-text">T2I Adapter의 장점 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="mb-1 reference internal image-reference" href="../../_images/image71.png"><img alt="비교를 위한 T2I Adapter의 작동 방식" class="mb-1" src="../../_images/image71.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 278 </span><span class="caption-text">T2I-adapter의 결과는 기존 모델의 각 층의 feature map 크기가 맞도록 더해짐 <a class="reference external" href="https://arxiv.org/abs/2302.08453">출처</a></span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>SeeCoder</strong></p>
+<figure class="align-default" id="id12">
+<a class="mb-1 reference internal image-reference" href="../../_images/image81.png"><img alt="SeeCoder Architecture" class="mb-1" src="../../_images/image81.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 279 </span><span class="caption-text">SeeCoder Architecture <a class="reference external" href="https://arxiv.org/abs/2305.16223">출처</a></span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<section id="preliminaries">
+<h2>Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
+<blockquote>
+<div><p>📌 생성모델의 일종인 diffusion model의 이미지 생성단계</p>
+<ol class="arabic simple">
+<li><p><strong>diffusion process (forward process)</strong> <br />
+T step의 fixed Markov chain을 통해 데이터에 gaussian noise를 점차 추가.</p></li>
+<li><p><strong>denoising process</strong> <br />
+gaussian noise로 부터 learnable model을 통해 sample을 생성.</p></li>
+</ol>
+</div></blockquote>
+<p>일반적으로 noise 예측을 위한 diffusion model(<span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>)의 training objective는 아래와 같이 단순한 variant of variational bound 로 표현된다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{\text{simple}}=\Bbb E_{x_0, \epsilon\sim \mathcal N(0,I),c,t}\|\epsilon-\epsilon_\theta(x_t, \mathbf c,t)\|^2 \tag{1}
+\]</div>
+<p><span class="math notranslate nohighlight">\(x_0\)</span> 는 real data, <span class="math notranslate nohighlight">\(\mathbf c\)</span> 는 추가조건, <span class="math notranslate nohighlight">\(t\)</span> 는 time step을 말하며 <span class="math notranslate nohighlight">\([0,T]\)</span> 내에 속한다. <span class="math notranslate nohighlight">\(x_t=\alpha_t x_0+\sigma_t\epsilon\)</span>은 step t에 해당하는 noisy data를 말하고, <span class="math notranslate nohighlight">\(\alpha_t, \sigma_t\)</span>는 diffusion process를 결정하는 predefined function이다. <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>가 한번 학습되고 나면 랜덤 노이즈로부터 이미지를 반복적으로 생성할 수 있다. 일반적으로 생성 속도를 높이기 위해 DDIM, PNDM, DPM-solver와 같은 fast sampler를 inference시 사용한다.</p>
+<p>conditional diffusion model에서 classifier guidance를 통해 이미지 정확도(fidelity)와 다양성(sample diversity)를 밸런싱할 수 있다. 이는 따로 학습된 classifier의 gradient를 활용하는데, classifier를 따로 학습하는 번거로움을 지우기 위해 classifier-free guidance를 사용하기도 한다. 이런 접근에서 conditional, unconditional diffusion models는 학습시 랜덤하게 조건 <span class="math notranslate nohighlight">\(c\)</span> 를 배제하여 합동 학습(joint training)된다. sampling단계 에서는 conditional model과 unconditional model의 prediction을 모두 이용하여 noise를 계산한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat \epsilon_\theta(x_t,\mathbf c,t)=\mathcal w \epsilon_\theta(x_t,\mathbf c, t)+(1-\mathcal w)\epsilon_\theta(x_t,t) \tag{2}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\mathcal w\)</span>은 guidance scale 혹은 guidance weight로 불리는데 condition <span class="math notranslate nohighlight">\(c\)</span>의 영향력을 조절하기 위한 상수값이다. T2I diffusion model의 경우 image-text 일치성을 높이는데 classifier-free guidance가 큰 역할을 한다.</p>
+<p>본 논문에서는 open-source SD에 IP-Adapter를 덧붙여 실험을 진행했다. SD는 latent diffusion model로 frozen CLIP text encoder로 뽑아낸 text feature를 condition으로 사용한다. diffusion model은 Unet에 attention layer가 추가된 형태이다. Imagen과 같은 pixel-based diffusion model과 비교해 SD는 사전학습된 auto-encoder model을 활용해 latent space에서 동작하므로 효율적이다.</p>
+</section>
+<section id="image-prompt-adapter">
+<h2>Image Prompt Adapter<a class="headerlink" href="#image-prompt-adapter" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id13">
+<a class="mb-1 reference internal image-reference" href="../../_images/image9.png"><img alt="IP-Adapter의 동작방식" class="mb-1" src="../../_images/image9.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 280 </span><span class="caption-text">IP-Adapter의 동작방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id14">
+<a class="mb-1 reference internal image-reference" href="../../_images/image11.png"><img alt="비교를 위한 Stable Diffusion의 구조" class="mb-1" src="../../_images/image11.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 281 </span><span class="caption-text">비교를 위한 Stable Diffusion의 구조 <a class="reference external" href="https://arxiv.org/abs/2112.10752">출처</a></span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Image Encoder</strong></p>
+<p>pretrained CLIP image encoder를 사용해 image prompt에서 image feature를 뽑아냈다. CLIP은 multimodal model로 거대 image-text pair 데이터셋으로 contrastive learning시킨 모델이다. CLIP image encoder를 통해 global image embedding을 얻었다. 이 embedding은 image caption과 잘 정렬되어(well-aligned) 있으며 image의 풍부한 내용(content)과 스타일을 표현할 수 있다. 학습단계에서 CLIP image encoder는 frozen되어 학습되지 않는다.</p>
+<p><strong>Decoupled Cross-Attention</strong></p>
+<p>image feature는 사전학습된 UNet에 decoupled cross-attention을 통해 결합된다. 초기 SD model에서는 CLIP text encoder를 통해 뽑아낸 text feature를 UNet의 cross-attention layer에 넣었다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf Z'=\text{Attention}(\mathbf{Q,K,V})=\text{Softmax}(\frac{\mathbf {QK}^T}{\sqrt{d}})\mathbf V, \tag{3}
+\]</div>
+<p>query feature는 <span class="math notranslate nohighlight">\(Z\)</span>, text feature는 <span class="math notranslate nohighlight">\(c_t\)</span>, cross-attention의 결과는 <span class="math notranslate nohighlight">\(Z'\)</span>이고, <span class="math notranslate nohighlight">\(\mathbf{Q=ZW_q, K=c_t W_k, V=c_t W_v}\)</span>는 attention 연산의 각각 query, key, value 행렬이다. <span class="math notranslate nohighlight">\(\mathbf{W_q, W_k, W_v}\)</span>는 linear projection layers의 학습가능한 weight matrices다.</p>
+<p>image feature를 이미지 생성에 반영하는 직관적인 방법은 cross-attention시 text feature+image feature로 결합(concatenate)하여 처리하는 것이다. 하지만 이 방법은 충분하지 않다는 것을 발견하여 decoupled cross-attention을 제안한다. 이는 cross-attention 에서 image feature와 text feature를 따로 처리하는 것이다. 구체적으로는 기존 cross-attention layer가 존재하던 곳에 새로운 cross-attention layer를 추가하여 image feature를 처리하도록 했다. image feature <span class="math notranslate nohighlight">\(c_i\)</span>가 주어질때 새로운 attention layer의 결과는 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf Z''=\text{Attention}(\mathbf{Q,K',V'})=\text{Softmax}(\frac{\mathbf{Q(K')}^T}{\sqrt{d}})\mathbf V', \tag{4}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\mathbf{Q=ZW_q}\)</span>, <span class="math notranslate nohighlight">\(\mathbf{K'=c_i W'_k}\)</span> , <span class="math notranslate nohighlight">\(\mathbf{V'=c_i W'_v}\)</span> 는 image feature를 위한 query, key, value 행렬이다. 여기서 핵심은 text cross-attention과 image cross-attention에서 동일한 query를 사용했다는 점이다. 결과적으로는 각 cross-attention layer 마다 2개의 파라미터 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span> 를 추가하게 된다. 수렴속도를 높이기 위해 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span>는 <span class="math notranslate nohighlight">\(\mathbf{W_k,W_v}\)</span>로 초기화했다. 그러면 두 cross-attention layer의 결과를 더함으로써 최종 결과를 구할 수 있다. decoupled cross-attention의 최종적인 형태는 다음과 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf Z^\text{new}=\text{Softmax}(\frac{\mathbf {QK}^T}{\sqrt{d}})\mathbf V + \text{Softmax}(\frac{\mathbf {Q(K')}^T}{\sqrt{d}})\mathbf V' \tag{5}
+\]</div>
+<div class="math notranslate nohighlight">
+\[ \text{where} \space \mathbf{Q=ZW}_q,\space \mathbf{K=c}_t\mathbf W_k,\space \mathbf{V=c}_t\mathbf W_v,\space \mathbf{K'=c}_i\mathbf W'_k, \space \mathbf{V'=c}_i\mathbf W'_v
+\]</div>
+<p>사전학습한 UNet은 freeze시키고 훈련을 진행하므로 <span class="math notranslate nohighlight">\(\mathbf{W'_k,W'_v}\)</span> <strong>만</strong> 학습된다.</p>
+<p><strong>Training and Inference</strong></p>
+<p>학습시 IP-Adapter만 최적화하고 기존 사전학습된 diffusion model은 고정한다. IP-Adapter는 image-text pair dataset으로 학습시키며 original SD와 동일한 objective를 사용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{\text{simple}}=\Bbb E_{x_0, \epsilon\sim \mathcal N(0,I),c_t,c_i,t}\|\epsilon-\epsilon_\theta(x_t,\mathbf {c_t,c_i},t)\|^2 \tag{6}
+\]</div>
+<p>또 random하게 image condition을 drop하여 inference 단계에서 classifier-free guidance를 사용할 수 있도록 한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat \epsilon_\theta(x_t,\mathbf {c_t,c_i},t)=\mathcal w \epsilon_\theta(x_t,\mathbf {c_t,c_i}, t)+(1-\mathcal w)\epsilon_\theta(x_t,t) \tag{7}
+\]</div>
+<p>image condition이 drop되면 CLIP image embedding은 0으로 처리했다. text cross-attention과 image cross-attention이 분리(detach)되어 있으므로 inference시 image condition의 가중치 <span class="math notranslate nohighlight">\(\lambda\)</span> 도 조절할 수 있다. <span class="math notranslate nohighlight">\(\lambda\)</span> 가 0이 되면 기존 T2I 모델이 된다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbf Z^\text{new}=\text{Attention}(\mathbf {Q,K,V})+ \lambda\cdot\text{Attention}(\mathbf {Q,K',V'})\tag{8}
+\]</div>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<section id="experimental-setup">
+<h2>Experimental Setup<a class="headerlink" href="#experimental-setup" title="Permalink to this heading">#</a></h2>
+<div class="pst-scrollable-table-container"><table class="colwidths-auto table">
+<thead>
+<tr class="row-odd"><th class="head"><p>항목</p></th>
+<th class="text-center head"><p>값</p></th>
+</tr>
+</thead>
+<tbody>
+<tr class="row-even"><td><p>base model</p></td>
+<td class="text-center"><p>SD v1.5</p></td>
+</tr>
+<tr class="row-odd"><td><p>image encoder</p></td>
+<td class="text-center"><p>OpenCLIP ViT-H/14</p></td>
+</tr>
+<tr class="row-even"><td><p>resolution</p></td>
+<td class="text-center"><p>512x512 (resized and center crop)</p></td>
+</tr>
+<tr class="row-odd"><td><p>optimizer</p></td>
+<td class="text-center"><p>AdamW</p></td>
+</tr>
+<tr class="row-even"><td><p>learning rate</p></td>
+<td class="text-center"><p>0.0001</p></td>
+</tr>
+<tr class="row-odd"><td><p>weight decay</p></td>
+<td class="text-center"><p>0.01</p></td>
+</tr>
+<tr class="row-even"><td><p>libraries</p></td>
+<td class="text-center"><p>Hugging Face diffusers, DeepSpeed ZeRO-2</p></td>
+</tr>
+<tr class="row-odd"><td><p>GPU</p></td>
+<td class="text-center"><p>8 V100</p></td>
+</tr>
+<tr class="row-even"><td><p>training step</p></td>
+<td class="text-center"><p>1M</p></td>
+</tr>
+<tr class="row-odd"><td><p>batch size</p></td>
+<td class="text-center"><p>8 per GPU</p></td>
+</tr>
+<tr class="row-even"><td><p>classifier-free guidance를 위한 condition drop 확률</p></td>
+<td class="text-center"><p>0.05</p></td>
+</tr>
+<tr class="row-odd"><td><p>training data</p></td>
+<td class="text-center"><p>LAION-2B, COYO-700M</p></td>
+</tr>
+<tr class="row-even"><td><p>sampler for inference</p></td>
+<td class="text-center"><p>DDIM (50steps)</p></td>
+</tr>
+<tr class="row-odd"><td><p>guidance scale</p></td>
+<td class="text-center"><p>7.5</p></td>
+</tr>
+<tr class="row-even"><td><p><span class="math notranslate nohighlight">\(\lambda\)</span></p></td>
+<td class="text-center"><p>1.0 for only image prompt</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</section>
+<section id="comparison-with-existing-methods">
+<h2>Comparison with Existing Methods<a class="headerlink" href="#comparison-with-existing-methods" title="Permalink to this heading">#</a></h2>
+<p><strong>Quantitative Comparison</strong></p>
+<figure class="align-default" id="id15">
+<a class="mb-1 reference internal image-reference" href="../../_images/image10.png"><img alt="실험결과" class="mb-1" src="../../_images/image10.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 282 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Qualitative Comparison</strong></p>
+<figure class="align-default" id="id16">
+<a class="mb-1 reference internal image-reference" href="../../_images/image111.png"><img alt="실험결과" class="mb-1" src="../../_images/image111.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 283 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>(실험 결과를 보고 IP-Adapter를 활용해 생성한 이미지가 reference와 지나치게 유사하다는 생각이 들었습니다. 몇몇은 그냥 좌우반전을 한 것처럼 느껴졌습니다. 흔히 GAN에서 말하는 Mode Collapse와 같은 현상이 아닌가 싶어 다양성이 낮아보이는 결과가 의아했으나, conclusion에서 이 단점을 언급합니다.)</p>
+</section>
+<section id="more-results">
+<h2>More Results<a class="headerlink" href="#more-results" title="Permalink to this heading">#</a></h2>
+<p><strong>Generalizable to Custom Models</strong></p>
+<figure class="align-default" id="id17">
+<a class="mb-1 reference internal image-reference" href="../../_images/image131.png"><img alt="실험결과" class="mb-1" src="../../_images/image131.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 284 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Structure Control</strong></p>
+<figure class="align-default" id="id18">
+<a class="mb-1 reference internal image-reference" href="../../_images/image14.png"><img alt="실험결과" class="mb-1" src="../../_images/image14.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 285 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Image-to-Image Inpainting</strong></p>
+<figure class="align-default" id="id19">
+<a class="mb-1 reference internal image-reference" href="../../_images/image15.png"><img alt="실험결과" class="mb-1" src="../../_images/image15.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 286 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Multimodal Prompts</strong></p>
+<figure class="align-default" id="id20">
+<a class="mb-1 reference internal image-reference" href="../../_images/image16.png"><img alt="실험결과" class="mb-1" src="../../_images/image16.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 287 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-study">
+<h2>Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<p><strong>Importance of Decoupled Cross-Attention</strong></p>
+<figure class="align-default" id="id21">
+<a class="mb-1 reference internal image-reference" href="../../_images/image17.png"><img alt="실험결과" class="mb-1" src="../../_images/image17.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 288 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Comparison of Fine-grained Features and Global Features</strong></p>
+<figure class="align-default" id="id22">
+<a class="mb-1 reference internal image-reference" href="../../_images/image181.png"><img alt="실험결과" class="mb-1" src="../../_images/image181.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 289 </span><span class="caption-text">실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>IP-Adapter는 CLIP image encoder로 부터 추출한 global image embedding을 사용하기 때문에 reference image의 일부 특성을 잃어버릴 수 있다. 따라서 fine-grained feature를 위한 IP-Adapter를 디자인했다. 첫번째로 CLIP image encoder의 penultimate layer에서 grid feature를 뽑아낸다. 이후 작은 query network를 이용해 feature를 학습한다. 구체적으로는 lightweight transformer를 사용해 grid feature로 부터 정보를 뽑아내도록 16개의 learnable token을 정의한다. query network에서 나온 token feature들을 cross-attention layer의 입력으로 넣어준다.</p>
+<p>두 adapter의 생성 결과를 비교하면 finer-grained feature를 이용하면 보다 image prompt와 가까운 결과를 얻을 수 있다. finer-grained feature는 spatial structure information을 학습하여 생성된 이미지의 diversity를 낮추는 결과를 초래할 수 있으나 추가적인 조건(text prompt, structure map)을 활용하면 다양한 이미지를 만들 수 있다. 예를 들어 위의 그림과 같이 사진+pose를 통해 이미지를 생성 할 수 있다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<p>본 연구에서는 사전 학습된 T2I diffusion model에 image prompt capability를 달성하기 위해 IP-Adapter를 제안한다. IP-Adapter의 핵심 디자인은 decoupled cross-attention으로 image feature를 분리하여 cross-attention을 수행한다. 고작 22M parameter가 추가된 IP-Adapter는 qualitative, quantitative experimental results 모두에서 비등하거나 나은 성능을 보인다. 또한 IP-Adapter는 확장성이 좋아 한번 훈련된 뒤, 다른 custom model, structural controllable tools에 곧바로 덧붙여 사용할 수도 있다. 더욱 중요한 점은 image prompt를 text prompt와 더해 멀티모달 이미지 생성을 가능케한다는 점이다.</p>
+<p>IP-Adapter는 효과적이지만 reference image와 content, style이 유사한 이미지만 생성할 수 있다는 단점이 있을 수 있다. 때문에 Textual Inversion이나 DreamBooth와 같이 특정 이미지 집합 풍의 이미지를 생성하지는 못한다. 미래에 consistency를 향상시킨 더 강력한 Image prompt adapter를 개발하는 것이 목표다.</p>
+<p><strong>Textual Inversion</strong></p>
+<figure class="align-default" id="id23">
+<a class="mb-1 reference internal image-reference" href="../../_images/image191.png"><img alt="실험결과" class="mb-1" src="../../_images/image191.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 290 </span><span class="caption-text">Textual Inversion 동작방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id24">
+<a class="mb-1 reference internal image-reference" href="../../_images/image20.png"><img alt="실험결과" class="mb-1" src="../../_images/image20.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 291 </span><span class="caption-text">Textual Inversion 실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>DreamBooth</strong></p>
+<figure class="align-default" id="id25">
+<a class="mb-1 reference internal image-reference" href="../../_images/image211.png"><img alt="실험결과" class="mb-1" src="../../_images/image211.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 292 </span><span class="caption-text">DreamBooth 동작 방식 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id26">
+<a class="mb-1 reference internal image-reference" href="../../_images/image22.png"><img alt="실험결과" class="mb-1" src="../../_images/image22.png" style="width: 40%;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 293 </span><span class="caption-text">DreamBooth 실험결과 <a class="reference external" href="https://arxiv.org/abs/2308.06721">출처</a></span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="t2i_adapter.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">T2I-Adapter</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="HyperDreamBooth.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">HyperDreamBooth</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">IP-Adapter</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">Related Works</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adapters-for-large-models">Adapters for Large Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">Preliminaries</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-prompt-adapter">Image Prompt Adapter</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experimental-setup">Experimental Setup</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-existing-methods">Comparison with Existing Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#more-results">More Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/LCM-LoRA.html b/docs/review/LCM-LoRA.html
old mode 100644
new mode 100755
index 1eef7b1c..e5fa0984
--- a/docs/review/LCM-LoRA.html
+++ b/docs/review/LCM-LoRA.html
@@ -1,921 +1,941 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LCM-LoRA';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="MimicBrush: Zero-shot Image Editing with Reference Imitation" href="MimicBrush.html" />
-    <link rel="prev" title="One-Step Image Translation with Text-to-Image Models" href="one-step-image-translation.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LCM-LoRA.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/LCM-LoRA.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proposal">Proposal</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">기존 연구의 한계점</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lcms">LCMs 기반 연구</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-models">Latent Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cms">CMs과 차이점</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parameter-efficient-fine-tuning">Parameter-Efficient Fine-Tuning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation">Low Rank Adaptation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#task-arithmetic-in-pretrained-models">Task Arithmetic in Pretrained Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora">3. LCM-LoRA</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lora-distillation-for-lcm">3.1 LoRA Distillation for LCM</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora-as-universal-acceleration-module">3.2 LCM-LoRA as Universal Acceleration Module</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">4. Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2311.05556">https://arxiv.org/pdf/2311.05556</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/luosiallen/latent-consistency-model">Official</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Donghyun Han</p></li>
-<li><p><strong>Last updated on Oct. 02, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="lcm-lora-a-universal-stable-diffusion-acceleration-module">
-<h1>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module<a class="headerlink" href="#lcm-lora-a-universal-stable-diffusion-acceleration-module" title="Permalink to this heading">#</a></h1>
-<section id="proposal">
-<h2>Proposal<a class="headerlink" href="#proposal" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Latent Consistency Models(LCMs)에 Low Rank Adaptation (LoRA)을 적용하였다.</p></li>
-<li><p>LoRA를 이용하여 Stable Diffusion에 대한 추가적인 학습 없이도 fine-tuning 가능. (Accelerate 효과 극대화)</p></li>
-<li><p>이전의 다양한 PF-ODE (Probability-Flow ODE) solver를 사용한 방법론들보다 더 generalized 된 성능</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<section id="id1">
-<h3>기존 연구의 한계점<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<p>Latent Diffusion Models(LDMs)은 image generation 분야에서 좋은 성능을 보이고 있다. 그러나 아직까지는 느린 reverse process 때문에 사용자가 직접 사용하기에는 무리가 있다.
-따라서 LDMs을 가속화(Accelerate)하기 위한 기법들이 제안되어 왔는데 크게 2가지로 나눌 수 있다:</p>
-<ol class="arabic simple">
-<li><p>DDIM, DPM-Solver, DPM-Solver++ 등 <strong>ODE-Solver 기반 방법론</strong>.</p></li>
-<li><p>LDM을 경량화 하기 위한 <strong>Distillation 기반 방법론</strong>.</p></li>
-</ol>
-<p>ODE-Solver 방법론은 sampling step을 줄일 수 있지만 Classifier-free Guidance(CFG) 등을 사용할 때 Computation 적으로 Overhead가 있을 수 있다.
-Distillation 방법론 또한 Distillation 시 Computation적으로 Overhead가 있어 한계가 있다.
-ex)<a class="reference external" href="https://arxiv.org/pdf/2210.03142">Guided Distill</a> : 2 stage의 distillation  방식 + high resolution image 생성 한계</p>
-</section>
-<section id="lcms">
-<h3>LCMs 기반 연구<a class="headerlink" href="#lcms" title="Permalink to this heading">#</a></h3>
-<p>이에 반해 Consistency Models(CMs)에서 영감을 받은 Latent Consistency Models(LCMs)은 매우 좋은 대안이다. backward process를 augmented Probability Flow ODE(PF-ODE) problem으로 접근하여 반복적인 step을 획기적으로 줄일 수 있었다. LCMs은 1~4 step만으로도 높은 퀄리티의 고해상도 이미지를 생성해낼 수 있으며 큰 리소스가 필요하지 않다.</p>
-<p>그러나 LCMs을 기반으로 하는 방법론은 새로운 데이터셋에 대해 finetuning이 필요하거나 pretrained LDMs을 필요로 하는 한계가 존재한다.</p>
-<p>따라서 본 연구는 추가 학습없이 Stable Diffusion(SD)이나 SD-LoRA 등에 plug-in 해서 사용할 수 있는 LCM-LoRA를 제안한다. LCM-LoRA는 새로운 종류의 neural network 기반 PF-ODE Solver이며, 강력한 일반화 성능을 보여준다.</p>
-</section>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<section id="consistency-models">
-<h3>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h3>
-<p>CMs은 sampling step을 획기적으로 줄이면서도 Quality를 유지할 수 있는 방법론이다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_1.png"><img alt="Consistency Models" class="bg-primary mb-1" src="../../_images/LCM-LoRA_1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 560 </span><span class="caption-text">Consistency Models</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>CMs의 핵심은 PF-ODE의 궤적의 points가 solution에 mapping 되는 function <span class="math notranslate nohighlight">\((f: (x_t,t) \mapsto x_\epsilon)\)</span>을 추정하는 것이다.
-쉽게 말해 어떤 step의 noise image든지 <span class="math notranslate nohighlight">\(x_0\)</span> (정확히는 <span class="math notranslate nohighlight">\(x_\epsilon\)</span>)의 결과가 나오는 function을 추정한다. 또한 각 timestep에 관한 function의 결과값은 self-consistency를 만족해야 한다.</p>
-<div class="math notranslate nohighlight">
-\[
-f(x_t,t)=f(x_{t'},t'), \forall t, t' \in [\epsilon, T]. 
-\]</div>
-<p><span class="math notranslate nohighlight">\(\epsilon\)</span>은 매우작은 양수 값이다. 이때 <span class="math notranslate nohighlight">\(f_\theta(x,\epsilon)=x\)</span>를 만족하는 model <span class="math notranslate nohighlight">\(f_\theta\)</span>는 다음과 같이 정의한다:</p>
-<div class="math notranslate nohighlight">
-\[
-f_\theta(x,t)=c_{skip}(t)x+c_{out}(t)F_\theta(x,t).
-\]</div>
-<p><span class="math notranslate nohighlight">\(c_{skip}(\epsilon)=1\)</span>, <span class="math notranslate nohighlight">\(c_{out}(\epsilon)=0\)</span> 이기 때문에 <span class="math notranslate nohighlight">\(f_\theta(x,\epsilon)=x\)</span>를 만족한다. 위 수식은 미분 가능함을 증명하기 위한 수식이다. <span class="math notranslate nohighlight">\(F_\theta\)</span>는 심층신경망을 의미한다.</p>
-<p>CMs은 scratch부터 학습하는 방식과 Distillation 방식으로 나뉘는데 보편적으로 Distillation이 사용된다. Distillation 방식은 지수이동평균(Exponential Moving Average, EMA)을 통해 self-consistency를 학습할 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-L(\theta,\theta^-;\Phi)=\mathbb{E}_{x,t}\bigg[d\bigg(f_\theta(x_{t_{n+1}},t_{n+1}),f_{\theta^-}(\hat{x}^\phi_{t_n},t_n)\bigg)\bigg].
-\]</div>
-<p><span class="math notranslate nohighlight">\(\theta^-\)</span>는 <span class="math notranslate nohighlight">\(\theta\)</span>에 대한 EMA를 의미하며 <span class="math notranslate nohighlight">\(d(\cdot, \cdot)\)</span>은 두 sample 사이의 거리를 측정하는 지표이다. <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>는 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>에 대한 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 추정한 값으로 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[
-\hat{x}^{\phi}_{t_n} \leftarrow x_{t_{n+1}}+(t_n-t_{n+1})\Phi(x_{t_{n+1}},t_{n+1};\phi)
-\]</div>
-<p><span class="math notranslate nohighlight">\(\Phi\)</span>는 numerical PF-ODE solver를 의미한다. (보통 DDIM을 사용하는 것 같다.) 즉 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 PF-ODE solver로 예측한 값을 입력으로 하는 예측값과 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>을 입력으로 하는 예측값이 같도록 self-consistency를 비교하는 것이 핵심이다.</p>
-</section>
-<section id="latent-consistency-models">
-<h3>Latent Consistency Models<a class="headerlink" href="#latent-consistency-models" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_2.png"><img alt="Latent Diffusion Models" class="bg-primary mb-1" src="../../_images/LCM-LoRA_2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 561 </span><span class="caption-text">Latent Diffusion Models</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>LCMs은 CMs에 condition을 추가해주고 <span class="math notranslate nohighlight">\(F_\theta(x,t)\)</span>를 <span class="math notranslate nohighlight">\(\epsilon-Prediction\)</span>의 수식으로 치환한다. (<span class="math notranslate nohighlight">\(\mu\)</span>나 <span class="math notranslate nohighlight">\(v\)</span> prediction을 사용해도 됨.) 추가로 LDMs 기반이기 때문에 latent <span class="math notranslate nohighlight">\(z\)</span>에 대한 수식으로 변경해준다.</p>
-<div class="math notranslate nohighlight">
-\[f_\theta(z,c,t)=c_{skip}(t)z+c_{out}(t)\bigg(\frac{z-\sigma(t)\hat{\epsilon}_\theta(z,c,t)}{\alpha(t)}\bigg). (\epsilon-Prediction)\]</div>
-<div class="math notranslate nohighlight">
-\[L_{CD}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,n}\bigg[d\bigg(f_\theta(z_{t_{n+1}},c,t_{n+1}),f_{\theta^-}(\hat{z}^\psi_{t_n},c,t_n)\bigg)\bigg].\]</div>
-<p><span class="math notranslate nohighlight">\(n\)</span>은 timestep이지만 기존 <span class="math notranslate nohighlight">\(t\)</span>와는 다른, timestep 구간 <span class="math notranslate nohighlight">\([\epsilon,T]\)</span>에 대한 하위 간격이다. <span class="math notranslate nohighlight">\((t_1=\epsilon&lt;t_2&lt;...&lt;t_N=T)\)</span></p>
-<div class="math notranslate nohighlight">
-\[t_i=\bigg(\epsilon^{1/\rho}+\frac{i-1}{N-1}(T^{1/\rho}-\epsilon^{1/\rho})\bigg)^\rho, \rho=7\]</div>
-<section id="cms">
-<h4>CMs과 차이점<a class="headerlink" href="#cms" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>LDMs 기반 모델이다.</p></li>
-<li><p>LCMs는 CMs와 다르게 Classifier-free Guidance(CFG)를 포함한 Distillation도 정의되어있다.(<span class="math notranslate nohighlight">\(\tilde{\epsilon}_\theta\)</span>)</p></li>
-<li><p>LCMs는 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+1}\)</span>의 차이가 너무 적어 학습의 수렴이 늦어지게 된다 가정하고 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+k}\)</span>의 consistency를 비교하는 Skipping timestep 방법을 제시했다. (k는 trade-off를 가지며 최적의 값은 20으로 지정.)</p></li>
-<li><p>Latent Consistency Finetuning: 새로운 데이터셋에 대해 distillation할 때 LDMs를 학습 할 필요 없이 LCMs의 Consistency Distillation만 학습하여 사용할 수 있다.</p></li>
-</ul>
-<p>(자세한 내용은 <a class="reference external" href="https://pseudo-lab.github.io/pseudodiffusers/docs/review/latent_consistency_models.html">LCMs review</a>를 참고)</p>
-</section>
-</section>
-</section>
-<section id="parameter-efficient-fine-tuning">
-<h2>Parameter-Efficient Fine-Tuning<a class="headerlink" href="#parameter-efficient-fine-tuning" title="Permalink to this heading">#</a></h2>
-<p>Parameter-Efficient Fine-Tuning(PEFT)이란 파라미터를 효율적으로 사용하면서 fine-tuning 할수 있는 연구를 의미한다. Knowledge Distillation, Pruning, Quantization 등이 있다.</p>
-<p>본 연구에서는 PERF 기법 중 RoLA를 사용했다.</p>
-<section id="low-rank-adaptation">
-<h3>Low Rank Adaptation<a class="headerlink" href="#low-rank-adaptation" title="Permalink to this heading">#</a></h3>
-<p>기존에 pre-trained 된 가중치 <span class="math notranslate nohighlight">\(\Phi_0\)</span>에 대하여 새로운 task에 fine-tuning하는 모델 <span class="math notranslate nohighlight">\(P_\Phi(y|x)\)</span>는 다음과 같이 가중치가 업데이트 된다. (<span class="math notranslate nohighlight">\(\Phi_0+\Delta\Phi\)</span>)</p>
-<div class="math notranslate nohighlight">
-\[\underset{\Phi}{max}\sum_{(x,y)\in Z}\sum^{|y|}_{t=1}\log{(P_\Phi(y_t|x,y&lt;t))}\]</div>
-<p>LLM이나 Stable Diffusion과 같은 대규모 모델은 새로운 task로 fine-tuning 시 매우 큰 차원의 모델 파라미터를 다시 학습하기 때문에 매우 큰 Cost가 생긴다. (시간적, 자원적) 이때 weight의 차원은 줄이면서 변화량을 기록하는 또다른 weight를 만들어 더 효율적으로 계산하는 방식은 다음과 같이 나타낼 수 있다: (파라미터 <span class="math notranslate nohighlight">\(\Theta\)</span>에 대해 <span class="math notranslate nohighlight">\(\Delta\Phi=\Delta\Phi(\Theta), |\Theta|&lt;&lt;|\Phi_0|\)</span>)</p>
-<div class="math notranslate nohighlight">
-\[\underset{\Phi}{max}\sum_{(x,y)\in Z}\sum^{|y|}_{t=1}\log{(P_{\Phi_0+\Delta\Phi(\Theta)}(y_t|x,y&lt;t))}\]</div>
-<p>즉 기존의 잘 학습된 weight는 그대로 두고 low rank로 decomposition 된 weight만 optimization 하는 방법론을 Low Rank Adaptation(LoRA)라고 한다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_3.png"><img alt="Low Rank Adaptation" class="bg-primary mb-1" src="../../_images/LCM-LoRA_3.png" style="width: 300px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 562 </span><span class="caption-text">Low Rank Adaptation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 그림과 같이 원본 모델 weight는 freeze, LoRA는 rank를 r로 낮추어 finetuning한다. 이때 LoRA의 A는 random Gauissian으로, B는 zero로 weight initializing 한다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_4.png"><img alt="Low Rank Adaptation matrix" class="bg-primary mb-1" src="../../_images/LCM-LoRA_4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 563 </span><span class="caption-text">Low Rank Adaptation matrix</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 그림처럼 기존에는 d x d의 매우 큰 weight를 finetuning 해야 했지만, LoRA는 r만큼 압축된 weight matrix만 finetuning 하면 되기 때문에 훨씬 효율적이고 때에 따라 Fully fine-tuning 하는 방법들보다 더 좋은 성능을 보여주기도 한다. (그림은 <a class="reference external" href="https://ffighting.net/deep-learning-paper-review/language-model/lora/">이곳</a>을 참고하였습니다.)</p>
-<p>원본 논문의 LoRA는 LLM을 target으로 만들어졌기 때문에 Transformer의 query, key, value에 대한 parameter로 사용하였지만 Diffusion이나 다른 모델의 finetuning시에도 간단하게 사용 가능하다.</p>
-</section>
-</section>
-<section id="task-arithmetic-in-pretrained-models">
-<h2>Task Arithmetic in Pretrained Models<a class="headerlink" href="#task-arithmetic-in-pretrained-models" title="Permalink to this heading">#</a></h2>
-<p>task Arithmetic은 특정 task에서 학습된 Model의 가중치를 task vector라 보고 각 task vector를 조합하여 새로운 task vector를 생성하는 방법론이다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_5.png"><img alt="Task Arithmetic" class="bg-primary mb-1" src="../../_images/LCM-LoRA_5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 564 </span><span class="caption-text">Task Arithmetic</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>pre-trained parameter를 <span class="math notranslate nohighlight">\(\theta_{pre}\)</span>, fine-tuning parameter를 <span class="math notranslate nohighlight">\(\theta_{ft}\)</span>라고 할때 task vector <span class="math notranslate nohighlight">\(\tau\)</span>는 <span class="math notranslate nohighlight">\(\theta_{ft}-\theta_{pre}\)</span>로 정의할 수 있다.
-이를 다양하게 조합하고 특히 d)처럼 task 간 analogy를 고려하여 연산하는 경우 새로운 task에 대한 성능을 높일 수 있다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="lcm-lora">
-<h1>3. LCM-LoRA<a class="headerlink" href="#lcm-lora" title="Permalink to this heading">#</a></h1>
-<section id="lora-distillation-for-lcm">
-<h2>3.1 LoRA Distillation for LCM<a class="headerlink" href="#lora-distillation-for-lcm" title="Permalink to this heading">#</a></h2>
-<p>LCMs의 Latent Consistency Distillation에 대한 pseudo code는 다음과 같다:</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_6.png"><img alt="Latent Consistency Distillation" class="bg-primary mb-1" src="../../_images/LCM-LoRA_6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 565 </span><span class="caption-text">Latent Consistency Distillation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>논문의 저자는 LCMs의 Distillation은 LDMs에 관한 일종의 fine-tuning으로 보고 LoRA를 적용하는 방법을 제안하였다.
-pre-trained 된 weight matrix <span class="math notranslate nohighlight">\(W_0\)</span>에 대하여 기울기 업데이트는 <span class="math notranslate nohighlight">\(W_0+\Delta W=W_0+BA, W_0\in \mathbb{R}^{d\times k}, B\in \mathbb{R}^{d\times r}, A\in \mathbb{R}^{r\times k}\)</span> 로 표현할 수 있으며 rank <span class="math notranslate nohighlight">\(r \leq \min{(d,k)}\)</span> 로 작은 값을 갖는다. <span class="math notranslate nohighlight">\(W_0\)</span>의 weight는 고정되며 input <span class="math notranslate nohighlight">\(x\)</span> 에 대한 forward pass는 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[h=W_0x+\Delta Wx=W_0x+BAx. \tag{1}\]</div>
-<p>위와같이 LCMs에 LoRA를 적용할 경우 학습 parameter를 크게 줄일 수 있어 효율적이다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_7.png"><img alt="compare trainable parameter" class="bg-primary mb-1" src="../../_images/LCM-LoRA_7.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 566 </span><span class="caption-text">compare trainable parameter</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>따라서 LCM-loRA는 기존 LCMs 보다 더 큰 모델의 훈련과 실사용이 가능하다. LCMs의 경우 SD-V1.5나 SD-V2.1의 base Stable Diffusion을 사용했지만, LCM-LoRA는 SDXL과 SSD-1B(Segmind)을 확장하여 사용하였다. large Model에서도 LCD을 적용했을 때 잘 적응하는 모습을 볼 수 있었다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_8.png"><img alt="1024 x 1024 resolution image results with CFG scale w=7.5" class="bg-primary mb-1" src="../../_images/LCM-LoRA_8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 567 </span><span class="caption-text">1024 x 1024 resolution image results with CFG scale w=7.5</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="lcm-lora-as-universal-acceleration-module">
-<h2>3.2 LCM-LoRA as Universal Acceleration Module<a class="headerlink" href="#lcm-lora-as-universal-acceleration-module" title="Permalink to this heading">#</a></h2>
-<p>LCM-LoRA는 sampling step을 줄이는 distillation에 LoRA를 적용하였다. LoRA는 이외에도 custionized datasets에 대해 fine-tuning할 때 주로 쓰이는데 이같은 style에 대한 LoRA와 LCM-LoRA가 추가 학습없이 바로 합쳐져 사용할 수 있음을 발견했다. 저자는 이 발견이 task arithmetic에 대한 관점으로 해석할 수 있다고 주장하였다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_9.png"><img alt="Style-LoRA with LCM-LoRA" class="bg-primary mb-1" src="../../_images/LCM-LoRA_9.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 568 </span><span class="caption-text">Style LoRA with LCM-LoRA</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>LCM-LoRA의 fine-tuned parameter를 <span class="math notranslate nohighlight">\(\tau_{LCM}\)</span>이라 할 때, <span class="math notranslate nohighlight">\(\tau_{LCM}\)</span>은 acceleration vector라 할수 있다. 그리고 custom dataset에서 학습한 LoRA의 fine-tuned parameter를 <span class="math notranslate nohighlight">\(\tau'\)</span>이라 할 때, <span class="math notranslate nohighlight">\(\tau'\)</span>은 style vector라 할 수 있다. LCMs를 통해 custom dataset에 대한 image를 생성할 때, 파라미터는 다음과 같이 조합된다:</p>
-<div class="math notranslate nohighlight">
-\[\theta'_{LCM}=\theta_{pre}+\tau'_{LCM} \tag{2}\]</div>
-<div class="math notranslate nohighlight">
-\[\tau'_{LCM}=\lambda_1\tau'+\lambda_2\tau_{LCM} \tag{3}\]</div>
-<p>파라미터는 단순한 선형 결합을 통해 이루어지며 <span class="math notranslate nohighlight">\(\lambda_1\)</span>과 <span class="math notranslate nohighlight">\(\lambda_2\)</span>는 하이퍼파라미터다. 추가적인 학습없이 다음과 같은 결과를 얻을 수 있었다:</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_10.png"><img alt="fine-tuning with LCM-LoRA" class="bg-primary mb-1" src="../../_images/LCM-LoRA_10.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 569 </span><span class="caption-text">fine-tuning with LCM-LoRA</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>4. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>training-free acceleration module인 LCM-LoRA를 제안.</p></li>
-<li><p>PF-ODE를 예측하며 Stable Diffusion 및 SD LoRA에 fast inference, minimal step을 제공함.</p></li>
-<li><p>강력한 일반화 성능 증명.</p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="one-step-image-translation.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">One-Step Image Translation with Text-to-Image Models</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="MimicBrush.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">MimicBrush: Zero-shot Image Editing with Reference Imitation</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proposal">Proposal</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">기존 연구의 한계점</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lcms">LCMs 기반 연구</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-models">Latent Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cms">CMs과 차이점</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parameter-efficient-fine-tuning">Parameter-Efficient Fine-Tuning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation">Low Rank Adaptation</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#task-arithmetic-in-pretrained-models">Task Arithmetic in Pretrained Models</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora">3. LCM-LoRA</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lora-distillation-for-lcm">3.1 LoRA Distillation for LCM</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora-as-universal-acceleration-module">3.2 LCM-LoRA as Universal Acceleration Module</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">4. Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LCM-LoRA';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="MimicBrush: Zero-shot Image Editing with Reference Imitation" href="MimicBrush.html" />
+    <link rel="prev" title="One-Step Image Translation with Text-to-Image Models" href="one-step-image-translation.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LCM-LoRA.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/LCM-LoRA.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proposal">Proposal</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">기존 연구의 한계점</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lcms">LCMs 기반 연구</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-models">Latent Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cms">CMs과 차이점</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parameter-efficient-fine-tuning">Parameter-Efficient Fine-Tuning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation">Low Rank Adaptation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#task-arithmetic-in-pretrained-models">Task Arithmetic in Pretrained Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora">3. LCM-LoRA</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lora-distillation-for-lcm">3.1 LoRA Distillation for LCM</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora-as-universal-acceleration-module">3.2 LCM-LoRA as Universal Acceleration Module</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">4. Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2311.05556">https://arxiv.org/pdf/2311.05556</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/luosiallen/latent-consistency-model">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donghyun Han</p></li>
+<li><p><strong>Last updated on Oct. 02, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="lcm-lora-a-universal-stable-diffusion-acceleration-module">
+<h1>LCM-LoRA: A Universal Stable-Diffusion Acceleration Module<a class="headerlink" href="#lcm-lora-a-universal-stable-diffusion-acceleration-module" title="Permalink to this heading">#</a></h1>
+<section id="proposal">
+<h2>Proposal<a class="headerlink" href="#proposal" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Latent Consistency Models(LCMs)에 Low Rank Adaptation (LoRA)을 적용하였다.</p></li>
+<li><p>LoRA를 이용하여 Stable Diffusion에 대한 추가적인 학습 없이도 fine-tuning 가능. (Accelerate 효과 극대화)</p></li>
+<li><p>이전의 다양한 PF-ODE (Probability-Flow ODE) solver를 사용한 방법론들보다 더 generalized 된 성능</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<section id="id1">
+<h3>기존 연구의 한계점<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<p>Latent Diffusion Models(LDMs)은 image generation 분야에서 좋은 성능을 보이고 있다. 그러나 아직까지는 느린 reverse process 때문에 사용자가 직접 사용하기에는 무리가 있다.
+따라서 LDMs을 가속화(Accelerate)하기 위한 기법들이 제안되어 왔는데 크게 2가지로 나눌 수 있다:</p>
+<ol class="arabic simple">
+<li><p>DDIM, DPM-Solver, DPM-Solver++ 등 <strong>ODE-Solver 기반 방법론</strong>.</p></li>
+<li><p>LDM을 경량화 하기 위한 <strong>Distillation 기반 방법론</strong>.</p></li>
+</ol>
+<p>ODE-Solver 방법론은 sampling step을 줄일 수 있지만 Classifier-free Guidance(CFG) 등을 사용할 때 Computation 적으로 Overhead가 있을 수 있다.
+Distillation 방법론 또한 Distillation 시 Computation적으로 Overhead가 있어 한계가 있다.
+ex) <a class="reference external" href="https://arxiv.org/pdf/2210.03142">Guided Distill</a>: 2 stage의 distillation 방식 + high resolution image 생성 한계</p>
+</section>
+<section id="lcms">
+<h3>LCMs 기반 연구<a class="headerlink" href="#lcms" title="Permalink to this heading">#</a></h3>
+<p>이에 반해 Consistency Models(CMs)에서 영감을 받은 Latent Consistency Models(LCMs)은 매우 좋은 대안이다. backward process를 augmented Probability Flow ODE(PF-ODE) problem으로 접근하여 반복적인 step을 획기적으로 줄일 수 있었다. LCMs은 1~4 step만으로도 높은 퀄리티의 고해상도 이미지를 생성해낼 수 있으며 큰 리소스가 필요하지 않다.</p>
+<p>그러나 LCMs을 기반으로 하는 방법론은 새로운 데이터셋에 대해 finetuning이 필요하거나 pretrained LDMs을 필요로 하는 한계가 존재한다.</p>
+<p>따라서 본 연구는 추가 학습없이 Stable Diffusion(SD)이나 SD-LoRA 등에 plug-in 해서 사용할 수 있는 LCM-LoRA를 제안한다. LCM-LoRA는 새로운 종류의 neural network 기반 PF-ODE Solver이며, 강력한 일반화 성능을 보여준다.</p>
+</section>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<section id="consistency-models">
+<h3>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h3>
+<p>CMs은 sampling step을 획기적으로 줄이면서도 Quality를 유지할 수 있는 방법론이다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_1.png"><img alt="Consistency Models" class="bg-primary mb-1" src="../../_images/LCM-LoRA_1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 560 </span><span class="caption-text">Consistency Models</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>CMs의 핵심은 PF-ODE의 궤적의 points가 solution에 mapping 되는 function <span class="math notranslate nohighlight">\((f: (x_t,t) \mapsto x_\epsilon)\)</span>을 추정하는 것이다.
+쉽게 말해 어떤 step의 noise image든지 <span class="math notranslate nohighlight">\(x_0\)</span> (정확히는 <span class="math notranslate nohighlight">\(x_\epsilon\)</span>)의 결과가 나오는 function을 추정한다. 또한 각 timestep에 관한 function의 결과값은 self-consistency를 만족해야 한다.</p>
+<div class="math notranslate nohighlight">
+\[
+f(x_t,t)=f(x_{t'},t'), \forall t, t' \in [\epsilon, T]. 
+\]</div>
+<p><span class="math notranslate nohighlight">\(\epsilon\)</span>은 매우 작은 양수 값이다. 이때 <span class="math notranslate nohighlight">\(f_\theta(x,\epsilon)=x\)</span>를 만족하는 model <span class="math notranslate nohighlight">\(f_\theta\)</span>는 다음과 같이 정의한다:</p>
+<div class="math notranslate nohighlight">
+\[
+f_\theta(x,t)=c_{skip}(t)x+c_{out}(t)F_\theta(x,t).
+\]</div>
+<p><span class="math notranslate nohighlight">\(c_{skip}(\epsilon)=1\)</span>, <span class="math notranslate nohighlight">\(c_{out}(\epsilon)=0\)</span> 이기 때문에 <span class="math notranslate nohighlight">\(f_\theta(x,\epsilon)=x\)</span>를 만족한다. 위 수식은 미분 가능함을 증명하기 위한 수식이다. <span class="math notranslate nohighlight">\(F_\theta\)</span>는 심층신경망을 의미한다.</p>
+<p>CMs은 scratch부터 학습하는 방식과 Distillation 방식으로 나뉘는데 보편적으로 Distillation이 사용된다. Distillation 방식은 지수이동평균(Exponential Moving Average, EMA)을 통해 self-consistency를 학습할 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+L(\theta,\theta^-;\Phi)=\mathbb{E}_{x,t}\bigg[d\bigg(f_\theta(x_{t_{n+1}},t_{n+1}),f_{\theta^-}(\hat{x}^\phi_{t_n},t_n)\bigg)\bigg].
+\]</div>
+<p><span class="math notranslate nohighlight">\(\theta^-\)</span>는 <span class="math notranslate nohighlight">\(\theta\)</span>에 대한 EMA를 의미하며 <span class="math notranslate nohighlight">\(d(\cdot, \cdot)\)</span>은 두 sample 사이의 거리를 측정하는 지표이다. <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>는 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>에 대한 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 추정한 값으로 다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[
+\hat{x}^{\phi}_{t_n} \leftarrow x_{t_{n+1}}+(t_n-t_{n+1})\Phi(x_{t_{n+1}},t_{n+1};\phi)
+\]</div>
+<p><span class="math notranslate nohighlight">\(\Phi\)</span>는 numerical PF-ODE solver를 의미한다. (보통 DDIM을 사용하는 것 같다) 즉 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 PF-ODE solver로 예측한 값을 입력으로 하는 예측값과 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>을 입력으로 하는 예측값이 같도록 self-consistency를 비교하는 것이 핵심이다.</p>
+</section>
+<section id="latent-consistency-models">
+<h3>Latent Consistency Models<a class="headerlink" href="#latent-consistency-models" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_2.png"><img alt="Latent Diffusion Models" class="bg-primary mb-1" src="../../_images/LCM-LoRA_2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 561 </span><span class="caption-text">Latent Diffusion Models</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>LCMs은 CMs에 condition을 추가해주고 <span class="math notranslate nohighlight">\(F_\theta(x,t)\)</span>를 <span class="math notranslate nohighlight">\(\epsilon-Prediction\)</span>의 수식으로 치환한다. (<span class="math notranslate nohighlight">\(\mu\)</span>나 <span class="math notranslate nohighlight">\(v\)</span> prediction을 사용해도 됨.) 추가로 LDMs 기반이기 때문에 latent <span class="math notranslate nohighlight">\(z\)</span>에 대한 수식으로 변경해준다.</p>
+<div class="math notranslate nohighlight">
+\[f_\theta(z,c,t)=c_{skip}(t)z+c_{out}(t)\bigg(\frac{z-\sigma(t)\hat{\epsilon}_\theta(z,c,t)}{\alpha(t)}\bigg). (\epsilon-Prediction)\]</div>
+<div class="math notranslate nohighlight">
+\[L_{CD}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,n}\bigg[d\bigg(f_\theta(z_{t_{n+1}},c,t_{n+1}),f_{\theta^-}(\hat{z}^\psi_{t_n},c,t_n)\bigg)\bigg].\]</div>
+<p><span class="math notranslate nohighlight">\(n\)</span>은 timestep이지만 기존 <span class="math notranslate nohighlight">\(t\)</span>와는 다른, 구간 <span class="math notranslate nohighlight">\([\epsilon,T]\)</span>에 대한 하위 간격이다. <span class="math notranslate nohighlight">\((t_1=\epsilon&lt;t_2&lt;...&lt;t_N=T)\)</span></p>
+<div class="math notranslate nohighlight">
+\[t_i=\bigg(\epsilon^{1/\rho}+\frac{i-1}{N-1}(T^{1/\rho}-\epsilon^{1/\rho})\bigg)^\rho, \rho=7\]</div>
+<section id="cms">
+<h4>CMs과 차이점<a class="headerlink" href="#cms" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>LDMs 기반 모델이다.</p></li>
+<li><p>LCMs는 CMs와 다르게 Classifier-free Guidance(CFG)를 포함한 Distillation도 정의되어있다.(<span class="math notranslate nohighlight">\(\tilde{\epsilon}_\theta\)</span>)</p></li>
+<li><p>LCMs는 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+1}\)</span>의 차이가 너무 적어 학습의 수렴이 늦어지게 된다 가정하고 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+k}\)</span>의 consistency를 비교하는 Skipping timestep 방법을 제시했다. (k는 trade-off를 가지며 최적의 값은 20으로 지정.)</p></li>
+<li><p>Latent Consistency Finetuning: 새로운 데이터셋에 대해 distillation할 때 LDMs를 학습 할 필요 없이 LCMs의 Consistency Distillation만 학습하여 사용할 수 있다.</p></li>
+</ul>
+<p>(자세한 내용은 <a class="reference external" href="https://pseudo-lab.github.io/pseudodiffusers/docs/review/latent_consistency_models.html">LCMs review</a>를 참고)</p>
+</section>
+</section>
+</section>
+<section id="parameter-efficient-fine-tuning">
+<h2>Parameter-Efficient Fine-Tuning<a class="headerlink" href="#parameter-efficient-fine-tuning" title="Permalink to this heading">#</a></h2>
+<p>Parameter-Efficient Fine-Tuning(PEFT)이란 파라미터를 효율적으로 사용하면서 fine-tuning 할 수 있는 연구를 의미한다. Knowledge Distillation, Pruning, Quantization 등이 있다.</p>
+<p>본 연구에서는 PEFT 기법 중 LoRA를 사용했다.</p>
+<section id="low-rank-adaptation">
+<h3>Low Rank Adaptation<a class="headerlink" href="#low-rank-adaptation" title="Permalink to this heading">#</a></h3>
+<p>기존에 pre-trained 된 가중치 <span class="math notranslate nohighlight">\(\Phi_0\)</span>에 대하여 새로운 task에 fine-tuning하는 모델 <span class="math notranslate nohighlight">\(P_\Phi(y|x)\)</span>는 다음과 같이 가중치가 업데이트 된다. (<span class="math notranslate nohighlight">\(\Phi_0+\Delta\Phi\)</span>)</p>
+<div class="math notranslate nohighlight">
+\[\underset{\Phi}{max}\sum_{(x,y)\in Z}\sum^{|y|}_{t=1}\log{(P_\Phi(y_t|x,y&lt;t))}\]</div>
+<p>LLM이나 Stable Diffusion과 같은 대규모 모델은 새로운 task로 fine-tuning 시 매우 큰 차원의 모델 파라미터를 다시 학습하기 때문에 매우 큰 Cost가 생긴다. (시간적, 자원적) 이때 weight의 차원은 줄이면서 변화량을 기록하는 또다른 weight를 만들어 더 효율적으로 계산하는 방식은 다음과 같이 나타낼 수 있다: (파라미터 <span class="math notranslate nohighlight">\(\Theta\)</span>에 대해 <span class="math notranslate nohighlight">\(\Delta\Phi=\Delta\Phi(\Theta), |\Theta|&lt;&lt;|\Phi_0|\)</span>)</p>
+<div class="math notranslate nohighlight">
+\[\underset{\Phi}{max}\sum_{(x,y)\in Z}\sum^{|y|}_{t=1}\log{(P_{\Phi_0+\Delta\Phi(\Theta)}(y_t|x,y&lt;t))}\]</div>
+<p>즉 기존의 잘 학습된 weight는 그대로 두고 low rank로 decomposition 된 weight만 optimization 하는 방법론을 Low Rank Adaptation(LoRA)라고 한다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_3.png"><img alt="Low Rank Adaptation" class="bg-primary mb-1" src="../../_images/LCM-LoRA_3.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 562 </span><span class="caption-text">Low Rank Adaptation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 그림과 같이 원본 모델 weight는 freeze, LoRA는 rank를 r로 낮추어 finetuning한다. 이때 LoRA의 A는 random Gaussian으로, B는 zero로 weight initializing 한다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_4.png"><img alt="Low Rank Adaptation matrix" class="bg-primary mb-1" src="../../_images/LCM-LoRA_4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 563 </span><span class="caption-text">Low Rank Adaptation matrix</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 그림처럼 기존에는 d x d의 매우 큰 weight를 finetuning 해야 했지만, LoRA는 r만큼 압축된 weight matrix만 finetuning 하면 되기 때문에 훨씬 효율적이고 때에 따라 Fully fine-tuning 하는 방법들보다 더 좋은 성능을 보여주기도 한다. (그림은 <a class="reference external" href="https://ffighting.net/deep-learning-paper-review/language-model/lora/">이곳</a>을 참고하였습니다.)</p>
+<p>원본 논문의 LoRA는 LLM을 target으로 만들어졌기 때문에 Transformer의 query, key, value에 대한 parameter로 사용하였지만 Diffusion이나 다른 모델의 finetuning시에도 간단하게 사용 가능하다.</p>
+</section>
+</section>
+<section id="task-arithmetic-in-pretrained-models">
+<h2>Task Arithmetic in Pretrained Models<a class="headerlink" href="#task-arithmetic-in-pretrained-models" title="Permalink to this heading">#</a></h2>
+<p>Task Arithmetic은 특정 task에서 학습된 Model의 가중치를 task vector라 보고 각 task vector를 조합하여 새로운 task vector를 생성하는 방법론이다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_5.png"><img alt="Task Arithmetic" class="bg-primary mb-1" src="../../_images/LCM-LoRA_5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 564 </span><span class="caption-text">Task Arithmetic</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>pre-trained parameter를 <span class="math notranslate nohighlight">\(\theta_{pre}\)</span>, fine-tuning parameter를 <span class="math notranslate nohighlight">\(\theta_{ft}\)</span>라고 할때 task vector <span class="math notranslate nohighlight">\(\tau\)</span>는 <span class="math notranslate nohighlight">\(\theta_{ft}-\theta_{pre}\)</span>로 정의할 수 있다.
+이를 다양하게 조합하고 특히 d)처럼 task 간 analogy를 고려하여 연산하는 경우 새로운 task에 대한 성능을 높일 수 있다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="lcm-lora">
+<h1>3. LCM-LoRA<a class="headerlink" href="#lcm-lora" title="Permalink to this heading">#</a></h1>
+<section id="lora-distillation-for-lcm">
+<h2>3.1 LoRA Distillation for LCM<a class="headerlink" href="#lora-distillation-for-lcm" title="Permalink to this heading">#</a></h2>
+<p>LCMs의 Latent Consistency Distillation에 대한 pseudo code는 다음과 같다:</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_6.png"><img alt="Latent Consistency Distillation" class="bg-primary mb-1" src="../../_images/LCM-LoRA_6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 565 </span><span class="caption-text">Latent Consistency Distillation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>논문의 저자는 LCMs의 Distillation은 LDMs에 관한 일종의 fine-tuning으로 보고 LoRA를 적용하는 방법을 제안하였다.
+pre-trained 된 weight matrix <span class="math notranslate nohighlight">\(W_0\)</span>에 대하여 기울기 업데이트는 <span class="math notranslate nohighlight">\(W_0+\Delta W=W_0+BA, W_0\in \mathbb{R}^{d\times k}, B\in \mathbb{R}^{d\times r}, A\in \mathbb{R}^{r\times k}\)</span> 로 표현할 수 있으며 rank <span class="math notranslate nohighlight">\(r \leq \min{(d,k)}\)</span> 로 작은 값을 갖는다. <span class="math notranslate nohighlight">\(W_0\)</span>의 weight는 고정되며 input <span class="math notranslate nohighlight">\(x\)</span> 에 대한 forward pass는 다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[h=W_0x+\Delta Wx=W_0x+BAx. \tag{1}\]</div>
+<p>위와 같이 LCMs에 LoRA를 적용할 경우 학습 parameter를 크게 줄일 수 있어 효율적이다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_7.png"><img alt="compare trainable parameter" class="bg-primary mb-1" src="../../_images/LCM-LoRA_7.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 566 </span><span class="caption-text">compare trainable parameter</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>따라서 LCM-LoRA는 기존 LCMs 보다 더 큰 모델의 훈련과 실사용이 가능하다. LCMs의 경우 SD-V1.5나 SD-V2.1의 base Stable Diffusion을 사용했지만, LCM-LoRA는 SDXL과 SSD-1B(Segmind)로 확장하여 사용하였다. large Model에서도 LCD를 적용했을 때 잘 적응하는 모습을 볼 수 있었다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_8.png"><img alt="1024 x 1024 resolution image results with CFG scale w=7.5" class="bg-primary mb-1" src="../../_images/LCM-LoRA_8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 567 </span><span class="caption-text">1024 x 1024 resolution image results with CFG scale w=7.5</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="lcm-lora-as-universal-acceleration-module">
+<h2>3.2 LCM-LoRA as Universal Acceleration Module<a class="headerlink" href="#lcm-lora-as-universal-acceleration-module" title="Permalink to this heading">#</a></h2>
+<p>LCM-LoRA는 sampling step을 줄이는 distillation에 LoRA를 적용하였다. LoRA는 이외에도 customized datasets에 대해 fine-tuning할 때 주로 쓰이는데 이같은 style에 대한 LoRA와 LCM-LoRA가 추가 학습없이 바로 합쳐져 사용할 수 있음을 발견했다. 저자는 이 발견이 task arithmetic에 대한 관점으로 해석할 수 있다고 주장하였다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_9.png"><img alt="Style-LoRA with LCM-LoRA" class="bg-primary mb-1" src="../../_images/LCM-LoRA_9.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 568 </span><span class="caption-text">Style LoRA with LCM-LoRA</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>LCM-LoRA의 fine-tuned parameter를 <span class="math notranslate nohighlight">\(\tau_{LCM}\)</span>이라 할 때, <span class="math notranslate nohighlight">\(\tau_{LCM}\)</span>은 acceleration vector라 할 수 있다. 그리고 custom dataset에서 학습한 LoRA의 fine-tuned parameter를 <span class="math notranslate nohighlight">\(\tau'\)</span>이라 할 때, <span class="math notranslate nohighlight">\(\tau'\)</span>은 style vector라 할 수 있다. LCMs를 통해 custom dataset에 대한 image를 생성할 때, 파라미터는 다음과 같이 조합된다:</p>
+<div class="math notranslate nohighlight">
+\[\theta'_{LCM}=\theta_{pre}+\tau'_{LCM} \tag{2}\]</div>
+<div class="math notranslate nohighlight">
+\[\tau'_{LCM}=\lambda_1\tau'+\lambda_2\tau_{LCM} \tag{3}\]</div>
+<p>파라미터는 단순한 선형 결합을 통해 이루어지며 <span class="math notranslate nohighlight">\(\lambda_1\)</span>과 <span class="math notranslate nohighlight">\(\lambda_2\)</span>는 하이퍼파라미터다. 추가적인 학습없이 다음과 같은 결과를 얻을 수 있었다:</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LCM-LoRA_10.png"><img alt="fine-tuning with LCM-LoRA" class="bg-primary mb-1" src="../../_images/LCM-LoRA_10.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 569 </span><span class="caption-text">fine-tuning with LCM-LoRA</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>4. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>training-free acceleration module인 LCM-LoRA를 제안.</p></li>
+<li><p>PF-ODE를 예측하며 Stable Diffusion 및 SD LoRA에 fast inference, minimal step을 제공함.</p></li>
+<li><p>강력한 일반화 성능 증명.</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="one-step-image-translation.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">One-Step Image Translation with Text-to-Image Models</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="MimicBrush.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">MimicBrush: Zero-shot Image Editing with Reference Imitation</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#proposal">Proposal</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">기존 연구의 한계점</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#lcms">LCMs 기반 연구</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-models">Latent Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cms">CMs과 차이점</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#parameter-efficient-fine-tuning">Parameter-Efficient Fine-Tuning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-adaptation">Low Rank Adaptation</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#task-arithmetic-in-pretrained-models">Task Arithmetic in Pretrained Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora">3. LCM-LoRA</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lora-distillation-for-lcm">3.1 LoRA Distillation for LCM</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#lcm-lora-as-universal-acceleration-module">3.2 LCM-LoRA as Universal Acceleration Module</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">4. Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/LLM_grounded_Diffusion.html b/docs/review/LLM_grounded_Diffusion.html
old mode 100644
new mode 100755
index c2559ce6..dac544aa
--- a/docs/review/LLM_grounded_Diffusion.html
+++ b/docs/review/LLM_grounded_Diffusion.html
@@ -1,850 +1,870 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>LLM Grounded Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LLM_grounded_Diffusion';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DiT" href="DiT.html" />
-    <link rel="prev" title="Latent Consistency Models" href="latent_consistency_models.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LLM_grounded_Diffusion.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/LLM_grounded_Diffusion.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>LLM Grounded Diffusion</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">LLM-grounded Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> LLM-grounded Diffusion: Enhancing Prompt Understanding of Text-to-Image Diffusion Models with Large Language Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: [<a class="reference external" href="https://arxiv.org/pdf/2305.13655">https://arxiv.org/pdf/2305.13655</a>]</p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/TonyLianLong/LLM-groundedDiffusion">TonyLianLong/LLM-groundedDiffusion</a></p></li>
-<li><p>Project Page: <a class="reference external" href="https://llm-grounded-diffusion.github.io/">https://llm-grounded-diffusion.github.io/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sehwan Park</p></li>
-<li><p><strong>Last updated on May. 24, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="llm-grounded-diffusion">
-<h1>LLM Grounded Diffusion<a class="headerlink" href="#llm-grounded-diffusion" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<p>최근의 text-to-image generation 모델은 큰 발전을 이루었다. 하지만 이러한 모델들은 여전히 numeracy와 spatial reasoning을 요구하는 복잡한 프롬프트를 잘 반영하지 못하고 이미지를 생성하는 문제들이 있다. 그래서 본 논문에서는 LLM과 레이아웃 기반 이미지 생성모델을 활용하여 Diffusion model에서 프롬프트 이해 능력을 향상시키는 방법을 제안한다.</p>
-</section>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD1.png"><img alt="LMD_1" class="bg-primary mb-1" src="../../_images/LMD1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 515 </span><span class="caption-text">Limitation in Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Diffusion model의 등장과 발전에 따라 T2I(Text to Image) generation 모델은 크게 발전되어왔다. 최근의 SDXL 모델을 보면 상당한 quality의 이미지를 생성하는 것을 볼 수 있다. 그럼에도 불구하고, diffusion model은 복잡한 프롬프트를 잘 반영해서 이미지를 생성하는 것에 취약한 점을 보인다. 위의 figure를 보면 크게 Negation, Generative Numeracy, Attribute Binding, Spatial Relationships에서 큰 문제점을 보임을 알 수 있다.</p>
-<p>가장 단순히 위의 문제를 해결하는 방법은 복잡한 프롬프트가 포함된 대규모의 multi-modal dataset을 가지고 모델을 훈련하는 방법이다. 하지만 이러한 방법은 시간과 리소스 측면에서 좋지 못한 면이 있으며, 좋은 quality의 multi-modal dataset을 대규모로 확보하는 것조차 쉽지 않은 일이다.</p>
-<p>이러한 문제를 피하면서 위의 figure의 대표적인 문제점을 해결하기 위해 본 논문에서는 LLM 및 Layout to Image Generation 모델을 활용하여 training-free 방법으로 접근한다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD2.png"><img alt="LMD_2" class="bg-primary mb-1" src="../../_images/LMD2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 516 </span><span class="caption-text">Overview of LLM-Grounded Diffusion</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>Stage1</p>
-<p>주어진 prompt에 대해서 LLM을 활용하여 표현되어야 할 foreground object에 대한 layout을 생성한다. prompt로 부터 foreground object들을 해당 attribute과 함께 parsing을 하고 reasoning을 통해 올바른 bounding box coordinate을 얻는 것이 목표인 단계이다. LLM의 In-Context Learning(7-shot)을 활용하여 LLM을 Layout Generator로써 활용한다.</p>
-</li>
-<li><p>Stage2</p>
-<p>Stage1으로 부터 각각의 foreground object에 대한 caption과 bounding box coordinate을 기반으로 실제 해당 bounding box에 해당 caption이 생성되도록 하는 단계이다. 이 과정에서 training-free 방법을 적용하기 위해 Stable Diffusion의 inference과정에서 attention map을 manipulate 하는 방법을 통해 Layout to image generation을 가능케한다.</p>
-</li>
-</ul>
-<p>본 논문의 main contribution은 다음과 같다.</p>
-<ol class="arabic simple">
-<li><p>We propose a training-free two-stage generation pipeline that introduces LLMs to improve the prompt understanding ability of text-to-image diffusion models.</p></li>
-<li><p>We introduce layout-grounded Stable Diffusion, a novel controller that steers an off-the-shelf diffusion model to generate images grounded on instance-level box layouts from the LLM.</p></li>
-<li><p>LMD enables instruction-based scene specification and allows broader language support in the prompts.</p></li>
-<li><p>We propose a benchmark to assess the prompt understanding ability of a text-to-image model and demonstrate the superior performance of LMD over recent baselines.</p></li>
-</ol>
-</section>
-<section id="id1">
-<h2>LLM-grounded Diffusion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<ol class="arabic">
-<li><p>LLM-based Layout Generation</p>
-<p>이 단계는 위의 stage1과 같은 단계로써 LLM을 통해 prompt로 부터 각 foreground object의 caption과 bounding box의 coordinate을 얻는 단계이다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD4.png"><img alt="LMD_4" class="bg-primary mb-1" src="../../_images/LMD4.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 517 </span><span class="caption-text">Overview Prompt</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>우선 이 단계에서의 prompt는 다음과 같이 구성되어 있다. prompt의 가장 윗단인 Instruction을 보면 LLM에게 정보를 주는 말들로 이루어져 있으며 Task specification과 supporting details로 이루어져 있음을 알 수 있다. LLM에게 직접적으로 해야할 일들과 정보를 주는 prompt라고 볼 수 있다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD3.png"><img alt="LMD_3" class="bg-primary mb-1" src="../../_images/LMD3.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 518 </span><span class="caption-text">In-context Examples</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Overview Prompt의 In-context Examples에 관련된 부분이다. 이 부분을 통해 input prompt로 부터 원하는 형태로 output값을 얻을 수 있다. LLM을 직접 학습시키지 않고 few shot example을 통해 LLM으로 부터 원하는 결과를 이끌어내는 방법을 취한 부분이다. Gpt 3.5같은 경우에는 7-shot을 사용했고, Gpt4 같은 경우는 1-shot만으로도 충분히 원하는 형태로 결과를 얻을 수 있었다고 한다. 결과는 Objects 부분에 나와있다. Foreground Object에 대한 caption값과 해당 bounding box의 coordinate값으로 이루어져 있음을 알 수 있다.</p>
-</li>
-<li><p>Layout-grounded Stable Diffusion</p>
-<p>이 단계에서는 LLM이 생성한 레이아웃을 기반으로 이미지 생성을 하기 위해 컨트롤러를 도입한다. 이전의 training-free 방법을 택했던 work들의 경우, regional denoising 혹은 inference과정에서의 단순한 attention manipulation을 통해 semantic guidance를 적용하지만, 이러한 방법들은 의미 있는 영역 내 객체의 수(generative numeracy)를 제어하는 능력이 부족하다. 이는 다른 인스턴스들간의 구별이 latent space나 attention map에서 어렵기 때문에 발생하며, 이는 인스턴스 수준의 control을 힘들게 하는 요인이다. 반면, LMD(LLM-grounded Diffusion)는 각 개별 경계 상자에 대해 마스킹된 잠재 변수를 먼저 생성하고, 이러한 마스킹된 잠재 변수를 prior로 사용하여 전체 이미지 생성을 안내함으로써 인스턴스 수준의 control이 가능토록 한다. 이를 통해 각 객체 인스턴스의 정확한 배치와 속성 결합을 허용한다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD7.png"><img alt="LMD_7" class="bg-primary mb-1" src="../../_images/LMD7.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 519 </span><span class="caption-text">Overall image generation with masked latents as priors</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Stage2는 크게 step1,2로 나눠진다. step1의 경우에는 stage1에서 구한 각각의 box들에 대해서 box내의 object들에 대한 latent map을 구하는 과정이다. step2는 step1에서 구한 각 box에 대한 latent map을 compose하여 전체적인 image를 생성하는 과정이다.</p>
-<ul>
-<li><p>step1</p>
-<p>예를 들어, 위의 그림처럼 gray cat에 관한 bounding box가 stage1을 통해 구해졌다면, box안에 gray cat이 그려지도록 유도를 할 수 있을 것이다. 그러는 과정에서 저 box에 gray cat이 생성되도록 하는 마치 Ground Truth latent map을 얻을 수 있게 된다. 모든 object들에 대해 이러한 방법으로 denoising 과정에서 모든 step의 latent map을 구하는 것이다.  모든 단일 box에 대해 그 box에 해당 caption object가 생성되도록 하는 GT latent map을 구하는 게 step1의 과정이라고 생각하면 된다. 사실 상 LMD의 핵심은 step1에서 진행된다고 볼 수 있다.</p>
-<p>크게 보자면, 각 foreground object에 대해서 <span class="math notranslate nohighlight">\(Z_T\)</span> 부터 <span class="math notranslate nohighlight">\(Z_0\)</span>까지 denoising을 거치면서 모든 step t에 대한 latent map을 구하면 된다. 이 때, 가장 중요한 것은 foreground object가 실제 box안에서 잘 생성되도록 하는것이 선행이 되어야 한다. 이를 유도하기 위해 저자들은 각 denoising step마다 box내부에 해당 foreground object가 생성되도록 attention manipulation을 진행한다.</p>
-<div class="math notranslate nohighlight">
-\[
-     A_{uv}^{(i)} = \text{Softmax}(q_u^T k_v)
-     \]</div>
-<p>식(1)과 같이 pixel값들과 prompt내에서의 text token 간의 cross-attention map을 나타낼 수 있다. u는 이미지 내의 모든 각 pixel들을 의미하고 v는 각 text token을 의미한다.</p>
-<p>구성되는 프롬프트에 대해 예시를 들어 정리하자면, 전체 프롬프트가 “A realistic photo of a gray cat and an orange dog on the grass” 였다고 하자. 그러면 각 foreground object에 대해서 프롬프트를 따로 생성한다. “[background prompt] with [box caption]” (e.g., “a realistic image of an indoor scene with a gray cat”) 형태로 각 foreground object에 대한 프롬프트를 구성한다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD6.png"><img alt="LMD_6" class="bg-primary mb-1" src="../../_images/LMD6.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 520 </span><span class="caption-text">Overview of attention manipulation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Object와 Bounding box를 align하기 위해서는 위의 그림에서 보이듯이 box 내부 pixel의 object에 관한 token과의 attention은 증가되고 다른 token과의 attention은 감소되면 된다. 위의 그림을 예시로 보면 “a gray cat”이라는 token과는 box내부 pixel들이 attention이 증가하고, 다른 token과는 attention이 감소하도록 유도되면 된다. 이 방법을 유도 하기 위해 본 논문의 저자들은 energy function을 사용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-     E(A^{(i)}, i, v) = -\text{Topk}_u (A_{uv} \cdot b^{(i)}) + \omega \text{Topk}_u (A_{uv} \cdot (1 - b^{(i)}))
-     \]</div>
-<p>식(2)의 Energy function을 보면 foreground object i에 대한 token v가 정해져 있을 때, box 내부 pixel이 token v와의 attention값이 크고 box 외부 pixel이 token v와의 attention값이 작으면 Energy function값이 최소가 됨을 알 수 있다. 정리하자면 위의 Energy function값이 최대한 작아지면 box 내부의 pixel들의 object token에 대한 attention이 증가되고, box와 foreground object간의 align이 잘 이루어지게 되는 것이다.</p>
-<div class="math notranslate nohighlight">
-\[
-     z^{(i)}_t \leftarrow z^{(i)}_t - \eta \nabla_{z^{(i)}_t} \sum_{v \in V_i} E(A^{(i)}, i, v)
-     \]</div>
-<div class="math notranslate nohighlight">
-\[
-     z^{(i)}_{t-1} \leftarrow \text{Denoise}(z^{(i)}_t)
-     \]</div>
-<p>Energy function이 최소가 되도록 하기 위해 본 논문은 식(3)과 같은 optimize과정을 거친다. 방법은 단순하다. Gradient Descent를 생각해보면 parameter로 loss function을 표현하고 loss function이 최소가 되도록 parameter들을 optimize하면서 유도한다. 이 방법 역시 같다. Energy function을 최소로 만드는 것이 목표이기에 이는 loss function과 같은 역할을 하게 된다. 그리고 Energy function의 식들은 <span class="math notranslate nohighlight">\(z_t\)</span>에 대해 표현이 가능하기에, Energy function을 통해 <span class="math notranslate nohighlight">\(z_t\)</span>를 optimize하여 Energy function이 최소화가 되도록 유도한다. Energy function 최소화는 각 denoising timestep마다 5회 반복되며, denoising 단계가 다섯 번 진행될 때마다 선형적으로 감소하여 반복 횟수가 1회로 줄어든다. 또한, 30step 후에는 guidance를 수행하지 않는다.</p>
-<p>guidance가 진행되면서 denoising이 다 끝나고 나면 모든 step에 대한 latent map을 얻을 수 있게 되고, attention map을 얻을 수 있게 된다. 이 때 attention map을 SAM을 통해 segment를 진행하거나 threshold값을 설정하여 Foreground mask를 구하게 된다. 이후에 구한 모든 time step에 대한  latent map에 Foreground Mask와 pixel wise곱을 해주어서 masked latent를 구하게 된다.</p>
-</li>
-<li><p>step2</p>
-<p>step2에서는 step1에서 구한 각 foreground object들을 compose하여 처음에 주어진 prompt에 대한 이미지를 잘 생성하는 것이 목표인 단계이다. 기존 work에 의하면 diffusion model은 denoising의 초기 단계에서 semantic한 정보들을 생성하며, 이후 단계에서부터 fine-detail한 부분을 생성한다. 이 점을 이용해서 compose를 하는 step2에서는 단순히 latent map을 compose한다음 denoising을 진행하는 것이 아니라, step의 절반 지점까지는 latent map을 compose하면서 step1과 마찬가지로 optimize를 진행하며 foreground object들이 실제로 원하는 위치에 생성되도록 유도를 하게 된다. 이후부터는 fine-detail한 부분을 생성하여 자연스러운 이미지를 만들기 위해 compose나 optimize를 하지 않고 이미지를 생성하게 된다. Compose관련 식과 optimize를 위한 step2에서의 Energy function은 아래와 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-     z^{(\text{comp})}_t \leftarrow \text{LatentCompose}(z^{(\text{comp})}_t, \hat{z}^{(i)}_t, m^{(i)}) \quad \forall i
-     \]</div>
-<div class="math notranslate nohighlight">
-\[
-     E^{(\text{comp})}(A^{(\text{comp})}, A^{(i)}, i, v) = E(A^{(\text{comp})}, i, v) + \lambda \sum_{u \in V'_i} \left| A^{(\text{comp})}_{uv} - A^{(i)}_{uv} \right|
-     \]</div>
-<p>최종적으로 denoising이 다 진행된 이후, decoder를 통해 latent space에서 pixel space로 변환되어 최종 이미지가 생성된다.</p>
-</li>
-</ul>
-</li>
-</ol>
-</section>
-<section id="evaluation">
-<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
-<p>Evaluation 같은 경우는 Qualitive한 결과와 Quantitive한 결과로 나누어서 보여준다. Introduction에서 소개한 기존 T2I model의 문제점 4가지를 잘 해결하고자 하는 것이 이 work의 목표이었기에 4가지 항목에 대해 평가를 진행한다. 데이터 같은 경우는 Negation, Attribute Binding, Generative Numeracy, Spatial Relationships를 각각 평가하기 위한 100개씩의 데이터를 수집해서 진행을 하였다고 한다. Evaluation metric이 존재하진 않고 Quantitive한 결과를 위해서 OWL-VIT detector를 활용하여 object들에 대한 bounding box값을 얻은 후에, 생성된 이미지가 prompt에 잘 맞게 생성되어있는지를 확인하였다고 한다. 추가적으로 Ablation study에 대한 다양한 결과들도 포함되어 있다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD8.png"><img alt="LMD_8" class="bg-primary mb-1" src="../../_images/LMD8.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 521 </span><span class="caption-text">Qualititive Results1 of LMD</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD13.png"><img alt="LMD_13" class="bg-primary mb-1" src="../../_images/LMD13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 522 </span><span class="caption-text">Qualititive Results2 of LMD</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>기존 Stable Diffusion XL 모델과 비교하여 LMD가 얼마나 prompt에 잘 대응하는 이미지를 생성하는지를 보여주는 결과이다. Introduction에서 언급했듯이 기존 SDXL모델은 이미지의 전반적인 부분은 잘 생성하지만 numarcy, spatial relationship, attribute matching등을 잘 만족시키지 못하는 경우가 발생한다. 하지만 LMD는 이러한 문제점들을 잘 해결하고 있는 것으로 보인다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD9.png"><img alt="LMD_9" class="bg-primary mb-1" src="../../_images/LMD9.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 523 </span><span class="caption-text">Quantitive Results of LMD &amp; LMD+</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 result는 기존 Stable Diffusion과 이 논문에서 제시하는 LMD 방법 그리고 기존 layout to image generation model인 GLIGEN의 adapter를 가져와서 통합한 방법인 LMD+ 세가지를 비교한다. LMD &amp; LMD+ 모두 기존 SD에 비해 월등한 결과를 보임을 알 수 있다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD11.png"><img alt="LMD11" class="bg-primary mb-1" src="../../_images/LMD11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 524 </span><span class="caption-text">Ablation of LMD &amp; LMD+</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Ablation study의 결과를 보면, 앞선 결과와 마찬가지로 LMD가 훨씬 더 좋은 성능임을 알 수 있다. LMD의 Baseline model로 SDv1.5, SDv2.1을 각각 써본 결과 둘의 차이는 거의 없었다고 한다. 또한 Foreground mask를 구하기 위해 SAM을 사용했을 경우와 SAM을 사용하지 않고 Threshold를 사용해서 구한 결과도 보여주었는데 이는 LMD일때와 LMD+일때가 서로 다른 결과를 보인다. LMD에서는 attention기반의 guidance가 layout box와 관련하여 공간적으로 정확하지 않기 때문에, SAM은 객체를 커버하는 올바른 마스크를 얻는 데 도움을 준다. 따라서 SAM을 제거하면 LMD에서 약간의 성능 저하가 발생하게 된다. 반면에 LMD+에서는 기존 잘 훈련된  GLIGEN 모델을 가져와 이용하기 때문에 대부분의 경우 SAM이 필요하지 않게된다. 오히려, SAM은 때때로 배경을 포함하는 영역을 선택하여 혼란을 일으키고 성능을 저하시키게 되어 SAM을 제거하면 LMD+에서는 결과가 오히려 개선되는 효과가 있었다고 한다.</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD10.png"><img alt="LMD10" class="bg-primary mb-1" src="../../_images/LMD10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 525 </span><span class="caption-text">Ablations on layout-to-image methods as stage 2 with LMD’s LLM layout generator as stage 1</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 결과는 Stage1과 Stage2에 어떤 방법을 가져왔느냐에 따른 전체적인 Quantative한 결과를 보여준다. 우선 Stage1 즉, object에 대한 개별 bounding box를 생성하는 부분에 대해서는 이 논문의 LMD기법을 다 사용을 하였다. 생성된 box를 기반으로 이미지를 생성하는 Stage 2에 대해서 training-free 방법 즉, guidance를 주는 방법을 사용한 경우와 기존 pretrain된 layout to image generation model, 여기에 LMD기법을 추가적으로 더한 방법인 training-based 방법을 나누어서 비교한다. 우선 training-free 방법의 경우, 다른 guidance 기법에 비해 이 논문에서 제시한 stage2의 방법이 훨씬 더 뛰어난 성능을 보임을 알 수 있다. training-based 방법의 경우, 기존 layout to image generation model인 GLIGEN을 그대로 가져온 경우, GLIGEN에 이 논문의 stage2 기법을 적용한 LMD+, LMD+ 에서 GPT3.5대신 GPT4를 사용한 경우로 나누어서 비교를 한다. 사실상 LMD+가 가장 좋은 성능을 보임을 알 수 있다. GPT 버전을 GPT4로 바꾼 경우는 Numeracy에서는 성능이 살짝 감소하지만, 다른 부분에 대해서는 성능이 좀 더 많이 증가한 모습을 확인할 수 있다.</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD12.png"><img alt="LMD10" class="bg-primary mb-1" src="../../_images/LMD12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 526 </span><span class="caption-text">Ablations on GPT version + Compare with SD</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>GPT같은 경우 GPT3.5-turbo를 사용한 경우와 GPT4를 사용한 경우를 나누어서 실험을 진행하였다. GPT4의 경우 1-shot으로 진행을 해도 layout을 잘 만드는 모습을 보였고, GPT3.5의 경우 7shots으로 진행을 해야 완벽하게 layout을 생성하는 결과를 보임을 알 수 있다.</p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD14.png"><img alt="LMD14" class="bg-primary mb-1" src="../../_images/LMD14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 527 </span><span class="caption-text">Ablations on different LLM and same LLM with different size</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>서로 다른 LLM을 사용해서 진행을 해본 결과, 다른 open-source model에 비해 gpt4가 확실히 좋은 성능을 보였음을 알 수 있다. 또한 똑같은 open-source model을 다른 크기로 적용해본결과 더 큰 규모의 model이 더 좋은 성능을 보임을 알 수 있다.</p>
-</section>
-<section id="summary">
-<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
-<p>LMD는 text-to-image generation diffusion model이 prompt를 더 잘 반영해서 이미지를 생성할 수 있도록 한 방법이다. 추가적인 Training을 하지 않고 prompt를 더 잘이해할 수 있도록 기존 Text to Image에서 intermediate representation인 layout을 활용한 방법이 특징이라고 할 수 있다. 두 가지 stage로 나눠 LLM-based text-grounded layout generation과 layout-grounded image generation을 통해 문제를 해결하고자 한 논문이다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="latent_consistency_models.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Latent Consistency Models</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DiT.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DiT</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">LLM-grounded Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>LLM Grounded Diffusion &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LLM_grounded_Diffusion';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DiT" href="DiT.html" />
+    <link rel="prev" title="Latent Consistency Models" href="latent_consistency_models.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LLM_grounded_Diffusion.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/LLM_grounded_Diffusion.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>LLM Grounded Diffusion</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">LLM-grounded Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> LLM-grounded Diffusion: Enhancing Prompt Understanding of Text-to-Image Diffusion Models with Large Language Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: [<a class="reference external" href="https://arxiv.org/pdf/2305.13655">https://arxiv.org/pdf/2305.13655</a>]</p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/TonyLianLong/LLM-groundedDiffusion">TonyLianLong/LLM-groundedDiffusion</a></p></li>
+<li><p>Project Page: <a class="reference external" href="https://llm-grounded-diffusion.github.io/">https://llm-grounded-diffusion.github.io/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sehwan Park</p></li>
+<li><p><strong>Last updated on May. 24, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="llm-grounded-diffusion">
+<h1>LLM Grounded Diffusion<a class="headerlink" href="#llm-grounded-diffusion" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<p>최근의 text-to-image generation 모델은 큰 발전을 이루었다. 하지만 이러한 모델들은 여전히 numeracy와 spatial reasoning을 요구하는 복잡한 프롬프트를 잘 반영하지 못하고 이미지를 생성하는 문제들이 있다. 그래서 본 논문에서는 LLM과 레이아웃 기반 이미지 생성모델을 활용하여 Diffusion model에서 프롬프트 이해 능력을 향상시키는 방법을 제안한다.</p>
+</section>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD1.png"><img alt="LMD_1" class="bg-primary mb-1" src="../../_images/LMD1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 515 </span><span class="caption-text">Limitation in Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Diffusion model의 등장과 발전에 따라 T2I(Text to Image) generation 모델은 크게 발전되어왔다. 최근의 SDXL 모델을 보면 상당한 quality의 이미지를 생성하는 것을 볼 수 있다. 그럼에도 불구하고, diffusion model은 복잡한 프롬프트를 잘 반영해서 이미지를 생성하는 것에 취약한 점을 보인다. 위의 figure를 보면 크게 Negation, Generative Numeracy, Attribute Binding, Spatial Relationships에서 큰 문제점을 보임을 알 수 있다.</p>
+<p>가장 단순히 위의 문제를 해결하는 방법은 복잡한 프롬프트가 포함된 대규모의 multi-modal dataset을 가지고 모델을 훈련하는 방법이다. 하지만 이러한 방법은 시간과 리소스 측면에서 좋지못한 면이 있으며, 좋은 quality의 multi-modal dataset을 대규모로 확보하는 것 조차 쉽지 않은 일이다.</p>
+<p>이러한 문제를 피하면서 위의 figure의 대표적인 문제점을 해결하기 위해 본 논문에서는 LLM 및 Layout to Image Generation 모델을 활용하여 training-free 방법으로 접근한다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD2.png"><img alt="LMD_2" class="bg-primary mb-1" src="../../_images/LMD2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 516 </span><span class="caption-text">Overview of LLM-Grounded Diffusion</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>Stage1</p>
+<p>주어진 prompt에 대해서 LLM을 활용하여 표현되어야 할 foreground object에 대한 layout을 생성한다. prompt로 부터 foreground object들을 해당 attribute과 함께 parsing을 하고 reasoning을 통해 올바른 bounding box coordinate을 얻는 것이 목표인 단계이다. LLM의 In-Context Learning(7-shot)을 활용하여 LLM을 Layout Generator로써 활용한다.</p>
+</li>
+<li><p>Stage2</p>
+<p>Stage1으로 부터 각각의 foreground object에 대한 caption과 bounding box coordinate을 기반으로 실제 해당 bounding box에 해당 caption이 생성되도록 하는 단계이다. 이 과정에서 training-free 방법을 적용하기 위해 Stable Diffusion의 inference과정에서 attention map을 manipulate 하는 방법을 통해 Layout to image generation을 가능케한다.</p>
+</li>
+</ul>
+<p>본 논문의 main contribution은 다음과 같다.</p>
+<ol class="arabic simple">
+<li><p>We propose a training-free two-stage generation pipeline that introduces LLMs to improve the prompt understanding ability of text-to-image diffusion models.</p></li>
+<li><p>We introduce layout-grounded Stable Diffusion, a novel controller that steers an off-the-shelf diffusion model to generate images grounded on instance-level box layouts from the LLM.</p></li>
+<li><p>LMD enables instruction-based scene specification and allows broader language support in the prompts.</p></li>
+<li><p>We propose a benchmark to assess the prompt understanding ability of a text-to-image model and demonstrate the superior performance of LMD over recent baselines.</p></li>
+</ol>
+</section>
+<section id="id1">
+<h2>LLM-grounded Diffusion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<ol class="arabic">
+<li><p>LLM-based Layout Generation</p>
+<p>이 단계는 위의 stage1과 같은 단계로써 LLM을 통해 prompt로 부터 각 foreground object의 caption과 bounding box의 coordinate을 얻는 단계이다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD4.png"><img alt="LMD_4" class="bg-primary mb-1" src="../../_images/LMD4.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 517 </span><span class="caption-text">Overview Prompt</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>우선 이 단계에서의 prompt는 다음과 같이 구성되어 있다. prompt의 가장 윗단인 Instruction을 보면 LLM에게 정보를 주는 말들로 이루어져 있으며 Task specification과 supporting details로 이루어져 있음을 알 수 있다. LLM에게 직접적으로 해야할 일들과 정보를 주는 prompt라고 볼 수 있다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD3.png"><img alt="LMD_3" class="bg-primary mb-1" src="../../_images/LMD3.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 518 </span><span class="caption-text">In-context Examples</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Overview Prompt의 In-context Examples에 관련된 부분이다. 이 부분을 통해 input prompt로 부터 원하는 형태로 output값을 얻을 수 있다. LLM을 직접 학습시키지 않고 few shot example을 통해 LLM으로 부터 원하는 결과를 이끌어내는 방법을 취한 부분이다. Gpt 3.5같은 경우에는 7-shot을 사용했고, Gpt4 같은 경우는 1-shot만으로도 충분히 원하는 형태로 결과를 얻을 수 있었다고 한다. 결과는 Objects 부분에 나와있다. Foreground Object에 대한 caption값과 해당 bounding box의 coordinate값으로 이루어져 있음을 알 수 있다.</p>
+</li>
+<li><p>Layout-grounded Stable Diffusion</p>
+<p>이 단계에서는 LLM이 생성한 레이아웃을 기반으로 이미지 생성을 하기 위해 컨트롤러를 도입한다. 이전의 training-free 방법을 택했던 work들의 경우, regional denoising 혹은 inference과정에서의 단순한 attention manipulation을 통해 semantic guidance를 적용하지만, 이러한 방법들은 의미 있는 영역 내 객체의 수(generative numeracy)를 제어하는 능력이 부족하다. 이는 다른 인스턴스들간의 구별이 latent space나 attention map에서 어렵기 때문에 발생하며, 이는 인스턴스 수준의 control을 힘들게 하는 요인이다. 반면, LMD(LLM-grounded Diffusion)는 각 개별 경계 상자에 대해 마스킹된 잠재 변수를 먼저 생성하고, 이러한 마스킹된 잠재 변수를 prior로 사용하여 전체 이미지 생성을 안내함으로써 인스턴스 수준의 control이 가능토록 한다. 이를통해 각 객체 인스턴스의 정확한 배치와 속성 결합을 허용한다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD7.png"><img alt="LMD_7" class="bg-primary mb-1" src="../../_images/LMD7.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 519 </span><span class="caption-text">Overall image generation with masked latents as priors</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Stage2는 크게 step1,2로 나눠진다. step1의 경우에는 stage1에서 구한 각각의 box들에 대해서 box내의 object들에 대한 latent map을 구하는 과정이다. step2는 step1에서 구한 각 box에 대한 latent map을 compose하여 전체적인 image를 생성하는 과정이다.</p>
+<ul>
+<li><p>step1</p>
+<p>예를 들어, 위의 그림처럼 gray cat에 관한 bounding box가 stage1을 통해 구해졌다면, box안에 gray cat이 그려지도록 유도를 할 수 있을 것이다. 그러는 과정에서 저 box에 gray cat이 생성되도록 하는 마치 Ground Truth latent map을 얻을 수 있게 된다. 모든 object들에 대해 이러한 방법으로 denoising 과정에서 모든 step의 latent map을 구하는 것이다.  모든 단일 box에 대해 그 box에 해당 caption object가 생성되도록 하는 GT latent map을 구하는 게 step1의 과정이라고 생각하면 된다. 사실 상 LMD의 핵심은 step1에서 진행된다고 볼 수 있다.</p>
+<p>크게 보자면, 각 foreground object에 대해서 <span class="math notranslate nohighlight">\(Z_T\)</span> 부터 <span class="math notranslate nohighlight">\(Z_0\)</span>까지 denoising을 거치면서 모든 step t에 대한 latent map을 구하면 된다. 이 때, 가장 중요한 것은 foreground object가 실제 box안에서 잘 생성되도록 하는것이 선행이 되어야 한다. 이를 유도하기 위해 저자들은 각 denoising step마다 box내부에 해당 foreground object가 생성되도록 attention manipulation을 진행한다.</p>
+<div class="math notranslate nohighlight">
+\[
+     A_{uv}^{(i)} = \text{Softmax}(q_u^T k_v)
+     \]</div>
+<p>식(1)과 같이 pixel값들과 prompt내에서의 text token 간의 cross-attention map을 나타낼 수 있다. u는 이미지 내의 모든 각 pixel들을 의미하고 v는 각 text token을 의미한다.</p>
+<p>구성되는 프롬프트에 대해 예시를 들어 정리하자면, 전체 프롬프트가 “A realistic photo of a gray cat and an orange dog on the grass” 였다고 하자. 그러면 각 foreground object에 대해서 프롬프트를 따로 생성한다. “[background prompt] with [box caption]” (e.g., “a realistic image of an indoor scene with a gray cat”) 형태로 각 foreground object에 대한 프롬프트를 구성한다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD6.png"><img alt="LMD_6" class="bg-primary mb-1" src="../../_images/LMD6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 520 </span><span class="caption-text">Overview of attention manipulation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Object와 Bounding box를 align하기 위해서는 위의 그림에서 보이듯이 box 내부 pixel의 object에 관한 token과의 attention은 증가되고 다른 token과의 attention은 감소되면 된다. 위의 그림을 예시로 보면 “a gray cat”이라는 token과는 box내부 pixel들이 attention이 증가하고, 다른 token과는 attention이 감소하도록 유도되면 된다. 이 방법을 유도 하기 위해 본 논문의 저자들은 energy function을 사용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+     E(A^{(i)}, i, v) = -\text{Topk}_u (A_{uv} \cdot b^{(i)}) + \omega \text{Topk}_u (A_{uv} \cdot (1 - b^{(i)}))
+     \]</div>
+<p>식(2)의 Energy function을 보면 foreground object i에 대한 token v가 정해져 있을 때, box 내부 pixel이 token v와의 attention값이 크고 box 외부 pixel이 token v와의 attention값이 작으면 Energy function값이 최소가 됨을 알 수 있다. 정리하자면 위의 Energy function값이 최대한 작아지면 box 내부의 pixel들의 object token에 대한 attention이 증가되고, box와 foreground object간의 align이 잘 이루어지게 되는 것이다.</p>
+<div class="math notranslate nohighlight">
+\[
+     z^{(i)}_t \leftarrow z^{(i)}_t - \eta \nabla_{z^{(i)}_t} \sum_{v \in V_i} E(A^{(i)}, i, v)
+     \]</div>
+<div class="math notranslate nohighlight">
+\[
+     z^{(i)}_{t-1} \leftarrow \text{Denoise}(z^{(i)}_t)
+     \]</div>
+<p>Energy function이 최소가 되도록 하기 위해 본 논문은 식(3)과 같은 optimize과정을 거친다. 방법은 단순하다. Gradient Descent를 생각해보면 parameter로 loss function을 표현하고 loss function이 최소가 되도록 parameter들을 optimize하면서 유도한다. 이 방법 역시 같다. Energy function을 최소로 만드는 것이 목표이기에 이는 loss function과 같은 역할을 하게 된다. 그리고 Energy function의 식들은 <span class="math notranslate nohighlight">\(z_t\)</span>에 대해 표현이 가능하기에, Energy function을 통해 <span class="math notranslate nohighlight">\(z_t\)</span>를 optimize하여 Energy function이 최소화가 되도록 유도한다. Energy function 최소화는 각 denoising timestep마다 5회 반복되며, denoising 단계가 다섯 번 진행될 때마다 선형적으로 감소하여 반복 횟수가 1회로 줄어든다. 또한, 30step 후에는 guidance를 수행하지 않는다.</p>
+<p>guidance가 진행되면서 denoising이 다 끝나고 나면 모든 step에 대한 latent map을 얻을 수 있게 되고, attention map을 얻을 수 있게 된다. 이 때 attention map을 SAM을 통해 segment를 진행하거나 threshold값을 설정하여 Foreground mask를 구하게 된다. 이후에 구한 모든 time step에 대한  latent map에 Foreground Mask와 pixel wise곱을 해주어서 masked latent를 구하게 된다.</p>
+</li>
+<li><p>step2</p>
+<p>step2에서는 step1에서 구한 각 foreground object들을 compose하여 처음에 주어진 prompt에 대한 이미지를 잘 생성하는 것이 목표인 단계이다. 기존 work에 의하면 diffusion model은 denoising의 초기 단계에서 semantic한 정보들을 생성하며, 이후 단계에서부터 fine-detail한 부분을 생성한다. 이 점을 이용해서 compose를 하는 step2에서는 단순히 latent map을 compose한다음 denoising을 진행하는 것이 아니라, step의 절반 지점까지는 latent map을 compose하면서 step1과 마찬가지로 optimize를 진행하며 foreground object들이 실제로 원하는 위치에 생성되도록 유도를 하게 된다. 이후부터는 fine-detail한 부분을 생성하여 자연스러운 이미지를 만들기 위해 compose나 optimize를 하지 않고 이미지를 생성하게 된다. Compose관련 식과 optimize를 위한 step2에서의 Energy function은 아래와 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+     z^{(\text{comp})}_t \leftarrow \text{LatentCompose}(z^{(\text{comp})}_t, \hat{z}^{(i)}_t, m^{(i)}) \quad \forall i
+     \]</div>
+<div class="math notranslate nohighlight">
+\[
+     E^{(\text{comp})}(A^{(\text{comp})}, A^{(i)}, i, v) = E(A^{(\text{comp})}, i, v) + \lambda \sum_{u \in V'_i} \left| A^{(\text{comp})}_{uv} - A^{(i)}_{uv} \right|
+     \]</div>
+<p>최종적으로 denoising이 다 진행된 이후, decoder를 통해 latent space에서 pixel space로 변환되어 최종 이미지가 생성된다.</p>
+</li>
+</ul>
+</li>
+</ol>
+</section>
+<section id="evaluation">
+<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
+<p>Evaluation 같은 경우는 Qualitative한 결과와 Quantitative한 결과로 나누어서 보여준다. Introduction에서 소개한 기존 T2I model의 문제점 4가지를 잘 해결하고자 하는 것이 이 work의 목표였기에 4가지 항목에 대해 평가를 진행한다. 데이터 같은 경우는 Negation, Attribute Binding, Generative Numeracy, Spatial Relationships를 각각 평가하기 위한 100개씩의 데이터를 수집해서 진행을 하였다고 한다. Evaluation metric이 존재하진 않고 Quantitative한 결과를 위해서 OWL-ViT detector를 활용하여 object들에 대한 bounding box값을 얻은 후에, 생성된 이미지가 prompt에 잘 맞게 생성되어있는지를 확인하였다고 한다. 추가적으로 Ablation study에 대한 다양한 결과들도 포함되어 있다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD8.png"><img alt="LMD_8" class="bg-primary mb-1" src="../../_images/LMD8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 521 </span><span class="caption-text">Qualitative Results 1 of LMD</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD13.png"><img alt="LMD_13" class="bg-primary mb-1" src="../../_images/LMD13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 522 </span><span class="caption-text">Qualitative Results 2 of LMD</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>기존 Stable Diffusion XL 모델과 비교하여 LMD가 얼마나 prompt에 잘 대응하는 이미지를 생성하는지를 보여주는 결과이다. Introduction에서 언급했듯이 기존 SDXL모델은 이미지의 전반적인 부분은 잘 생성하지만 numeracy, spatial relationship, attribute matching등을 잘 만족시키지 못하는 경우가 발생한다. 하지만 LMD는 이러한 문제점들을 잘 해결하고 있는 것으로 보인다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD9.png"><img alt="LMD_9" class="bg-primary mb-1" src="../../_images/LMD9.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 523 </span><span class="caption-text">Quantitative Results of LMD &amp; LMD+</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 result는 기존 Stable Diffusion과 이 논문에서 제시하는 LMD 방법 그리고 기존 layout to image generation model인 GLIGEN의 adapter를 가져와서 통합한 방법인 LMD+ 세가지를 비교한다. LMD &amp; LMD+ 모두 기존 SD에 비해 월등한 결과를 보임을 알 수 있다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD11.png"><img alt="LMD11" class="bg-primary mb-1" src="../../_images/LMD11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 524 </span><span class="caption-text">Ablation of LMD &amp; LMD+</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Ablation study의 결과를 보면, 앞선 결과와 마찬가지로 LMD가 훨씬 더 좋은 성능임을 알 수 있다. LMD의 Baseline model로 SDv1.5, SDv2.1을 각각 써본 결과 둘의 차이는 거의 없었다고 한다. 또한 Foreground mask를 구하기 위해 SAM을 사용했을 경우와 SAM을 사용하지 않고 Threshold를 사용해서 구한 결과도 보여주었는데 이는 LMD일때와 LMD+일때가 서로 다른 결과를 보인다. LMD에서는 attention기반의 guidance가 layout box와 관련하여 공간적으로 정확하지 않기 때문에, SAM은 객체를 커버하는 올바른 마스크를 얻는 데 도움을 준다. 따라서 SAM을 제거하면 LMD에서 약간의 성능 저하가 발생하게 된다. 반면에 LMD+에서는 기존 잘 훈련된  GLIGEN 모델을 가져와 이용하기 때문에 대부분의 경우 SAM이 필요하지 않게된다. 오히려, SAM은 때때로 배경을 포함하는 영역을 선택하여 혼란을 일으키고 성능을 저하시키게 되어 SAM을 제거하면 LMD+에서는 결과가 오히려 개선되는 효과가 있었다고 한다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD10.png"><img alt="LMD10" class="bg-primary mb-1" src="../../_images/LMD10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 525 </span><span class="caption-text">Ablations on layout-to-image methods as stage 2 with LMD’s LLM layout generator as stage 1</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 결과는 Stage1과 Stage2에 어떤 방법을 가져왔느냐에 따른 전체적인 Quantitative한 결과를 보여준다. 우선 Stage1 즉, object에 대한 개별 bounding box를 생성하는 부분에 대해서는 이 논문의 LMD기법을 다 사용을 하였다. 생성된 box를 기반으로 이미지를 생성하는 Stage 2에 대해서 training-free 방법 즉, guidance를 주는 방법을 사용한 경우와 기존 pretrain된 layout to image generation model, 여기에 LMD기법을 추가적으로 더한 방법인 training-based 방법을 나누어서 비교한다. 우선 training-free 방법의 경우, 다른 guidance 기법에 비해 이 논문에서 제시한 stage2의 방법이 훨씬 더 뛰어난 성능을 보임을 알 수 있다. training-based 방법의 경우, 기존 layout to image generation model인 GLIGEN을 그대로 가져온 경우, GLIGEN에 이 논문의 stage2 기법을 적용한 LMD+, LMD+ 에서 GPT3.5대신 GPT4를 사용한 경우로 나누어서 비교를 한다. 사실상 LMD+가 가장 좋은 성능을 보임을 알 수 있다. GPT 버전을 GPT4로 바꾼 경우는 Numeracy에서는 성능이 살짝 감소하지만, 다른 부분에 대해서는 성능이 좀 더 많이 증가한 모습을 확인할 수 있다.</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD12.png"><img alt="LMD12" class="bg-primary mb-1" src="../../_images/LMD12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 526 </span><span class="caption-text">Ablations on GPT version + Compare with SD</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>GPT같은 경우 GPT3.5-turbo를 사용한 경우와 GPT4를 사용한 경우를 나누어서 실험을 진행하였다. GPT4의 경우 1-shot으로 진행을 해도 layout을 잘 만드는 모습을 보였고, GPT3.5의 경우 7shots으로 진행을 해야 완벽하게 layout을 생성하는 결과를 보임을 알 수 있다.</p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/LMD14.png"><img alt="LMD14" class="bg-primary mb-1" src="../../_images/LMD14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 527 </span><span class="caption-text">Ablations on different LLM and same LLM with different size</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>서로 다른 LLM을 사용해서 진행을 해본 결과, 다른 open-source model에 비해 gpt4가 확실히 좋은 성능을 보였음을 알 수 있다. 또한 똑같은 open-source model을 다른 크기로 적용해본결과 더 큰 규모의 model이 더 좋은 성능을 보임을 알 수 있다.</p>
+</section>
+<section id="summary">
+<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
+<p>LMD는 text-to-image generation diffusion model이 prompt를 더 잘 반영해서 이미지를 생성할 수 있도록 한 방법이다. 추가적인 Training을 하지 않고 prompt를 더 잘 이해할 수 있도록 기존 Text to Image에서 intermediate representation인 layout을 활용한 방법이 특징이라고 할 수 있다. 두 가지 stage로 나눠 LLM-based text-grounded layout generation과 layout-grounded image generation을 통해 문제를 해결하고자 한 논문이다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="latent_consistency_models.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Latent Consistency Models</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DiT.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DiT</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">LLM-grounded Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Latent_Diffusion_Model.html b/docs/review/Latent_Diffusion_Model.html
old mode 100644
new mode 100755
index 8c2e471c..9fcf12f2
--- a/docs/review/Latent_Diffusion_Model.html
+++ b/docs/review/Latent_Diffusion_Model.html
@@ -1,930 +1,950 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Introduction &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Latent_Diffusion_Model';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Textual Inversion" href="Textual_Inversion.html" />
-    <link rel="prev" title="ControlNet" href="ControlNet.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Latent_Diffusion_Model.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Latent_Diffusion_Model.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Introduction</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-model">Latent Diffusion Model</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#on-perceptual-compression-tradeoffs">4.1. On Perceptual Compression Tradeoffs</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-generation-with-latent-diffusion">4.2. Image Generation with Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditional-latent-diffusion">4.3. Conditional Latent Diffusion</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-encoders-for-ldms">4.3.1 Transformer Encoders for LDMs</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutional-sampling-beyond-256x256">4.3.2 Convolutional Sampling Beyond 256x256</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-with-latent-diffusion">4.4. Super-Resolution with Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#inpainting-with-latent-diffusion">4.5. Inpainting with Latent Diffusion</a></li>
-</ul>
-</li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> High-Resolution Image Synthesis with Latent Diffusion Models (CVPR 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2112.10752">https://arxiv.org/abs/2112.10752</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/CompVis/latent-diffusion">CompVis/latent-diffusion</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Namkyeong Cho</p></li>
-<li><p><strong>Last updated on May. 31, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p>latent diffusion model이 어떻게 학습하는지 rate-distortion trade-off로 분석할 수 있다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Latent_Space.png"><img alt="../../_images/Latent_Space.png" class="bg-primary mb-1" src="../../_images/Latent_Space.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 148 </span><span class="caption-text">Analysis of Latent Diffusion Model with rate-distortion trade-off</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>학습 단계는 크게 2단계로 나눌 수 있다.</p>
-<ul class="simple">
-<li><p>Perceptual Compression</p>
-<ul>
-<li><p>Autoencoder를 학습하는 단계</p></li>
-<li><p>perceptual : 인간의 인지와 관련된 것. 예를 들면 고양이 하면 고양이 귀, 꼬리 등 우리가 인지한 객체.</p>
-<ul>
-<li><p>압축 과정에서 일정 부분을 넘어가면 비트를 많이 써도 사람이 인지할 수 없는 것에 대해 사용됨. (Rate가 0.5 이후 넘어가는 것을 보면 다른 것이 인지 안됨)</p>
-<ul>
-<li><p>비트를 많이 씀 : 1비트(흑,백)로 표현하는 것을 8비트 등으로 표현 ⇒ 다양한 색상을 사용하여 색, 텍스쳐 등을 다양하게 표현할 수 있음. 이는 파라미터 수의 증가 등 모델 사이즈의 증가로 볼 수 있음.</p></li>
-</ul>
-</li>
-<li><p>Autoencoder를 통해 이미지 차원을 압축하여 사용할 비트를 줄임으로써 불필요한(사람이 인지할 수 없는) 것을 학습하지 않도록 함.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Semantic Compression</p>
-<ul>
-<li><p>Diffusion model을 학습하는 단계로 이미지의 의미와 문맥을 이해하고 이를 바탕으로 압축하는 것을 말함. 예를 들어, 이미지 속의 개가 어디에 위치하는지, 배경이 무엇인지, 어떤 상황인지 등을 학습함.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<p>생성 모델의 기존 모델은 다음과 같다.</p>
-<ul class="simple">
-<li><p>GAN : 좋은 perceptual quality를 가진 고해상도 image의 sampling을 효율적으로 수행하나 최적화나 full data distribution을 찾기 어렵다.</p>
-<ul>
-<li><p>학습 불안정성(생성자, 판별자 기반의 손실함수), Mode Collapse(제한된 유형의 샘플만 생성하는 현상) 등 발생.</p></li>
-</ul>
-</li>
-<li><p>VAE, flow-based model : 고해상도 image의 효율적인 합성을 수행할 수 있으나 GAN보다 sample quality가 떨어진다.</p>
-<ul>
-<li><p>flow-based model : Flow는 복잡한 분포를 모델링하기 위해 일련의 가역적인 비선형 변환을 사용하는 방법론.</p>
-<ul>
-<li><p>기본 아이디어 : 간단한 분포(예: 가우시안 분포)에서 복잡한 데이터 분포로 변환하는 일련의 함수들을 학습하는 것.</p></li>
-<li><p>data를 잠재변수로 변환하는, 반대로 잠재변수를 data로 변환하는(역변환) 양방향 변환기를 학습시켜 잠재변수로부터 data를 generative 할 수 있도록 함.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Autoregressive : density 추정에서 강력한 performance를 달성하나 계산 비용과 sequential sampling process로 인해 저해상도 image로 제약된다.</p></li>
-<li><p>pixel 기반의 image representation은 거의 인지하기 어려운 이미지의 detail한 부분을 포함하여 maximum-likelihood 학습은 이를 학습하는데 많은 용량과 시간을 소비한다.</p></li>
-<li><p>Diffusion Probabilistic Models : sample quality 등에서 좋은 성능을 보이나 pixel 단위에서의 평가와 최적화는 낮은 inference speed와 높은 train cost를 유발한다.</p></li>
-<li><p>VQ-VAEs : 기존 VAE 방법에 벡터 양자화(Vector Quantisation)를 도입한 방법이다. latent space의 연속형 벡터를 사전에 정한 codebook 벡터와의 거리를 계산하여 제일 유사한 값으로 변환한다.</p>
-<ul>
-<li><p>데이터 압축, 샘플링이 유용하면서 생성 모델의 품질을 높일 수도 있음.</p></li>
-</ul>
-</li>
-<li><p>VQ-GAN : VQ-VAE의 원리를 GAN에 적용한 방법이다. latent space의 연속형 벡터를 양자화하여 변환하면서 학습은 GAN의 adversarial loss를 추가로 활용한다.</p>
-<ul>
-<li><p>데이터 압축, 샘플링이 유용하면서 생성 모델의 품질을 높일 수도 있음.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="latent-diffusion-model">
-<h1>Latent Diffusion Model<a class="headerlink" href="#latent-diffusion-model" title="Permalink to this heading">#</a></h1>
-<p>오늘 알아볼 모델은 Latent Diffusion Model이다.
-기존에 다뤘던 Diffusion Model과 유사하게 동작하는 생성 모델이다. 이 논문은 컴퓨터 자원의 소모를 줄이면서 Diffusion Model과 유사한 성능을 얻는 것을 목표로 한다.</p>
-<p>Latent Diffusion Model은 전반적으로 아래와 같은 구조를 가진다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Unet.png"><img alt="../../_images/Unet.png" class="bg-primary mb-1" src="../../_images/Unet.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 149 </span><span class="caption-text">Structure of Latent Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(x \in \mathbb{R}^{H\times W \times 3}\)</span>이 input으로 주어졌을때 이를 encoder <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>를 통해서 <span class="math notranslate nohighlight">\(z=\mathcal{E}(x) \in \mathbb{R}^{h\times w\times c }\)</span>로 인코딩 하고 <span class="math notranslate nohighlight">\(\hat{x}=\mathcal{D}(z)\)</span>
-로 디코딩을 한다. 이 논문에서 <span class="math notranslate nohighlight">\(f=H/h=W/w=2^m\)</span>, <span class="math notranslate nohighlight">\(m\in \mathbb{N}\)</span>이 되도록 여러 <span class="math notranslate nohighlight">\(m\)</span>에 대해서 테스트를 진행하였다. 또한 Latent space에서 분산이 커지지 않도록 KL divergence와 vector quantization(VQ)을 활용하였다.
-이미지 외에 텍스트나 semantic map과 같은 추가적인 정보는 <span class="math notranslate nohighlight">\(\tau_\theta\)</span>를 통해서 전달을 하였고,</p>
-<div class="math notranslate nohighlight">
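-\[  Q=W^{(i)}_Q \phi_i(z_t), K=W^{(i)}_K \tau_\theta(y), V=W^{(i)}_V \tau_\theta(y) \]</div>
-<p>로 정의되고 <span class="math notranslate nohighlight">\(\phi_i(z_t)\)</span>는 <span class="math notranslate nohighlight">\(U\)</span>-Net 중간의 representation, <span class="math notranslate nohighlight">\(\tau_\theta(y)\)</span>는 conditioning input <span class="math notranslate nohighlight">\(y\)</span>를 인코딩한 representation, <span class="math notranslate nohighlight">\(W^{(i)}_V, W^{(i)}_K, W^{(i)}_Q\)</span>는 학습 가능한 projection matrix이다.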
-<span class="math notranslate nohighlight">\(Q, K, V\)</span> 는 attention의 query, key, value에 해당하며</p>
-<div class="math notranslate nohighlight">
-\[
-Attention(Q, K, V) = softmax(\frac{QK^T}{\sqrt{d}})\cdot V
-\]</div>
-<p>로 연산이 진행된다. 학습을 위한 loss 함수는 다음과 같이 표현된다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}_{LDM} = \mathbb{E}_{\mathcal{E}(x), 
-\epsilon \sim \mathcal{N}(0,1),t} \left[ \|\epsilon-\epsilon_{\theta}(z_t,t) \|_{2}^{2}\right].
-\]</div>
-<p>여기서 주목할만한 부분은 기존 Diffusion Model에서</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}_{DM} = \mathbb{E}_{x, 
-\epsilon \sim \mathcal{N}(0,1),t} \left[ \|\epsilon-\epsilon_{\theta}(x_t,t) \|_{2}^{2}\right].
-\]</div>
-<p>와 같은 loss function으로 학습을 진행시키는데 <span class="math notranslate nohighlight">\(x_t\)</span>를 <span class="math notranslate nohighlight">\(z_t\)</span>로 바꾸면서 연산의 양을 줄였다는 점이다.</p>
-<p>위의 “Introduction”과 연관지으면 다음과 같이 생각 해볼 수 있다.</p>
-<ul class="simple">
-<li><p>encoder <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>, decoder <span class="math notranslate nohighlight">\(\mathcal{D}\)</span>로 구성된 학습된 perceptual compression model을 바탕으로 효율적인, 낮은 차원의 latent space(high-frequency, imperceptible detail이 제거된)를 가지게 됨</p></li>
-<li><p>고차원 공간과 비교했을 때 이 latent space는 likelihood-based generative model에 더 적합함</p>
-<ul>
-<li><p>데이터에 중요한 semantic bit에 집중할 수 있고 더 낮은 차원에서 효율적으로 학습 가능</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="experiments">
-<h1>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
-<section id="on-perceptual-compression-tradeoffs">
-<h2>4.1. On Perceptual Compression Tradeoffs<a class="headerlink" href="#on-perceptual-compression-tradeoffs" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>different downsampling factor <span class="math notranslate nohighlight">\(f \in \{1,2,4,8,16,32\}\)</span> 에 대한 분석</p>
-<ul>
-<li><p>latent space 차원 개수에 따른 차이 분석</p></li>
-<li><p>LDM-1 : pixel-based DMs</p></li>
-</ul>
-</li>
-<li><p>computational resource : single NVIDIA A100 1대로 함</p></li>
-<li><p>전체 같은 parameter 개수와 step을 기준으로 함</p></li>
-</ul>
-<ul class="simple">
-<li><p>위의 표는 autoencoder 차원에 따른 hyperparameter, loss에 따른 실험 결과를 보임</p></li>
-<li><p>위의 그래프는 모델이 수렴하는데 소요되는 step을 분석함</p></li>
-<li><p><span class="math notranslate nohighlight">\(f\)</span> 값이 너무 크면 약간의 학습 후 fidelity(생성된 데이터와 실제 데이터의 유사도)가 정체된 것을 볼 수 있음</p>
-<ul>
-<li><p>이는 아래 사진의 결과와 perceptual과 bit ratio 부분을 생각해보면 대부분의 perceptual compression은 diffusion에 있으며 encoder와 decoder 부분에서 압축이 지나치게 되어 정보 손실이 발생하여 달성할 수 있는 품질이 제한됨</p></li>
-</ul>
-</li>
-</ul>
-<ul class="simple">
-<li><p>위의 그래프는 CelebA-HQ와 ImageNet에서 학습된 모델을 DDIM sampler를 사용하여 다양한 step의 denoising에 대한 샘플링 속도 측면에서 FID 점수와 비교함</p>
-<ul>
-<li><p>ImageNet이 CelebA-HQ 대비 더 복잡한 dataset임</p></li>
-</ul>
-</li>
-<li><p>point가 총 5개인데 오른쪽부터 왼쪽으로 step 수가 {10,20,50,100,200}을 의미함</p></li>
-<li><p>LDM-{4-8}이 outperform을 발휘함</p></li>
-<li><p>LDM-1은 FID가 높고(즉, 생성 품질이 낮고) sampling 속도도 느림</p></li>
-</ul>
-</section>
-<section id="image-generation-with-latent-diffusion">
-<h2>4.2. Image Generation with Latent Diffusion<a class="headerlink" href="#image-generation-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>256x256 image의 unconditional model을 CelebA-HQ, FFHQ, LSUN-Churches, Bedrooms로 학습하고 sample quality, data manifold의 coverage(FID, Precision-and-Recall) 평가</p></li>
-</ul>
-<ul class="simple">
-<li><p>CelebA-HQ</p>
-<ul>
-<li><p>제안한 모델이 최고 성능</p>
-<ul>
-<li><p>LSGM : encoder, decoder를 UNet과 동시에 학습시킨 결과</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>LSUN-Bedrooms : ADM보다 절반의 parameter와 짧은 train 시간으로 유사한 FID score를 얻음</p></li>
-<li><p>제안한 방법인 LDM은 Precision과 Recall에서 GAN-based method보다 일관되게 좋은 성능을 보이며, 이를 통해 adversarial 방식 대비 mode-covering likelihood-based training의 이점을 확인할 수 있음. (아래 그림은 정성적 결과)</p></li>
-</ul>
-</section>
-<section id="conditional-latent-diffusion">
-<h2>4.3. Conditional Latent Diffusion<a class="headerlink" href="#conditional-latent-diffusion" title="Permalink to this heading">#</a></h2>
-<section id="transformer-encoders-for-ldms">
-<h3>4.3.1 Transformer Encoders for LDMs<a class="headerlink" href="#transformer-encoders-for-ldms" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>text-to-image modeling : 1.45B parameter KL-regularized LDM</p>
-<ul>
-<li><p>LAION-400M 데이터셋에서 language prompt를 조건(condition)으로 하여 학습</p></li>
-<li><p>BERT-tokenizer</p></li>
-<li><p><span class="math notranslate nohighlight">\(\tau_{\theta}\)</span> : transformer</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/text_to_image.png"><img alt="../../_images/text_to_image.png" class="bg-primary mb-1" src="../../_images/text_to_image.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 150 </span><span class="caption-text">text to image on LAION</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>아래 사진은 Layout이 주어졌을 때, 이를 기반으로 image를 생성하는 layout-to-image의 샘플 결과임</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/layout_to_image.png"><img alt="../../_images/layout_to_image.png" class="bg-primary mb-1" src="../../_images/layout_to_image.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 151 </span><span class="caption-text">layout-to-image</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>class-conditional ImageNet model 결과</p></li>
-</ul>
-</section>
-<section id="convolutional-sampling-beyond-256x256">
-<h3>4.3.2 Convolutional Sampling Beyond 256x256<a class="headerlink" href="#convolutional-sampling-beyond-256x256" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>spatially aligned conditioning information(semantic map 등)을 input에 연결했을 때 image-to-image translation model 목적 수행 가능</p>
-<ul>
-<li><p>semantic synthesis, super-resolution, inpainting에 실험</p></li>
-</ul>
-</li>
-<li><p>semantic synthesis</p>
-<ul>
-<li><p>dataset : semantic map을 가진 landscape image</p></li>
-<li><p>semantic map의 downsampled version을 <span class="math notranslate nohighlight">\(f=4\)</span> 인 model의 input으로 사용(VQ-reg)</p>
-<ul>
-<li><p>input resolution : 256x256(crop from 384x384)</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>아래 실험은 input 크기 대비 더 큰 image 생성 가능한 것을 보여줌</p></li>
-</ul>
-</section>
-</section>
-<section id="super-resolution-with-latent-diffusion">
-<h2>4.4. Super-Resolution with Latent Diffusion<a class="headerlink" href="#super-resolution-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>OpenImages로 사전학습된 <span class="math notranslate nohighlight">\(f=4\)</span> autoencoding model(VQ-reg) 사용</p></li>
-<li><p>low-resolution data가 input으로 들어감</p></li>
-</ul>
-</section>
-<section id="inpainting-with-latent-diffusion">
-<h2>4.5. Inpainting with Latent Diffusion<a class="headerlink" href="#inpainting-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>원본 이미지에서 원하는 물체를 제거한 뒤 이미지가 잘 복구된 것을 볼 수 있음</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="ControlNet.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">ControlNet</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Textual_Inversion.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Textual Inversion</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-model">Latent Diffusion Model</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#on-perceptual-compression-tradeoffs">4.1. On Perceptual Compression Tradeoffs</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-generation-with-latent-diffusion">4.2. Image Generation with Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditional-latent-diffusion">4.3. Conditional Latent Diffusion</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-encoders-for-ldms">4.3.1 Transformer Encoders for LDMs</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutional-sampling-beyond-256x256">4.3.2 Convolutional Sampling Beyond 256x256</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-with-latent-diffusion">4.4. Super-Resolution with Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#inpainting-with-latent-diffusion">4.5. Inpainting with Latent Diffusion</a></li>
-</ul>
-</li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Introduction &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Latent_Diffusion_Model';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Textual Inversion" href="Textual_Inversion.html" />
+    <link rel="prev" title="ControlNet" href="ControlNet.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Latent_Diffusion_Model.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Latent_Diffusion_Model.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Introduction</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-model">Latent Diffusion Model</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#on-perceptual-compression-tradeoffs">4.1. On Perceptual Compression Tradeoffs</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-generation-with-latent-diffusion">4.2. Image Generation with Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditional-latent-diffusion">4.3. Conditional Latent Diffusion</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-encoders-for-ldms">4.3.1 Transformer Encoders for LDMs</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutional-sampling-beyond-256x256">4.3.2 Convolutional Sampling Beyond 256x256</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-with-latent-diffusion">4.4. Super-Resolution with Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#inpainting-with-latent-diffusion">4.5. Inpainting with Latent Diffusion</a></li>
+</ul>
+</li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> High-Resolution Image Synthesis with Latent Diffusion Models (CVPR 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2112.10752">https://arxiv.org/abs/2112.10752</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/CompVis/latent-diffusion">CompVis/latent-diffusion</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Namkyeong Cho</p></li>
+<li><p><strong>Last updated on May. 31, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>latent diffusion model이 어떻게 학습하는지 rate-distortion trade-off로 분석할 수 있다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Latent_Space.png"><img alt="../../_images/Latent_Space.png" class="bg-primary mb-1" src="../../_images/Latent_Space.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 148 </span><span class="caption-text">Analysis of Latent Diffusion Model with rate-distortion trade-off</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>학습 단계는 크게 2단계로 나눌 수 있다.</p>
+<ul class="simple">
+<li><p>Perceptual Compression</p>
+<ul>
+<li><p>Autoencoder를 학습하는 단계</p></li>
+<li><p>perceptual : 인간의 인지와 관련된 것. 예를 들면 고양이 하면 고양이 귀, 꼬리 등 우리가 인지한 객체.</p>
+<ul>
+<li><p>압축 과정에서 일정 부분을 넘어가면 비트를 많이 써도 사람이 인지할 수 없는 것에 대해 사용됨. (Rate가 0.5 이후 넘어가는 것을 보면 다른 것이 인지 안됨)</p>
+<ul>
+<li><p>비트를 많이 씀 : 1비트(흑,백)로 표현하는 것을 8비트 등으로 표현 ⇒ 다양한 색상을 사용하여 색, 텍스쳐 등을 다양하게 표현할 수 있음. 이는 파라미터 수의 증가 등 모델 사이즈의 증가로 볼 수 있음.</p></li>
+</ul>
+</li>
+<li><p>Autoencoder를 통해 이미지 차원을 압축하여 사용할 비트를 줄임으로써 불필요한(사람이 인지할 수 없는) 것을 학습하지 않도록 함.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Semantic Compression</p>
+<ul>
+<li><p>Diffusion model을 학습하는 단계로 이미지의 의미와 문맥을 이해하고 이를 바탕으로 압축하는 것을 말함. 예를 들어, 이미지 속의 개가 어디에 위치하는지, 배경이 무엇인지, 어떤 상황인지 등을 학습함.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<p>생성 모델의 기존 모델은 다음과 같다.</p>
+<ul class="simple">
+<li><p>GAN : 좋은 perceptual quality를 가진 고해상도 image의 sampling을 효율적으로 수행하나 최적화나 full data distribution을 찾기 어렵다.</p>
+<ul>
+<li><p>학습 불안정성(생성자, 판별자 기반의 손실함수), Mode Collapse(제한된 유형의 샘플만 생성하는 현상) 등 발생.</p></li>
+</ul>
+</li>
+<li><p>VAE, flow-based model : 고해상도 image의 효율적인 합성을 수행할 수 있으나 GAN보다 sample quality가 떨어진다.</p>
+<ul>
+<li><p>flow-based model : Flow는 복잡한 분포를 모델링하기 위해 일련의 가역적인 비선형 변환을 사용하는 방법론.</p>
+<ul>
+<li><p>기본 아이디어 : 간단한 분포(예: 가우시안 분포)에서 복잡한 데이터 분포로 변환하는 일련의 함수들을 학습하는 것.</p></li>
+<li><p>data를 잠재변수로 변환하는, 반대로 잠재변수를 data로 변환하는(역변환) 양방향 변환기를 학습시켜 잠재변수로부터 data를 생성(generate)할 수 있도록 함.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Autoregressive : density 추정에서 강력한 performance를 달성하나 계산 비용과 sequential sampling process로 인해 저해상도 image로 제약된다.</p></li>
+<li><p>pixel 기반의 image representation은 거의 인지하기 어려운 이미지의 detail한 부분을 포함하여 maximum-likelihood 학습은 이를 학습하는데 많은 용량과 시간을 소비한다.</p></li>
+<li><p>Diffusion Probabilistic Models : sample quality 등에서 좋은 성능을 보이나 pixel 단위에서의 평가와 최적화는 낮은 inference speed와 높은 train cost를 유발한다.</p></li>
+<li><p>VQ-VAEs : 기존 VAE 방법에 벡터 양자화(Vector Quantisation)를 도입한 방법이다. latent space의 연속형 벡터를 사전에 정한 codebook 벡터와의 거리를 계산하여 제일 유사한 값으로 변환한다.</p>
+<ul>
+<li><p>데이터 압축, 샘플링이 유용하면서 생성 모델의 품질을 높일 수도 있음.</p></li>
+</ul>
+</li>
+<li><p>VQ-GAN : VQ-VAE의 원리를 GAN에 적용한 방법이다. latent space의 연속형 벡터를 양자화하여 변환하면서 학습은 GAN의 adversarial loss를 추가로 활용한다.</p>
+<ul>
+<li><p>데이터 압축, 샘플링이 유용하면서 생성 모델의 품질을 높일 수도 있음.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="latent-diffusion-model">
+<h1>Latent Diffusion Model<a class="headerlink" href="#latent-diffusion-model" title="Permalink to this heading">#</a></h1>
+<p>오늘 알아볼 모델은 Latent Diffusion Model이다.
+기존에 다뤘던 Diffusion Model과 유사하게 동작하는 생성 모델이다. 이 논문은 컴퓨터 자원의 소모를 줄이면서 Diffusion Model과 유사한 성능을 얻는 것을 목표로 한다.</p>
+<p>Latent Diffusion Model은 전반적으로 아래와 같은 구조를 가진다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Unet.png"><img alt="../../_images/Unet.png" class="bg-primary mb-1" src="../../_images/Unet.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 149 </span><span class="caption-text">Structure of Latent Diffusion Model</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(x \in \mathbb{R}^{H\times W \times 3}\)</span>이 input으로 주어졌을때 이를 encoder <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>를 통해서 <span class="math notranslate nohighlight">\(z=\mathcal{E}(x) \in \mathbb{R}^{h\times w\times c }\)</span>로 인코딩 하고 <span class="math notranslate nohighlight">\(\hat{x}=\mathcal{D}(z)\)</span>
+로 디코딩을 한다. 이 논문에서 <span class="math notranslate nohighlight">\(f=H/h=W/w=2^m\)</span>, <span class="math notranslate nohighlight">\(m\in \mathbb{N}\)</span>이 되도록 여러 <span class="math notranslate nohighlight">\(m\)</span>에 대해서 테스트를 진행하였다. 또한 Latent space에서 분산이 커지지 않도록 KL divergence와 vector quantization(VQ)을 활용하였다.
+이미지 외에 텍스트나 semantic map과 같은 추가적인 정보는 <span class="math notranslate nohighlight">\(\tau_\theta\)</span>를 통해서 전달을 하였고,</p>
+<div class="math notranslate nohighlight">
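+\[  Q=W^{(i)}_Q \phi_i(z_t), \quad K=W^{(i)}_K \tau_\theta(y), \quad V=W^{(i)}_V \tau_\theta(y) \]</div>
+<p>로 정의되고 <span class="math notranslate nohighlight">\(\phi_i(z_t)\)</span>는 <span class="math notranslate nohighlight">\(U\)</span>-Net 중간의 representation, <span class="math notranslate nohighlight">\(\tau_\theta(y)\)</span>는 conditioning 입력 <span class="math notranslate nohighlight">\(y\)</span>를 인코딩한 representation, <span class="math notranslate nohighlight">\(W^{(i)}_V, W^{(i)}_K, W^{(i)}_Q\)</span>는 학습 가능한 projection matrix이다.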
+<span class="math notranslate nohighlight">\(Q, K, V\)</span> 는 attention의 query, key, value에 해당하며</p>
+<div class="math notranslate nohighlight">
+\[
+Attention(Q, K, V) = softmax(\frac{QK^T}{\sqrt{d}})\cdot V
+\]</div>
+<p>로 연산이 진행된다. 학습을 위한 loss 함수는 다음과 같이 표현된다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}_{LDM} = \mathbb{E}_{\mathcal{E}(x), 
+\epsilon \sim \mathcal{N}(0,1),t} \left[ \|\epsilon-\epsilon_{\theta}(z_t,t) \|_{2}^{2}\right].
+\]</div>
+<p>여기서 주목할만한 부분은 기존 Diffusion Model에서</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}_{DM} = \mathbb{E}_{x, 
+\epsilon \sim \mathcal{N}(0,1),t} \left[ \|\epsilon-\epsilon_{\theta}(x_t,t) \|_{2}^{2}\right].
+\]</div>
+<p>와 같은 loss function으로 학습을 진행시키는데 <span class="math notranslate nohighlight">\(x_t\)</span>를 <span class="math notranslate nohighlight">\(z_t\)</span>로 바꾸면서 연산의 양을 줄였다는 점이다.</p>
+<p>위의 “Introduction”과 연관지으면 다음과 같이 생각 해볼 수 있다.</p>
+<ul class="simple">
+<li><p>encoder <span class="math notranslate nohighlight">\(\mathcal{E}\)</span>, decoder <span class="math notranslate nohighlight">\(\mathcal{D}\)</span>로 구성된 학습된 perceptual compression model을 바탕으로, high-frequency의 imperceptible detail이 제거된 효율적인 저차원 latent space를 가지게 됨</p></li>
+<li><p>고차원 공간과 비교했을 때 이 latent space는 likelihood-based generative model에 더 적합함</p>
+<ul>
+<li><p>데이터에 중요한 semantic bit에 집중할 수 있고 더 낮은 차원에서 효율적으로 학습 가능</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<section id="on-perceptual-compression-tradeoffs">
+<h2>4.1. On Perceptual Compression Tradeoffs<a class="headerlink" href="#on-perceptual-compression-tradeoffs" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>different downsampling factor <span class="math notranslate nohighlight">\(f \in \{1,2,4,8,16,32\}\)</span> 에 대한 분석</p>
+<ul>
+<li><p>latent space 차원 개수에 따른 차이 분석</p></li>
+<li><p>LDM-1 : pixel-based DMs</p></li>
+</ul>
+</li>
+<li><p>computational resource : single NVIDIA A100 1대로 함</p></li>
+<li><p>전체 같은 parameter 개수와 step을 기준으로 함</p></li>
+</ul>
+<ul class="simple">
+<li><p>위의 표는 autoencoder 차원에 따른 hyperparameter, loss에 따른 실험 결과를 보임</p></li>
+<li><p>위의 그래프는 모델이 수렴하는데 소요되는 step을 분석함</p></li>
+<li><p><span class="math notranslate nohighlight">\(f\)</span> 값이 너무 크면 약간의 학습 후 fidelty(학습 데이터와 실제 데이터의 유사도)가 정체된 것을 볼 수 있음</p>
+<ul>
+<li><p>이는 아래 사진의 결과와 perceptual과 bit ratio 부분을 생각해보면 대부분의 perceptual compression은 diffusion에 있으며 encoder와 decoder 부분에서 압축이 지나치게 되어 정보 손실이 발생하여 달성할 수 있는 품질이 제한됨</p></li>
+</ul>
+</li>
+</ul>
+<ul class="simple">
+<li><p>위의 그래프는 CelebA-HQ와 ImageNet에서 학습된 모델에 대해, DDIM sampler의 denoising step 수를 다양하게 바꾸어 가며 샘플링 속도와 FID 점수를 비교함</p>
+<ul>
+<li><p>ImageNet이 CelebA-HQ 대비 더 복잡한 dataset임</p></li>
+</ul>
+</li>
+<li><p>point가 총 5개인데 오른쪽부터 왼쪽으로 step 수가 {10,20,50,100,200}을 의미함</p></li>
+<li><p>LDM-{4-8}이 다른 모델보다 우수한 성능을 보임</p></li>
+<li><p>LDM-1은 FID가 높고(생성 품질이 낮고) sampling 속도도 느림</p></li>
+</ul>
+</section>
+<section id="image-generation-with-latent-diffusion">
+<h2>4.2. Image Generation with Latent Diffusion<a class="headerlink" href="#image-generation-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>256x256 image의 unconditional model을 CelebA-HQ, FFHQ, LSUN-Churches, Bedrooms로 학습하고 sample quality, data manifold의 coverage(FID, Precision-and-Recall) 평가</p></li>
+</ul>
+<ul class="simple">
+<li><p>CelebA-HQ</p>
+<ul>
+<li><p>제안한 모델이 최고 성능</p>
+<ul>
+<li><p>LSGM : encoder, decoder를 UNet과 동시에 학습시킨 결과</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>LSUN-Bedrooms : ADM보다 절반의 parameter와 짧은 train 시간으로 유사한 FID score를 얻음</p></li>
+<li><p>제안한 방법인 LDM은 GAN-based method 대비 Precision과 Recall이 일관되게 향상되었으며, 이를 통해 adversarial 방식에 비해 mode-covering likelihood-based training이 가지는 이점을 확인할 수 있음. (아래 그림은 정성적 결과)</p></li>
+</ul>
+</section>
+<section id="conditional-latent-diffusion">
+<h2>4.3. Conditional Latent Diffusion<a class="headerlink" href="#conditional-latent-diffusion" title="Permalink to this heading">#</a></h2>
+<section id="transformer-encoders-for-ldms">
+<h3>4.3.1 Transformer Encoders for LDMs<a class="headerlink" href="#transformer-encoders-for-ldms" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>text-to-image modeling : 1.45B parameter KL-regularized LDM</p>
+<ul>
+<li><p>LAION-400M으로 language prompt 수행</p></li>
+<li><p>BERT-tokenizer</p></li>
+<li><p><span class="math notranslate nohighlight">\(\tau_{\theta}\)</span> : transformer</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/text_to_image.png"><img alt="../../_images/text_to_image.png" class="bg-primary mb-1" src="../../_images/text_to_image.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 150 </span><span class="caption-text">text to image on LAION</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>아래 사진은 Layout이 주어졌을 때, 이를 기반으로 image를 생성하는 layout-to-image의 샘플 결과임</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/layout_to_image.png"><img alt="../../_images/layout_to_image.png" class="bg-primary mb-1" src="../../_images/layout_to_image.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 151 </span><span class="caption-text">layout-to-image</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>class-conditional ImageNet model 결과</p></li>
+</ul>
+</section>
+<section id="convolutional-sampling-beyond-256x256">
+<h3>4.3.2 Convolutional Sampling Beyond 256x256<a class="headerlink" href="#convolutional-sampling-beyond-256x256" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>spatially aligned conditioning information(semantic map 등)을 input에 연결했을 때 image-to-image translation model 목적 수행 가능</p>
+<ul>
+<li><p>semantic synthesis, super-resolution, inpainting에 실험</p></li>
+</ul>
+</li>
+<li><p>semantic synthesis</p>
+<ul>
+<li><p>dataset : semantic map을 가진 landscape image</p></li>
+<li><p>semantic map의 downsampled version을 <span class="math notranslate nohighlight">\(f=4\)</span> 인 model의 input으로 사용(VQ-reg)</p>
+<ul>
+<li><p>input resolution : 256x256(crop from 384x384)</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>아래 실험은 input 크기 대비 더 큰 image 생성 가능한 것을 보여줌</p></li>
+</ul>
+</section>
+</section>
+<section id="super-resolution-with-latent-diffusion">
+<h2>4.4. Super-Resolution with Latent Diffusion<a class="headerlink" href="#super-resolution-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>OpenImages로 사전학습된 <span class="math notranslate nohighlight">\(f=4\)</span> autoencoding model(VQ-reg) 사용</p></li>
+<li><p>low-resolution data가 input으로 들어감</p></li>
+</ul>
+</section>
+<section id="inpainting-with-latent-diffusion">
+<h2>4.5. Inpainting with Latent Diffusion<a class="headerlink" href="#inpainting-with-latent-diffusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>원본 이미지에서 원하는 물체를 제거한 뒤 이미지가 잘 복구된 것을 볼 수 있음</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="ControlNet.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">ControlNet</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Textual_Inversion.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Textual Inversion</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion-model">Latent Diffusion Model</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#on-perceptual-compression-tradeoffs">4.1. On Perceptual Compression Tradeoffs</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-generation-with-latent-diffusion">4.2. Image Generation with Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conditional-latent-diffusion">4.3. Conditional Latent Diffusion</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#transformer-encoders-for-ldms">4.3.1 Transformer Encoders for LDMs</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#convolutional-sampling-beyond-256x256">4.3.2 Convolutional Sampling Beyond 256x256</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-with-latent-diffusion">4.4. Super-Resolution with Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#inpainting-with-latent-diffusion">4.5. Inpainting with Latent Diffusion</a></li>
+</ul>
+</li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/LoRA.html b/docs/review/LoRA.html
old mode 100644
new mode 100755
index 5748ae78..44118528
--- a/docs/review/LoRA.html
+++ b/docs/review/LoRA.html
@@ -1,986 +1,1006 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>LoRA &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LoRA';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="I-DDPM" href="I-DDPM.html" />
-    <link rel="prev" title="Custom Diffusion" href="CustomDiffusion.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LoRA.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/LoRA.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>LoRA</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LoRA</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminologies-and-conventions">1.1. Terminologies and Conventions</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#problem-statement">2. Problem Statement</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aren-t-existing-solutions-good-enough">3. Aren’t Existing Solutions Good Enough?</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#our-method">4. Our Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-parameterized-update-matrices">4.1. Low-Rank-Parameterized Update Matrices</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#no-additional-inference-latency">4.1.1. No Additional Inference Latency</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applying-lora-to-transformer">4.2. Applying LoRA to Transformer</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#empirical-experiments">5. Empirical Experiments</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#a-ia3">+a) IA3</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aa-lora">+aa) LoRA 사용법</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#reference">Reference</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> LoRA: Low-Rank Adaptation of Large Language Models (ICLR 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2106.09685">https://arxiv.org/abs/2106.09685</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/microsoft/LoRA">Official implementation (microsoft/LoRA)</a></p></li>
-<li><p>Review: <a class="reference external" href="https://www.youtube.com/watch?v=1j0W_lu55nc">PR-409: Denoising Diffusion Probabilistic Models</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Beomsoo Park</p></li>
-<li><p><strong>Last updated on Apr. 19, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="lora">
-<h1>LoRA<a class="headerlink" href="#lora" title="Permalink to this heading">#</a></h1>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="abstract">
-<h1>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
-<p>LoRA는 <strong>PEFT(Parameter-Efficient Fine-Tuning)의 기법 중 하나</strong>이다. Pre-trained model의 weight는 고정한 채로, <strong>몇 개의 dense(fc) layer만 학습시켜 downstream task의 연산량을 줄일 수 있다.</strong> GPT-3을 기준으로 parameter는 10000배, GPU 메모리는 3배를 줄일 수 있다. 또한 inference 과정에서 추가적인 latency가 없다.</p>
-<blockquote>
-<div><ul class="simple">
-<li><p>PEFT: 모델의 모든 파라미터를 튜닝하는 것이 아닌 일부 파라미터만을 튜닝함으로써 모델의 성능을 적은 자원으로도 높게 유지하는 방법론</p></li>
-</ul>
-</div></blockquote>
-<ul class="simple">
-<li><p>Downstream task: pre-trained model을 사용해, 어떤 문제를 해결하기 위해 fine-tuning 하는것</p></li>
-<li><p>Upstream task: Pre-train model을 학습시키는것</p></li>
-<li><p>Latency: 어떤 요청의 시작부터 완료까지 걸리는 시간</p></li>
-</ul>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p>LLM은 기본적으로 pre-trained model을 특정 task에 맞게 fine-tuning을 시킴. 하지만 fine-tuning에서 모든 weight를 다시 학습시키면 GPT-2, GPT-3, RoBERTa 등 큰 모델의 경우 학습에 몇 달이 걸림.</p>
-<p>이전 연구에서 over-parameterized model들은 low intrinsic dimension에 기반하고 있다는 사실에 기반해, 저자는 학습 과정에서도 모델은 <code class="docutils literal notranslate"><span class="pre">low</span> <span class="pre">intrinsic</span> <span class="pre">rank</span></code>을 갖고 있을 것이라 가정함.</p>
-<p><strong>LoRA는 기존 pre-trained weight는 고정하고, 몇 개의 dense layer만 rank decomposition matrices를 최적화하는 방식으로 학습</strong>시키기로 함.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(0).png"><img alt="LoRA_00" class="bg-primary mb-1" src="../../_images/image(0).png" style="width: 550px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 176 </span><span class="caption-text">LoRA structure</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(1).png"><img alt="LoRA_01" class="bg-primary mb-1" src="../../_images/image(1).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 177 </span><span class="caption-text">LoRA structure 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 그림처럼 <strong>기존 pre-trained weight <span class="math notranslate nohighlight">\(W\)</span>는 고정하고 low rank decomposition된 weight <span class="math notranslate nohighlight">\(A, B\)</span>만 학습시켜 <span class="math notranslate nohighlight">\(W\)</span>에 더해줌</strong>. <span class="math notranslate nohighlight">\(A, B\)</span>의 크기는 <span class="math notranslate nohighlight">\(W\)</span>보다 작아 time, computational cost를 최대 3배까지 줄일 수 있음. 또한 task에 따라 LoRA module(<span class="math notranslate nohighlight">\(A, B\)</span>)만 바꿔주면 되기 때문에 storage requirement, task-switching overhead를 줄일 수 있음. 이 외에도 추가적인 inference latency가 없다, 다른 기법들과 함께 적용이 가능하다는 장점이 있음.</p>
-<section id="terminologies-and-conventions">
-<h2>1.1. Terminologies and Conventions<a class="headerlink" href="#terminologies-and-conventions" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(d_{model}\)</span>: Transformer의 input/output dimension size</p></li>
-<li><p><span class="math notranslate nohighlight">\(W_q, W_k, W_v, W_o\)</span>: Self-attention module의 query/key/value/output projection matrices</p></li>
-<li><p><span class="math notranslate nohighlight">\(W, W_0\)</span>: Pre-trained weight</p></li>
-<li><p><span class="math notranslate nohighlight">\(\Delta W\)</span>: Adaptation 중 accumulated된 gradient update</p></li>
-<li><p><span class="math notranslate nohighlight">\(r\)</span>: LoRA module의 rank</p></li>
-<li><p>이전 연구의 convention을 사용하고 optimizer는 Adam을 이용</p></li>
-<li><p>Transformer MLP feedforward dimension <span class="math notranslate nohighlight">\(d_{ffn} = 4 \times d_{model}\)</span></p></li>
-</ul>
-</section>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="problem-statement">
-<h1>2. Problem Statement<a class="headerlink" href="#problem-statement" title="Permalink to this heading">#</a></h1>
-<p>LoRA는 agnostic하지만 본 논문에서는 language model에 집중함.</p>
-<blockquote>
-<div><ul class="simple">
-<li><p>agnostic: model에 구애받지 않고 해석이 가능함</p></li>
-</ul>
-</div></blockquote>
-<div class="math notranslate nohighlight">
-\[
-\max _{\Phi} \sum_{(x, y) \in \mathcal{Z}} \sum_{t=1}^{|y|} \log \left(P_{\Phi}\left(y_t \mid x, y_{&lt;t}\right)\right)
-\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(P_{\Phi}\left(y \mid x\right)\)</span>: <span class="math notranslate nohighlight">\(\Phi\)</span>로 parameterized된 pre-trained model</p></li>
-<li><p><span class="math notranslate nohighlight">\(\mathcal{Z} = \{(x_i, y_i)\}_{i=1,...,N}\)</span>: context-target쌍으로 된 학습 데이터셋, <span class="math notranslate nohighlight">\(x_i, y_i\)</span>는 token sequence</p></li>
-</ul>
-<p>Fine-tuning 과정에서 model은 <span class="math notranslate nohighlight">\(\Phi_0\)</span>으로 init.되고 objective를 maximize하기 위해 <span class="math notranslate nohighlight">\(\Phi_0 + \Delta \Phi\)</span> 로 업데이트됨. 각 downstream task를 위해 매번 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span>와 같은 크기의 <span class="math notranslate nohighlight">\(|\Delta \Phi|\)</span>를 학습해 엄청난 cost가 발생.</p>
-<div class="math notranslate nohighlight">
-\[
-\max _{\Theta} \sum_{(x, y) \in \mathcal{Z}} \sum_{t=1}^{|y|} \log \left(p_{\Phi_0+\Delta \Phi(\Theta)}\left(y_t \mid x, y_{&lt;t}\right)\right)
-\]</div>
-<p>반면 위와 같은 LoRA 방식으로 fine-tuning할 경우 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span> 전체가 아니라 그보다 작은 <span class="math notranslate nohighlight">\(|\Theta|\)</span>를 찾아내는 방식으로 바뀌기 때문에 compute-/memory-efficient해짐. <span class="math notranslate nohighlight">\(|\Theta|\)</span>는 최대 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span>의 0.01%까지 작아질 수 있음.</p>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="aren-t-existing-solutions-good-enough">
-<h1>3. Aren’t Existing Solutions Good Enough?<a class="headerlink" href="#aren-t-existing-solutions-good-enough" title="Permalink to this heading">#</a></h1>
-<p>기존에도 transfer learning에서 parameter-/compute-efficient를 위한 방법은 몇 가지가 있었음.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(2).png"><img alt="LoRA_02" class="bg-primary mb-1" src="../../_images/image(2).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 178 </span><span class="caption-text">Performance Comparison</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>하지만 adapter layer를 추가하는 방식은 hardware parallelism이 없다면 작은 bottleneck layer만 추가해도 latency가 상당히 증가해 사용하기 어려웠음.</p>
-<p>Prefix tuning은 optimize가 어려웠음.</p>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="our-method">
-<h1>4. Our Method<a class="headerlink" href="#our-method" title="Permalink to this heading">#</a></h1>
-<section id="low-rank-parameterized-update-matrices">
-<h2>4.1. Low-Rank-Parameterized Update Matrices<a class="headerlink" href="#low-rank-parameterized-update-matrices" title="Permalink to this heading">#</a></h2>
-<div class="math notranslate nohighlight">
-\[
-h=W_0 x+\Delta W x=W_0 x+B A x
-\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(W_0 \in \mathbb{R}^{d \times k}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(B \in \mathbb{R}^{d \times r}, A \in \mathbb{R}^{r \times k}\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(r \ll min(d,k)\)</span></p></li>
-</ul>
-<p><span class="math notranslate nohighlight">\(W_0\)</span>는 고정하고 <span class="math notranslate nohighlight">\(A, B\)</span>만 학습. 이후 <span class="math notranslate nohighlight">\(W_0\)</span>와 <span class="math notranslate nohighlight">\(\Delta W = BA\)</span>는 같은 input <span class="math notranslate nohighlight">\(x\)</span>에 곱해진 후 output vector끼리 coordinate-wise하게 sum.</p>
-<p><span class="math notranslate nohighlight">\(A\)</span>는 random Gaussian init., <span class="math notranslate nohighlight">\(B\)</span>는 zero-init.이라 <span class="math notranslate nohighlight">\(\Delta W\)</span> 또한 처음에는 zero-init. <span class="math notranslate nohighlight">\(\Delta W x\)</span>는 <span class="math notranslate nohighlight">\(\alpha/r\)</span>로 scaling됨. <span class="math notranslate nohighlight">\(\alpha\)</span>는 learning rate처럼 tuning해서 r과 같은 값으로 설정. 실제 코드에서는 보통 <span class="math notranslate nohighlight">\(r, \alpha\)</span>는 (8, 16)이나 (16, 32)를 사용한다고 함.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>	<span class="o">...</span>
-        <span class="c1"># Actual trainable parameters</span>
-    	<span class="c1"># define A, B</span>
-        <span class="k">if</span> <span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">new_zeros</span><span class="p">((</span><span class="n">r</span><span class="p">,</span> <span class="n">num_embeddings</span><span class="p">)))</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">new_zeros</span><span class="p">((</span><span class="n">embedding_dim</span><span class="p">,</span> <span class="n">r</span><span class="p">)))</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_alpha</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span>
-            <span class="c1"># Freezing the pre-trained weight matrix</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">requires_grad</span> <span class="o">=</span> <span class="kc">False</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">reset_parameters</span><span class="p">()</span>
-
-	<span class="c1"># initialize A, B</span>
-    <span class="k">def</span> <span class="nf">reset_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">reset_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
-        <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;lora_A&#39;</span><span class="p">):</span>
-            <span class="c1"># initialize A the same way as the default for nn.Linear and B to zero</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">zeros_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">normal_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
-        <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="p">)</span>
-        <span class="k">if</span> <span class="n">mode</span><span class="p">:</span>
-            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">merge_weights</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
-                <span class="c1"># Make sure that the weights are not merged</span>
-                <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
-                    <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span> <span class="o">-=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
-                <span class="bp">self</span><span class="o">.</span><span class="n">merged</span> <span class="o">=</span> <span class="kc">False</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">merge_weights</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
-                <span class="c1"># Merge the weights and mark it</span>
-                <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
-                    <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span> <span class="o">+=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
-                <span class="bp">self</span><span class="o">.</span><span class="n">merged</span> <span class="o">=</span> <span class="kc">True</span>
-        
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
-        	<span class="c1"># pre-trained weight W_0 * x</span>
-            <span class="n">result</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
-            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
-            	<span class="c1"># BA * x</span>
-                <span class="n">after_A</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
-                    <span class="n">x</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">padding_idx</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_norm</span><span class="p">,</span>
-                    <span class="bp">self</span><span class="o">.</span><span class="n">norm_type</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale_grad_by_freq</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparse</span>
-                <span class="p">)</span>
-                <span class="c1"># W_0x + BAx</span>
-                <span class="n">result</span> <span class="o">+=</span> <span class="p">(</span><span class="n">after_A</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
-            <span class="k">return</span> <span class="n">result</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="k">return</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
-
-</pre></div>
-</div>
-<section id="no-additional-inference-latency">
-<h3>4.1.1. No Additional Inference Latency<a class="headerlink" href="#no-additional-inference-latency" title="Permalink to this heading">#</a></h3>
-<p>LoRA를 이용하면 inference시 latency 성능 하락이 없음. 또한 다른 task에 사용할 경우엔 <span class="math notranslate nohighlight">\(BA\)</span>만 제외하고 <span class="math notranslate nohighlight">\(W_0\)</span>로 학습한 다른 <span class="math notranslate nohighlight">\(B'A'\)</span>만 추가하면 되기 때문에 memory overhead가 낮음.</p>
-</section>
-</section>
-<section id="applying-lora-to-transformer">
-<h2>4.2. Applying LoRA to Transformer<a class="headerlink" href="#applying-lora-to-transformer" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서는 trainable weight를 최소화하기 위해 LoRA를 attention weight만 적용하고 MLP module은 고정함. 이를 통해 GPT-3 175B를 기준으로 VRAM은 1.2TB에서 350GB, checkpoint size는 350GB에서 35MB로 줄임. 또한 학습 속도 또한 25% 정도 빨라짐.</p>
-</section>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="empirical-experiments">
-<h1>5. Empirical Experiments<a class="headerlink" href="#empirical-experiments" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(3).png"><img alt="LoRA_03" class="bg-primary mb-1" src="../../_images/image(3).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 179 </span><span class="caption-text">Performance on BERT</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(4).png"><img alt="LoRA_04" class="bg-primary mb-1" src="../../_images/image(4).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 180 </span><span class="caption-text">Performance on GPT-2</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(5).png"><img alt="LoRA_05" class="bg-primary mb-1" src="../../_images/image(5).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 181 </span><span class="caption-text">Performance on GPT-3</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>대부분의 경우에서 성능이 좋음</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(6).png"><img alt="LoRA_06" class="bg-primary mb-1" src="../../_images/image(6).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 182 </span><span class="caption-text">Validation accuracy table with different hyper-parameters</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(7).png"><img alt="LoRA_07" class="bg-primary mb-1" src="../../_images/image(7).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 183 </span><span class="caption-text">Validation accuracy table with different hyper-parameters</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Transformer에서 한 projection matrix에 큰 r을 적용하는 것보다 모든 matrices에 작은 r을 적용하는 것이 더 성능이 좋았음.</p>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="a-ia3">
-<h1>+a) IA3<a class="headerlink" href="#a-ia3" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(8).png"><img alt="LoRA_08" class="bg-primary mb-1" src="../../_images/image(8).png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 184 </span><span class="caption-text">IA3 structure</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>뉴럴네트워크의 Inner Activation을 줄이기도하고 늘리기도하는 어댑터를 중간에 삽입하는 방법론. 기존에 공개된 LoRA보다 적은 파라미터를 사용하면서 높은 성능을 내는 것으로 알려져있으며, GPT-3를 in-context learning 했을때 보다도 성능이 좋다 라고 주장하고 있음. 학습시간도 매우 짧아 A100 GPU 하나로 30분만에 튜닝할 수 있었다고 함.</p>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="aa-lora">
-<h1>+aa) LoRA 사용법<a class="headerlink" href="#aa-lora" title="Permalink to this heading">#</a></h1>
-<ol class="arabic simple">
-<li><p><code class="docutils literal notranslate"><span class="pre">loralib</span></code> 설치</p></li>
-</ol>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">loralib</span>
-<span class="c1"># Alternatively</span>
-<span class="c1"># pip install git+https://github.com/microsoft/LoRA</span>
-</pre></div>
-</div>
-<ol class="arabic simple" start="2">
-<li><p>기존 <code class="docutils literal notranslate"><span class="pre">nn.Linear</span></code>, <code class="docutils literal notranslate"><span class="pre">nn.Embedding</span></code>, <code class="docutils literal notranslate"><span class="pre">nn.Conv2d</span></code>를 <code class="docutils literal notranslate"><span class="pre">lora.~</span></code>로 대체</p></li>
-</ol>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ===== Before =====</span>
-<span class="c1"># layer = nn.Linear(in_features, out_features)</span>
-
-<span class="c1"># ===== After ======</span>
-<span class="kn">import</span> <span class="nn">loralib</span> <span class="k">as</span> <span class="nn">lora</span>
-<span class="c1"># Add a pair of low-rank adaptation matrices with rank r=16</span>
-<span class="n">layer</span> <span class="o">=</span> <span class="n">lora</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="p">,</span> <span class="n">out_features</span><span class="p">,</span> <span class="n">r</span><span class="o">=</span><span class="mi">16</span><span class="p">)</span>
-</pre></div>
-</div>
-<ol class="arabic simple" start="3">
-<li><p>학습 전, lora parameter만 학습 가능하게 설정</p></li>
-</ol>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">loralib</span> <span class="k">as</span> <span class="nn">lora</span>
-<span class="n">model</span> <span class="o">=</span> <span class="n">BigModel</span><span class="p">()</span>
-<span class="c1"># This sets requires_grad to False for all parameters without the string &quot;lora_&quot; in their names</span>
-<span class="n">lora</span><span class="o">.</span><span class="n">mark_only_lora_as_trainable</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
-<span class="c1"># Training loop</span>
-<span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">dataloader</span><span class="p">:</span>
-   <span class="o">...</span>
-</pre></div>
-</div>
-<ol class="arabic simple" start="4">
-<li><p>checkpoint를 저장할 때엔 <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>가 LoRA parameter만 저장하게 함.</p></li>
-</ol>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ===== Before =====</span>
-<span class="c1"># torch.save(model.state_dict(), checkpoint_path)</span>
-<span class="c1"># ===== After =====</span>
-<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">lora</span><span class="o">.</span><span class="n">lora_state_dict</span><span class="p">(</span><span class="n">model</span><span class="p">),</span> <span class="n">checkpoint_path</span><span class="p">)</span>
-</pre></div>
-</div>
-<ol class="arabic simple" start="5">
-<li><p>checkpoint를 불러올 때엔 <code class="docutils literal notranslate"><span class="pre">load_state_dict</span></code>에서 <code class="docutils literal notranslate"><span class="pre">strict=False</span></code>로 설정.</p></li>
-</ol>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Load the pretrained checkpoint first</span>
-<span class="n">model</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s1">&#39;ckpt_pretrained.pt&#39;</span><span class="p">),</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-<span class="c1"># Then load the LoRA checkpoint</span>
-<span class="n">model</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s1">&#39;ckpt_lora.pt&#39;</span><span class="p">),</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-</pre></div>
-</div>
-</section>
-<hr class="docutils" />
-<section class="tex2jax_ignore mathjax_ignore" id="reference">
-<h1>Reference<a class="headerlink" href="#reference" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p><a class="reference external" href="https://da2so.tistory.com/79">LoRA 논문 리뷰</a></p></li>
-<li><p><a class="reference external" href="https://devocean.sk.com/blog/techBoardDetail.do?ID=164779&amp;boardType=techBlog">LLM 모델 튜닝, 하나의 GPU로 가능할까? Parameter Efficient Fine-Tuning(PEFT)을 소개합니다!</a></p></li>
-<li><p><a class="reference external" href="https://zzambab98.tistory.com/226">Stable Diffusion LoRA 생성 및 사용법</a></p></li>
-<li><p><a class="reference external" href="https://www.internetmap.kr/entry/How-to-LoRA-Model">Stable Diffusion - LoRA 모델 사용법
-</a></p></li>
-<li><p><a class="reference external" href="https://github.com/microsoft/LoRA">LoRA github</a></p></li>
-<li><p><a class="reference external" href="https://www.youtube.com/watch?v=dA-NhCtrrVE">https://www.youtube.com/watch?v=dA-NhCtrrVE</a></p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="CustomDiffusion.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Custom Diffusion</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="I-DDPM.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">I-DDPM</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LoRA</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminologies-and-conventions">1.1. Terminologies and Conventions</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#problem-statement">2. Problem Statement</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aren-t-existing-solutions-good-enough">3. Aren’t Existing Solutions Good Enough?</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#our-method">4. Our Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-parameterized-update-matrices">4.1. Low-Rank-Parameterized Update Matrices</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#no-additional-inference-latency">4.1.1. No Additional Inference Latency</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applying-lora-to-transformer">4.2. Applying LoRA to Transformer</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#empirical-experiments">5.Empirical Experiments</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#a-ia3">+a) IA3</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aa-lora">+aa) LoRA 사용법</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#reference">Reference</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>LoRA &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/LoRA';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="I-DDPM" href="I-DDPM.html" />
+    <link rel="prev" title="Custom Diffusion" href="CustomDiffusion.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/LoRA.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/LoRA.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>LoRA</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LoRA</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminologies-and-conventions">1.1. Terminologies and Conventions</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#problem-statement">2. Problem Statement</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aren-t-existing-solutions-good-enough">3. Aren’t Existing Solutions Good Enough?</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#our-method">4. Our Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-parameterized-update-matrices">4.1. Low-Rank-Parameterized Update Matrices</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#no-additional-inference-latency">4.1.1. No Additional Inference Latency</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applying-lora-to-transformer">4.2. Applying LoRA to Transformer</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#empirical-experiments">5.Empirical Experiments</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#a-ia3">+a) IA3</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aa-lora">+aa) LoRA 사용법</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#reference">Reference</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> LoRA: Low-Rank Adaptation of Large Language Models (ICLR 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2106.09685">https://arxiv.org/abs/2106.09685</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/microsoft/LoRA">Official implementation (microsoft/LoRA)</a></p></li>
+<li><p>Review: <a class="reference external" href="https://www.youtube.com/watch?v=1j0W_lu55nc">PR-409: Denoising Diffusion Probabilistic Models</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Beomsoo Park</p></li>
+<li><p><strong>Last updated on Apr. 19, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="lora">
+<h1>LoRA<a class="headerlink" href="#lora" title="Permalink to this heading">#</a></h1>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<p>LoRA는 <strong>PEFT(Parameter-Efficient Fine-Tuning)의 기법 중 하나</strong>이다. Pre-trained model의 weight는 고정한 채로, <strong>몇 개의 dense(fc) layer만 학습시켜 downstream task의 연산량을 줄일 수 있다.</strong> GPT-3을 기준으로 parameter는 10000배, GPU 메모리는 3배를 줄일 수 있다. 또한 inference 과정에서 추가적인 latency가 없음.</p>
+<blockquote>
+<div><ul class="simple">
+<li><p>PEFT: 모델의 모든 파라미터를 튜닝하는 것이 아닌 일부 파라미터만을 튜닝함으로써 모델의 성능을 적은 자원으로도 높게 유지하는 방법론</p></li>
+</ul>
+</div></blockquote>
+<ul class="simple">
+<li><p>Downstream task: pre-trained model을 사용해, 어떤 문제를 해결하기 위해 fine-tuning 하는것</p></li>
+<li><p>Upstream task: Pre-train model을 학습시키는것</p></li>
+<li><p>Latency: 어떤 요청의 시작부터 완료까지 걸리는 시간</p></li>
+</ul>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>LLM은 기본적으로 pre-trained model을 특정 task에 맞게 fine-tuning을 시킴. 하지만 fine-tuning에서 모든 weight를 다시 학습시키면 GPT-2, GPT-3, RoBERTa 등 큰 모델의 경우 학습에 몇 달이 걸림.</p>
+<p>이전 연구에서 over-parameterized model들은 low intrinsic dimension에 기반하고 있다는 사실에 착안해, 저자는 학습 과정에서도 모델은 <code class="docutils literal notranslate"><span class="pre">low</span> <span class="pre">intrinsic</span> <span class="pre">rank</span></code>를 갖고 있을 것이라 가정함.</p>
+<p><strong>LoRA는 기존 pre-trained weight는 고정하고, 몇 개의 dense layer만 rank decomposition matrices를 최적화하는 방식으로 학습</strong>시키기로 함.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(0).png"><img alt="LoRA_00" class="bg-primary mb-1" src="../../_images/image(0).png" style="width: 550px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 176 </span><span class="caption-text">LoRA structure</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(1).png"><img alt="LoRA_01" class="bg-primary mb-1" src="../../_images/image(1).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 177 </span><span class="caption-text">LoRA structure 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 그림처럼 <strong>기존 pre-trained weight <span class="math notranslate nohighlight">\(W\)</span>는 고정하고 low rank decomposition된 weight <span class="math notranslate nohighlight">\(A, B\)</span>만 학습시켜 <span class="math notranslate nohighlight">\(W\)</span>에 더해줌</strong>. <span class="math notranslate nohighlight">\(A, B\)</span>의 크기는 <span class="math notranslate nohighlight">\(W\)</span>보다 작아 time, computational cost를 최대 3배까지 줄일 수 있음. 또한 task에 따라 LoRA module(<span class="math notranslate nohighlight">\(A, B\)</span>)만 바꿔주면 되기 때문에 storage requirement, task-switching overhead를 줄일 수 있음. 이 외에도 추가적인 inference latency가 없다, 다른 기법들과 함께 적용이 가능하다는 장점이 있음.</p>
+<section id="terminologies-and-conventions">
+<h2>1.1. Terminologies and Conventions<a class="headerlink" href="#terminologies-and-conventions" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(d_{model}\)</span>: Transformer의 input/output dimension size</p></li>
+<li><p><span class="math notranslate nohighlight">\(W_q, W_k, W_v, W_o\)</span>: Self-attention module의 query/key/value/output projection matrices</p></li>
+<li><p><span class="math notranslate nohighlight">\(W, W_0\)</span>: Pre-trained weight</p></li>
+<li><p><span class="math notranslate nohighlight">\(\Delta W\)</span>: Adaptation 중 accumulated된 gradient update</p></li>
+<li><p><span class="math notranslate nohighlight">\(r\)</span>: LoRA module의 rank</p></li>
+<li><p>이전 연구의 convention을 사용하고 optimizer는 Adam을 이용</p></li>
+<li><p>Transformer MLP feedforward dimension <span class="math notranslate nohighlight">\(d_{ffn} = 4 \times d_{model}\)</span></p></li>
+</ul>
+</section>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="problem-statement">
+<h1>2. Problem Statement<a class="headerlink" href="#problem-statement" title="Permalink to this heading">#</a></h1>
+<p>LoRA는 agnostic하지만 본 논문에서는 language model에 집중함.</p>
+<blockquote>
+<div><ul class="simple">
+<li><p>agnostic: model에 구애받지 않고 해석이 가능함</p></li>
+</ul>
+</div></blockquote>
+<div class="math notranslate nohighlight">
+\[
+\max _{\Phi} \sum_{(x, y) \in \mathcal{Z}} \sum_{t=1}^{|y|} \log \left(P_{\Phi}\left(y_t \mid x, y_{&lt;t}\right)\right)
+\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(P_{\Phi}\left(y \mid x\right)\)</span>: <span class="math notranslate nohighlight">\(\Phi\)</span>로 parameterized된 pre-trained model</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mathcal{Z} = \{(x_i, y_i)\}_{i=1,...,N}\)</span>: context-target쌍으로 된 학습 데이터셋, <span class="math notranslate nohighlight">\(x_i, y_i\)</span>는 token sequence</p></li>
+</ul>
+<p>Fine-tuning 과정에서 model은 <span class="math notranslate nohighlight">\(\Phi_0\)</span>으로 init.되고 objective를 maximize하기 위해 <span class="math notranslate nohighlight">\(\Phi_0 + \Delta \Phi\)</span> 로 업데이트됨. 각 downstream task를 위해 매번 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span>와 같은 크기의 <span class="math notranslate nohighlight">\(|\Delta \Phi|\)</span>를 학습해 엄청난 cost가 발생.</p>
+<div class="math notranslate nohighlight">
+\[
+\max _{\Theta} \sum_{(x, y) \in \mathcal{Z}} \sum_{t=1}^{|y|} \log \left(p_{\Phi_0+\Delta \Phi(\Theta)}\left(y_t \mid x, y_{&lt;t}\right)\right)
+\]</div>
+<p>반면 위와 같은 LoRA 방식으로 fine-tuning할 경우 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span> 전체가 아니라 그보다 작은 <span class="math notranslate nohighlight">\(|\Theta|\)</span>를 찾아내는 방식으로 바뀌기 때문에 compute-/memory-efficient해짐. <span class="math notranslate nohighlight">\(|\Theta|\)</span>는 최대 <span class="math notranslate nohighlight">\(|\Phi_0|\)</span>의 0.01%까지 작아질 수 있음.</p>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="aren-t-existing-solutions-good-enough">
+<h1>3. Aren’t Existing Solutions Good Enough?<a class="headerlink" href="#aren-t-existing-solutions-good-enough" title="Permalink to this heading">#</a></h1>
+<p>기존에도 transfer learning에서 parameter-/compute-efficiency를 위한 방법은 몇 가지가 있었음.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(2).png"><img alt="LoRA_02" class="bg-primary mb-1" src="../../_images/image(2).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 178 </span><span class="caption-text">Performance Comparison</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>하지만 adapter layer를 추가하는 방식은 hardware parallelism이 없다면 작은 bottleneck layer만 추가해도 latency가 상당히 증가해 사용하기 어려웠음.</p>
+<p>Prefix tuning은 optimize가 어려웠음.</p>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="our-method">
+<h1>4. Our Method<a class="headerlink" href="#our-method" title="Permalink to this heading">#</a></h1>
+<section id="low-rank-parameterized-update-matrices">
+<h2>4.1. Low-Rank-Parameterized Update Matrices<a class="headerlink" href="#low-rank-parameterized-update-matrices" title="Permalink to this heading">#</a></h2>
+<div class="math notranslate nohighlight">
+\[
+h=W_0 x+\Delta W x=W_0 x+B A x
+\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(W_0 \in \mathbb{R}^{d \times k}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(B \in \mathbb{R}^{d \times r}, A \in \mathbb{R}^{r \times k}\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(r \ll min(d,k)\)</span></p></li>
+</ul>
+<p><span class="math notranslate nohighlight">\(W_0\)</span>는 고정하고 <span class="math notranslate nohighlight">\(A, B\)</span>만 학습. 이후 <span class="math notranslate nohighlight">\(W_0\)</span>와 <span class="math notranslate nohighlight">\(\Delta W = BA\)</span>는 같은 input <span class="math notranslate nohighlight">\(x\)</span>에 곱해진 후 output vector끼리 coordinate-wise하게 sum.</p>
+<p><span class="math notranslate nohighlight">\(A\)</span>는 random Gaussian init., <span class="math notranslate nohighlight">\(B\)</span>는 zero-init.이라 <span class="math notranslate nohighlight">\(\Delta W\)</span> 또한 처음에는 zero-init. <span class="math notranslate nohighlight">\(\Delta W x\)</span>는 <span class="math notranslate nohighlight">\(\alpha/r\)</span>로 scaling됨. <span class="math notranslate nohighlight">\(\alpha\)</span>를 tuning하는 것은 learning rate를 tuning하는 것과 거의 같기 때문에, 논문에서는 <span class="math notranslate nohighlight">\(\alpha\)</span>를 처음 시도한 <span class="math notranslate nohighlight">\(r\)</span>과 같은 값으로 고정하고 따로 tuning하지 않음. 실제 코드에서는 보통 <span class="math notranslate nohighlight">\(r, \alpha\)</span>는 (8, 16)이나 (16, 32)를 사용한다고 함.</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>	<span class="o">...</span>
+        <span class="c1"># Actual trainable parameters</span>
+    	<span class="c1"># define A, B</span>
+        <span class="k">if</span> <span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">new_zeros</span><span class="p">((</span><span class="n">r</span><span class="p">,</span> <span class="n">num_embeddings</span><span class="p">)))</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">new_zeros</span><span class="p">((</span><span class="n">embedding_dim</span><span class="p">,</span> <span class="n">r</span><span class="p">)))</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_alpha</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span>
+            <span class="c1"># Freezing the pre-trained weight matrix</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">requires_grad</span> <span class="o">=</span> <span class="kc">False</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">reset_parameters</span><span class="p">()</span>
+
+	<span class="c1"># initialize A, B</span>
+    <span class="k">def</span><span class="w"> </span><span class="nf">reset_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">reset_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+        <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;lora_A&#39;</span><span class="p">):</span>
+            <span class="c1"># initialize A the same way as the default for nn.Linear and B to zero</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">zeros_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">normal_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">train</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
+        <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="p">)</span>
+        <span class="k">if</span> <span class="n">mode</span><span class="p">:</span>
+            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">merge_weights</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
+                <span class="c1"># Make sure that the weights are not merged</span>
+                <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
+                    <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span> <span class="o">-=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
+                <span class="bp">self</span><span class="o">.</span><span class="n">merged</span> <span class="o">=</span> <span class="kc">False</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">merge_weights</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
+                <span class="c1"># Merge the weights and mark it</span>
+                <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
+                    <span class="bp">self</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span> <span class="o">+=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="p">)</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
+                <span class="bp">self</span><span class="o">.</span><span class="n">merged</span> <span class="o">=</span> <span class="kc">True</span>
+        
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">merged</span><span class="p">:</span>
+        	<span class="c1"># pre-trained weight W_0 * x</span>
+            <span class="n">result</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
+            <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">r</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
+            	<span class="c1"># BA * x</span>
+                <span class="n">after_A</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">embedding</span><span class="p">(</span>
+                    <span class="n">x</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_A</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">padding_idx</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_norm</span><span class="p">,</span>
+                    <span class="bp">self</span><span class="o">.</span><span class="n">norm_type</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">scale_grad_by_freq</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparse</span>
+                <span class="p">)</span>
+                <span class="c1"># W_0x + BAx</span>
+                <span class="n">result</span> <span class="o">+=</span> <span class="p">(</span><span class="n">after_A</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">lora_B</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scaling</span>
+            <span class="k">return</span> <span class="n">result</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="k">return</span> <span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
+
+</pre></div>
+</div>
+<section id="no-additional-inference-latency">
+<h3>4.1.1. No Additional Inference Latency<a class="headerlink" href="#no-additional-inference-latency" title="Permalink to this heading">#</a></h3>
+<p>LoRA를 이용하면 inference시 latency 성능 하락이 없음. 또한 다른 task에 사용할 경우엔 <span class="math notranslate nohighlight">\(BA\)</span>만 제외하고 <span class="math notranslate nohighlight">\(W_0\)</span>로 학습한 다른 <span class="math notranslate nohighlight">\(B'A'\)</span>만 추가하면 되기 때문에 memory overhead가 낮음.</p>
+</section>
+</section>
+<section id="applying-lora-to-transformer">
+<h2>4.2. Applying LoRA to Transformer<a class="headerlink" href="#applying-lora-to-transformer" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서는 trainable weight를 최소화하기 위해 LoRA를 attention weight에만 적용하고 MLP module은 고정함. 이를 통해 GPT-3 175B를 기준으로 VRAM은 1.2TB에서 350GB, checkpoint size는 350GB에서 35MB로 줄임. 학습 속도 또한 25% 정도 빨라짐.</p>
+</section>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="empirical-experiments">
+<h1>5. Empirical Experiments<a class="headerlink" href="#empirical-experiments" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(3).png"><img alt="LoRA_03" class="bg-primary mb-1" src="../../_images/image(3).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 179 </span><span class="caption-text">Performance on BERT</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(4).png"><img alt="LoRA_04" class="bg-primary mb-1" src="../../_images/image(4).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 180 </span><span class="caption-text">Performance on GPT-2</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(5).png"><img alt="LoRA_05" class="bg-primary mb-1" src="../../_images/image(5).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 181 </span><span class="caption-text">Performance on GPT-3</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>대부분의 경우에서 성능이 좋음</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(6).png"><img alt="LoRA_06" class="bg-primary mb-1" src="../../_images/image(6).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 182 </span><span class="caption-text">Validation accuracy table with different hyper-parameters</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(7).png"><img alt="LoRA_07" class="bg-primary mb-1" src="../../_images/image(7).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 183 </span><span class="caption-text">Validation accuracy table with different hyper-parameters</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Transformer에서 한 projection matrix에 큰 r을 적용하는 것보다 모든 matrices에 작은 r을 적용하는 것이 더 성능이 좋았음.</p>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="a-ia3">
+<h1>+a) IA3<a class="headerlink" href="#a-ia3" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image(8).png"><img alt="LoRA_08" class="bg-primary mb-1" src="../../_images/image(8).png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 184 </span><span class="caption-text">IA3 structure</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>뉴럴네트워크의 Inner Activation을 줄이기도하고 늘리기도하는 어댑터를 중간에 삽입하는 방법론. 기존에 공개된 LoRA보다 적은 파라미터를 사용하면서 높은 성능을 내는 것으로 알려져있으며, GPT-3를 in-context learning 했을때 보다도 성능이 좋다 라고 주장하고 있음. 학습시간도 매우 짧아 A100 GPU 하나로 30분만에 튜닝할 수 있었다고 함.</p>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="aa-lora">
+<h1>+aa) LoRA 사용법<a class="headerlink" href="#aa-lora" title="Permalink to this heading">#</a></h1>
+<ol class="arabic simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">loralib</span></code> 설치</p></li>
+</ol>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">loralib</span>
+<span class="c1"># Alternatively</span>
+<span class="c1"># pip install git+https://github.com/microsoft/LoRA</span>
+</pre></div>
+</div>
+<ol class="arabic simple" start="2">
+<li><p>기존 <code class="docutils literal notranslate"><span class="pre">nn.Linear</span></code>, <code class="docutils literal notranslate"><span class="pre">nn.Embedding</span></code>, <code class="docutils literal notranslate"><span class="pre">nn.Conv2d</span></code>를 <code class="docutils literal notranslate"><span class="pre">lora.~</span></code>로 대체</p></li>
+</ol>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ===== Before =====</span>
+<span class="c1"># layer = nn.Linear(in_features, out_features)</span>
+
+<span class="c1"># ===== After ======</span>
+<span class="kn">import</span><span class="w"> </span><span class="nn">loralib</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">lora</span>
+<span class="c1"># Add a pair of low-rank adaptation matrices with rank r=16</span>
+<span class="n">layer</span> <span class="o">=</span> <span class="n">lora</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_features</span><span class="p">,</span> <span class="n">out_features</span><span class="p">,</span> <span class="n">r</span><span class="o">=</span><span class="mi">16</span><span class="p">)</span>
+</pre></div>
+</div>
+<ol class="arabic simple" start="3">
+<li><p>학습 전, lora parameter만 학습 가능하게 설정</p></li>
+</ol>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">loralib</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">lora</span>
+<span class="n">model</span> <span class="o">=</span> <span class="n">BigModel</span><span class="p">()</span>
+<span class="c1"># This sets requires_grad to False for all parameters without the string &quot;lora_&quot; in their names</span>
+<span class="n">lora</span><span class="o">.</span><span class="n">mark_only_lora_as_trainable</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
+<span class="c1"># Training loop</span>
+<span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">dataloader</span><span class="p">:</span>
+   <span class="o">...</span>
+</pre></div>
+</div>
+<ol class="arabic simple" start="4">
+<li><p>checkpoint를 저장할 때엔 <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>가 LoRA parameter만 저장하게 함.</p></li>
+</ol>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ===== Before =====</span>
+<span class="c1"># torch.save(model.state_dict(), checkpoint_path)</span>
+<span class="c1"># ===== After =====</span>
+<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">lora</span><span class="o">.</span><span class="n">lora_state_dict</span><span class="p">(</span><span class="n">model</span><span class="p">),</span> <span class="n">checkpoint_path</span><span class="p">)</span>
+</pre></div>
+</div>
+<ol class="arabic simple" start="5">
+<li><p>checkpoint를 불러올 때엔 <code class="docutils literal notranslate"><span class="pre">load_state_dict</span></code>에서 <code class="docutils literal notranslate"><span class="pre">strict=False</span></code>로 설정.</p></li>
+</ol>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Load the pretrained checkpoint first</span>
+<span class="n">model</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s1">&#39;ckpt_pretrained.pt&#39;</span><span class="p">),</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+<span class="c1"># Then load the LoRA checkpoint</span>
+<span class="n">model</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s1">&#39;ckpt_lora.pt&#39;</span><span class="p">),</span> <span class="n">strict</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+</pre></div>
+</div>
+</section>
+<hr class="docutils" />
+<section class="tex2jax_ignore mathjax_ignore" id="reference">
+<h1>Reference<a class="headerlink" href="#reference" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p><a class="reference external" href="https://da2so.tistory.com/79">LoRA 논문 리뷰</a></p></li>
+<li><p><a class="reference external" href="https://devocean.sk.com/blog/techBoardDetail.do?ID=164779&amp;boardType=techBlog">LLM 모델 튜닝, 하나의 GPU로 가능할까? Parameter Efficient Fine-Tuning(PEFT)을 소개합니다!</a></p></li>
+<li><p><a class="reference external" href="https://zzambab98.tistory.com/226">Stable Diffusion LoRA 생성 및 사용법</a></p></li>
+<li><p><a class="reference external" href="https://www.internetmap.kr/entry/How-to-LoRA-Model">Stable Diffusion - LoRA 모델 사용법
+</a></p></li>
+<li><p><a class="reference external" href="https://github.com/microsoft/LoRA">LoRA github</a></p></li>
+<li><p><a class="reference external" href="https://www.youtube.com/watch?v=dA-NhCtrrVE">https://www.youtube.com/watch?v=dA-NhCtrrVE</a></p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="CustomDiffusion.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Custom Diffusion</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="I-DDPM.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">I-DDPM</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">LoRA</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#terminologies-and-conventions">1.1. Terminologies and Conventions</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#problem-statement">2. Problem Statement</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aren-t-existing-solutions-good-enough">3. Aren’t Existing Solutions Good Enough?</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#our-method">4. Our Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#low-rank-parameterized-update-matrices">4.1. Low-Rank-Parameterized Update Matrices</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#no-additional-inference-latency">4.1.1. No Additional Inference Latency</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applying-lora-to-transformer">4.2. Applying LoRA to Transformer</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#empirical-experiments">5. Empirical Experiments</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#a-ia3">+a) IA3</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#aa-lora">+aa) LoRA 사용법</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#reference">Reference</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Make_A_Video.html b/docs/review/Make_A_Video.html
old mode 100644
new mode 100755
index 7e2dad20..e9697a64
--- a/docs/review/Make_A_Video.html
+++ b/docs/review/Make_A_Video.html
@@ -1,1236 +1,1256 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Make A Video &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Make_A_Video';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="VideoLDM" href="VideoLDM.html" />
-    <link rel="prev" title="MimicBrush: Zero-shot Image Editing with Reference Imitation" href="MimicBrush.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Make_A_Video.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Make_A_Video.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Make A Video</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Make-A-video 제안 배경</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Make-A-video 특성</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#previous-work">2. Previous Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-model">3.1. Text-To-Image Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatiotemporal-layers">3.2. Spatiotemporal Layers</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-3d-convolutional-layers">3.2.1 Pseudo-3D convolutional layers</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#psuedo-3d-attention-layers">3.2.2. Psuedo-3D attention layers</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#frame-interpolation-network">3.3 Frame Interpolation Network</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3.4 Training</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-and-settings">4.1 Dataset and Settings</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">Datasets</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-metrics">Automatic Metrics</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation-set-and-metrics">Human Evaluation Set and Metrics</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evaluaton-on-msr-vtt">Automatic Evaluaton on MSR-VTT</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evluation-on-ucf-101">Automatic Evluation on UCF-101</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation">Human Evaluation</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.3 Qualitative Results</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">5. 결론</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Make-A-Video: Text-to-Video Generation without Text-Video Data</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2209.14792">https://arxiv.org/abs/2209.14792</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
-<li><p><strong>Last updated on Nov. 26, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="make-a-video">
-<h1>Make A Video<a class="headerlink" href="#make-a-video" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>참고 코드: <a class="github reference external" href="https://github.com/lucidrains/make-a-video-pytorch">lucidrains/make-a-video-pytorch</a></p></li>
-</ul>
-<aside>
-💡 핵심 요약 
-<ul class="simple">
-<li><p>Text-to-Image(T2I)를 이용하여 Text-to-Video(T2V)를 수행함</p></li>
-<li><p>Make-a-Video의 장점</p>
-<ol class="arabic simple">
-<li><p>T2V 모델의 학습을 가속화 하였음</p></li>
-<li><p>Text-video 데이터가 필요하지 않음</p></li>
-<li><p>이미지 생성 모델의 방대하다는 특성을 그대로 유지함</p></li>
-</ol>
-</li>
-<li><p>방법론</p>
-<ol class="arabic simple">
-<li><p>Full temporal U-net과 attention tensor를 분해하여 공간(space)과 시간(time)으로 근사화 함</p></li>
-<li><p>다양한 어플리케이션에 적용하기 위한 spatial temporal pipeline을 설계함</p></li>
-</ol>
-</li>
-<li><p>관련 모듈</p>
-<ol class="arabic simple">
-<li><p>Pseudo-3D convolutional layer</p></li>
-<li><p>Pseudo-3D attention layer</p></li>
-<li><p>Frame interpolation network</p></li>
-</ol>
-</li>
-<li><p>결과: text-to-video 생성 태스크에서 SOTA 달성</p></li>
-</ul>
-</aside>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<section id="id1">
-<h3>Make-A-video 제안 배경<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>T2I 모델링을 할 수 있는 데이터는 인터넷을 통해 확보될 수 있으나, 비슷한 규모의 텍스트 비디오 데이터셋을 수집하기는 어렵다.</p></li>
-<li><p>T2I 모델이 존재하는데 T2V 모델을 처음부터 학습 시키는 것은 낭비일 수 있다.</p></li>
-<li><p>비지도 학습을 사용하여 더 많은 데이터를 학습할 수 있다.</p></li>
-</ul>
-</section>
-<section id="id2">
-<h3>Make-A-video 특성<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>T2I 모델을 활용하여, 레이블이 지정되지 않은 비디오 데이터에 대해 비지도 학습을 사용하여 학습한다 → 페어링된 텍스트-비디오 데이터 없이도 텍스트에서 비디오를 생성할 수 있다.</p></li>
-<li><p>텍스트 없이도 비지도 비디오만으로 세상의 다양한 개체가 어떻게 움직이고 상호 작용하는지 학습할 수 있다.</p></li>
-</ul>
-</section>
-<section id="contribution">
-<h3>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>디퓨전 기반의 T2I 모델을 T2V로 확장하는 효과적인 방법인 Make-A-Video를 소개한다.</p></li>
-<li><p>Text-to-image 를 prior로 사용하여 text-video 데이터의 필요성을 우회한다.</p></li>
-<li><p>고화질, 고프레임률 비디오를 생성하는 super-resolution 전략을 제안한다.</p></li>
-<li><p>Make-A-Video를 기존 T2V 시스템과 비교하여 평가한다. 또한, 제로샷 T2V human evaluation을 위해 300개의 프롬프트 테스트 세트를 수집하여 공개할 계획이다.</p></li>
-</ul>
-</section>
-</section>
-<section id="previous-work">
-<h2>2. Previous Work<a class="headerlink" href="#previous-work" title="Permalink to this heading">#</a></h2>
-</section>
-<section id="method">
-<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>Make-A-Video의 주요 요소</p>
-<ol class="arabic simple">
-<li><p>텍스트-이미지 쌍으로 학습된 base T2I 모델</p></li>
-<li><p>신경망의 블록을 시간 차원으로 확장하는 시공간 convolution 및 attention layer</p></li>
-<li><p>두 시공간 layer로 구성된 시공간 신경망과 높은 프레임 속도 생성을 위한 frame interpolation network</p></li>
-</ol>
-</li>
-<li><p>Make-A-Video의 최종 inference 수식</p>
-<figure class="align-default" id="id4">
-<img alt="make_a_video_00" class="bg-primary mb-1" src="../../_images/001.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 576 </span><span class="caption-text">최종 inference 수식</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(SR_h\)</span>: spatial super-resolution network</p></li>
-<li><p><span class="math notranslate nohighlight">\(SR^t_l\)</span>: spatiotemporal super-resolution network</p></li>
-<li><p><span class="math notranslate nohighlight">\(\uparrow_{F}\)</span>: frame interpolation network</p></li>
-<li><p><span class="math notranslate nohighlight">\(D^t\)</span>: spatiotemporal decoder</p></li>
-<li><p><span class="math notranslate nohighlight">\(P\)</span>: prior network</p></li>
-<li><p><span class="math notranslate nohighlight">\(\hat{x}\)</span>: BPE-encoded text</p></li>
-<li><p><span class="math notranslate nohighlight">\(C_x\)</span>: CLIP text encoder</p></li>
-<li><p><span class="math notranslate nohighlight">\(x\)</span>: input text</p></li>
-</ul>
-</li>
-</ul>
-<section id="text-to-image-model">
-<h3>3.1. Text-To-Image Model<a class="headerlink" href="#text-to-image-model" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p><a class="reference external" href="https://arxiv.org/abs/2205.11487">“Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding(Imagen)”</a>와 연구 내용을 공유하였다.</p></li>
-<li><p>Imagen</p>
-<figure class="align-default" id="id5">
-<img alt="make_a_video_01" class="bg-primary mb-1" src="../../_images/Untitled1.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 577 </span><span class="caption-text">Imagen 구조</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>고해상도 이미지를 만들기 위해 사용한 네트워크</p>
-<ul>
-<li><p>A prior Network <span class="math notranslate nohighlight">\(P\)</span>: 텍스트 임베딩 <span class="math notranslate nohighlight">\(x_e\)</span>와 BPE encoded text tokens <span class="math notranslate nohighlight">\(\hat{x}\)</span>이 주어졌을 때 이미지 임베딩 <span class="math notranslate nohighlight">\(y_e\)</span>를 생성하는 네트워크</p></li>
-<li><p>Decoder Network <span class="math notranslate nohighlight">\(D\)</span>: 이미지 임베딩 <span class="math notranslate nohighlight">\(y_e\)</span>로부터 저해상도 64X64 RGB 이미지 <span class="math notranslate nohighlight">\(\hat{y}_l\)</span>를 생성하는 네트워크</p></li>
-<li><p>Super-resolution network <span class="math notranslate nohighlight">\(SR_l\)</span>, <span class="math notranslate nohighlight">\(SR_h\)</span>: D에서 생성된 이미지 64X64 저해상도 이미지 <span class="math notranslate nohighlight">\(\hat{y}_l\)</span>를 256X256, 768X768 픽셀로 증가시켜 최종 이미지 <span class="math notranslate nohighlight">\(\hat{y}\)</span>를 만드는 네트워크</p>
-<figure class="align-default" id="id6">
-<img alt="make_a_video_02" class="bg-primary mb-1" src="../../_images/1.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 578 </span><span class="caption-text">text <span class="math notranslate nohighlight">\(x\)</span>가 prior <span class="math notranslate nohighlight">\(P\)</span>를 통해 image embedding 변환된다.
-fps: desired frame rate</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="spatiotemporal-layers">
-<h3>3.2. Spatiotemporal Layers<a class="headerlink" href="#spatiotemporal-layers" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>2차원 조건부 네트워크를 시간적 차원으로 확장하기 위해 다음의 구성 요소를 수정한다.</p>
-<ul class="simple">
-<li><p>Convolutional layers</p></li>
-<li><p>Attention layers</p></li>
-</ul>
-</li>
-<li><p>Fully-connected layers는 특별한 수정을 할 필요 없이 시간 정보만 추가해주면 된다.</p></li>
-<li><p>구성 요소 수정 결과 <span class="math notranslate nohighlight">\(D^t\)</span>는 64X64 사이즈의 16 RGB frame을 만들게 된다.</p></li>
-<li><p>Frame interpolation network <span class="math notranslate nohighlight">\(\uparrow_{F}\)</span>가 생성된 16개의 프레임과 super-resolution 네트워크 <span class="math notranslate nohighlight">\(SR^t_l\)</span> 사이를 보간하여 프레임 속도를 증가시킨다.</p></li>
-<li><p>Super-resolution 네트워크에는 hallucinating information(환각 정보)가 포함 된다. 깜박이는 잔상이 생기지 않으려면, 환각이 프레임 전체에 걸쳐 일관성을 유지해야 한다.</p>
-<ul>
-<li><p>Hallucinating information</p>
-<p>실제로 존재하지 않는 정보나 세부 사항을 생성하거나 가상으로 추가하는 것</p>
-</li>
-</ul>
-</li>
-<li><p>프레임당 super resolution을 수행하는 것보다 spatiotemporal 모듈인 <span class="math notranslate nohighlight">\(SR^t_l\)</span>가 더 좋은 성능을 보였다.</p></li>
-<li><p>하지만, <span class="math notranslate nohighlight">\(SR_h\)</span>를 위와 같은 모듈로 만들기엔 메모리 및 컴퓨팅 제약과 고해상도 비디오 데이터의 부족으로 <span class="math notranslate nohighlight">\(SR_h\)</span>를 위와 같이 시간적 차원으로 확장하는 것은 어려웠다 → <span class="math notranslate nohighlight">\(SR_h\)</span>는 공간적 차원에서 작동한다.( 각 프레임에 대해 동일한 노이즈 초기화를 사용하여 프레임 전반에 걸쳐 일관된 환각을 제공함)</p></li>
-</ul>
-<section id="pseudo-3d-convolutional-layers">
-<h4>3.2.1 Pseudo-3D convolutional layers<a class="headerlink" href="#pseudo-3d-convolutional-layers" title="Permalink to this heading">#</a></h4>
-<figure class="align-default" id="id7">
-<img alt="make_a_video_03" class="bg-primary mb-1" src="../../_images/2.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 579 </span><span class="caption-text">Architecture of Pseudo-3D convolutional layers</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>2D 컨벌루션 레이어 다음에 1D 컨벌루션을 쌓는다 (Cf:separable convolution)</p>
-<ul class="simple">
-<li><p>3D 컨벌루션의 계산 load를 줄일 수 있다.</p></li>
-<li><p>사전 학습된 2D 컨볼루션 레이어와 새로 초기화된 1D 컨벌루션 레이어 사이에 명확한 경계를 생성하여, spatial information을 유지한 채 temporal convolution을 처음부터 학습할 수 있게 한다.</p></li>
-</ul>
-</li>
-<li><p>Pseudo-3D convolutional layer</p>
-<figure class="align-default" id="id8">
-<img alt="make_a_video_04" class="bg-primary mb-1" src="../../_images/conv3d.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 580 </span><span class="caption-text">Pseudo-3D convolutional layer</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(h\)</span>: 입력 텐서 (dimension: <span class="math notranslate nohighlight">\(B\)</span>(batch),<span class="math notranslate nohighlight">\(C\)</span>(channels),<span class="math notranslate nohighlight">\(F\)</span>(frames),<span class="math notranslate nohighlight">\(H\)</span>(height),<span class="math notranslate nohighlight">\(W\)</span>(width))</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{o}T\)</span>: transpose operator (spatial ↔ temporal)</p></li>
-<li><p><span class="math notranslate nohighlight">\(Conv_{2_D}\)</span>는 pretrained T2I 모델에서 초기화 되고, <span class="math notranslate nohighlight">\(Conv_{1_D}\)</span>는 identity 함수로 초기화 된다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="psuedo-3d-attention-layers">
-<h4>3.2.2. Pseudo-3D attention layers<a class="headerlink" href="#psuedo-3d-attention-layers" title="Permalink to this heading">#</a></h4>
-<figure class="align-default" id="id9">
-<img alt="make_a_video_05" class="bg-primary mb-1" src="../../_images/3.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 581 </span><span class="caption-text">Architecture of Pseudo-3D attention layers</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><a class="reference external" href="https://arxiv.org/abs/2204.03458">“Video Diffusion Models”</a>에 영감을 받아 dimension decomposition 전략을 attention layer에 확장하였다.</p></li>
-<li><p>Pseudo-3D convolutional layer처럼 각각의 (사전 학습된) spatial attention layer 뒤에 temporal attention layer를 쌓아, 전체 spatiotemporal attention layer를 근사화한다.</p></li>
-<li><p>Pseudo-3D attention layer</p>
-<figure class="align-default" id="id10">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/attention3d.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 582 </span><span class="caption-text">Pseudo-3D attention layer</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(h\)</span>: 입력 텐서 (dimension: <span class="math notranslate nohighlight">\(B\)</span>(batch),<span class="math notranslate nohighlight">\(C\)</span>(channels),<span class="math notranslate nohighlight">\(F\)</span>(frames),<span class="math notranslate nohighlight">\(H\)</span>(height),<span class="math notranslate nohighlight">\(W\)</span>(width))</p></li>
-<li><p>flatten: spatial dimension 축에 대해 flatten하는 연산 (결과 dimension: <span class="math notranslate nohighlight">\(B\)</span>,<span class="math notranslate nohighlight">\(C\)</span>,<span class="math notranslate nohighlight">\(F\)</span>,<span class="math notranslate nohighlight">\(HW\)</span>)</p></li>
-<li><p><span class="math notranslate nohighlight">\(ATTN_{2D}\)</span>는 pretrained T2I 모델에서 초기화되고, <span class="math notranslate nohighlight">\(ATTN_{1D}\)</span>는 identity function으로 초기화 된다.</p></li>
-<li><p>Code</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">SpatioTemporalAttention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-        <span class="bp">self</span><span class="p">,</span>
-        <span class="n">dim</span><span class="p">,</span>
-        <span class="o">*</span><span class="p">,</span>
-        <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">64</span><span class="p">,</span>
-        <span class="n">heads</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
-        <span class="n">add_feed_forward</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
-        <span class="n">ff_mult</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span>
-        <span class="n">pos_bias</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
-        <span class="n">flash</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
-        <span class="n">causal_time_attn</span> <span class="o">=</span> <span class="kc">False</span>
-    <span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="k">assert</span> <span class="ow">not</span> <span class="p">(</span><span class="n">flash</span> <span class="ow">and</span> <span class="n">pos_bias</span><span class="p">),</span> <span class="s1">&#39;learned positional attention bias is not compatible with flash attention&#39;</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">spatial_attn</span> <span class="o">=</span> <span class="n">Attention</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="n">dim_head</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">flash</span> <span class="o">=</span> <span class="n">flash</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span> <span class="o">=</span> <span class="n">ContinuousPositionBias</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">num_dims</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span> <span class="k">if</span> <span class="n">pos_bias</span> <span class="k">else</span> <span class="kc">None</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">temporal_attn</span> <span class="o">=</span> <span class="n">Attention</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="n">dim_head</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">flash</span> <span class="o">=</span> <span class="n">flash</span><span class="p">,</span> <span class="n">causal</span> <span class="o">=</span> <span class="n">causal_time_attn</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span> <span class="o">=</span> <span class="n">ContinuousPositionBias</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">num_dims</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="n">pos_bias</span> <span class="k">else</span> <span class="kc">None</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">has_feed_forward</span> <span class="o">=</span> <span class="n">add_feed_forward</span>
-        <span class="k">if</span> <span class="ow">not</span> <span class="n">add_feed_forward</span><span class="p">:</span>
-            <span class="k">return</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">ff</span> <span class="o">=</span> <span class="n">FeedForward</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">mult</span> <span class="o">=</span> <span class="n">ff_mult</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span>
-        <span class="bp">self</span><span class="p">,</span>
-        <span class="n">x</span><span class="p">,</span>
-        <span class="n">enable_time</span> <span class="o">=</span> <span class="kc">True</span>
-    <span class="p">):</span>
-        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
-        <span class="n">is_video</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">5</span>
-        <span class="n">enable_time</span> <span class="o">&amp;=</span> <span class="n">is_video</span>
-
-        <span class="k">if</span> <span class="n">is_video</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c f h w -&gt; (b f) (h w) c&#39;</span><span class="p">)</span> <span class="c1">#[bXf, hXw, c]</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c h w -&gt; b (h w) c&#39;</span><span class="p">)</span><span class="c1">#[b, hXw, c]</span>
-
-        <span class="n">space_rel_pos_bias</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">w</span><span class="p">)</span> <span class="k">if</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
-
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spatial_attn</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">rel_pos_bias</span> <span class="o">=</span> <span class="n">space_rel_pos_bias</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
-
-        <span class="k">if</span> <span class="n">is_video</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;(b f) (h w) c -&gt; b c f h w&#39;</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="n">b</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b (h w) c -&gt; b c h w&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
-
-        <span class="k">if</span> <span class="n">enable_time</span><span class="p">:</span>
-
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c f h w -&gt; (b h w) f c&#39;</span><span class="p">)</span> <span class="c1">#[bXhXw, f, c] </span>
-
-            <span class="n">time_rel_pos_bias</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
-
-            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">temporal_attn</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">rel_pos_bias</span> <span class="o">=</span> <span class="n">time_rel_pos_bias</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
-
-            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;(b h w) f c -&gt; b c f h w&#39;</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">)</span>
-
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_feed_forward</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">enable_time</span> <span class="o">=</span> <span class="n">enable_time</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
-
-        <span class="k">return</span> <span class="n">x</span>
-</pre></div>
-</div>
-</li>
-</ul>
-</li>
-<li><p>Frame rate conditioning</p>
-<ul class="simple">
-<li><p>비디오의 초당 프레임 수를 나타내는 추가 컨디셔닝 파라미터 <span class="math notranslate nohighlight">\(fps\)</span>를 추가한다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="frame-interpolation-network">
-<h3>3.3 Frame Interpolation Network<a class="headerlink" href="#frame-interpolation-network" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>↑F (Frame Interpolation Network)란?</p>
-<ul>
-<li><p>생성된 프레임 수를 증가시켜, 생성된 비디오를 더 부드럽게 만들고 비디오 길이를 연장 시킬 수 있는 네트워크</p></li>
-<li><p>프레임을 보간하고 extrapolation을 하는 네트워크</p>
-<ul>
-<li><p>Extrapolation: 주어진 데이터 또는 정보를 사용하여 미래의 값을 예측하거나 확장</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>↑F (Frame Interpolation Network) 동작</p>
-<ul>
-<li><p>Spatiotemporal decoder <span class="math notranslate nohighlight">\(D^t\)</span>에서 마스크 처리된 입력 프레임을 제로 패딩하고 비디오 업샘플링을 적용하여 masked frame interpolation을 파인 튜닝한다.</p></li>
-<li><p>파인 튜닝할 때 U-Net의 입력에 4개의 채널을 추가한다.</p>
-<ul>
-<li><p>RGB 마스킹 비디오 입력을 위한 3개의 채널과 마스킹되는 프레임을 나타내는 추가 바이너리 채널</p></li>
-</ul>
-</li>
-<li><p>다양한 frame-skips과 <span class="math notranslate nohighlight">\(fps\)</span>에 대해 파인튜닝하여 추론시 여러 temporal upsample rate를 제공한다.</p></li>
-</ul>
-</li>
-<li><p>본 논문의 모든 실험에서는 ↑F를 frame skip 5로 적용하여 16프레임 비디오를 76프레임((16-1)X5+1)으로 업샘플링 하였다.</p></li>
-<li><p>비디오 시작 또는 끝 프레임을 마스킹하여 비디오 추정 또는 이미지 애니메이션에도 사용할 수 있다.</p></li>
-</ul>
-</section>
-<section id="training">
-<h3>3.4 Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>위에서 설명한 구성 요소들은 독립적으로 학습 된다.</p></li>
-<li><p>훈련 과정</p>
-<ol class="arabic">
-<li><p>Prior <span class="math notranslate nohighlight">\(P\)</span> 훈련 (text-image 데이터 이용)</p>
-<p>→ 텍스트를 입력으로 받는 prior <span class="math notranslate nohighlight">\(P\)</span>는 text-image 데이터에 대해서만 학습 되고 비디오에 대해서는 파인 튜닝하지 않는다.</p>
-</li>
-<li><p>이미지를 이용한 학습</p>
-<p>→ Decoder, prior, 두개의 super-resolution 요소들은 먼저 텍스트 없이 이미지 만으로 학습 된다.</p>
-<p>→ Decoder는 Clip image embedding을 입력으로 받고, super-resolution 요소들은 학습 중에 입력으로 들어온 downsampled image를 입력으로 받는다.</p>
-</li>
-<li><p>비디오를 이용한 학습</p>
-<ul class="simple">
-<li><p>이미지에 대한 훈련이 끝나면 새로운 시간 레이어를 추가하고 초기화하여 레이블이 지정되지 않은 비디오 데이터에 대해 파인 튜닝한다.</p></li>
-<li><p>원본 비디오에서 16프레임이 샘플링 되며, 1에서 30 사이의 랜덤 <span class="math notranslate nohighlight">\(fps\)</span>를 사용한다.</p></li>
-<li><p>디코더를 학습하는 동안 훈련 초기에는 더 높은 <span class="math notranslate nohighlight">\(fps\)</span> 범위(모션이 적은)에서 시작하고, 이후에는 더 작은 <span class="math notranslate nohighlight">\(fps\)</span> 범위(모션이 많은)로 전환한다.</p></li>
-<li><p>Masked-frame interpolation 네트워크는 temporal 디코더로부터 파인 튜닝된다.</p></li>
-</ul>
-</li>
-</ol>
-</li>
-</ul>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<section id="dataset-and-settings">
-<h3>4.1 Dataset and Settings<a class="headerlink" href="#dataset-and-settings" title="Permalink to this heading">#</a></h3>
-<section id="datasets">
-<h4>Datasets<a class="headerlink" href="#datasets" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>Image, Text</p>
-<ul>
-<li><p>LAION-5B 데이터셋의 일부 2.3B의 데이터를 사용하였다.</p></li>
-<li><p>NSFW 이미지, 텍스트의 유해한 단어 또는 워터마크 확률이 0.5보다 큰 이미지가 있는 샘플 쌍을 필터링하였다.</p>
-<ul>
-<li><p>NSFW: Not Safe For Work, 선정적이거나 음란하거나 폭력적인 내용을 포함한 콘텐츠</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Video</p>
-<ul>
-<li><p>WebVid-10M과, HD-VILA-100M 데이터셋의 일부 10M 데이터를 사용하였다.</p>
-<ul>
-<li><p>Decoder <span class="math notranslate nohighlight">\(D^t\)</span>, interpolation 모델 → WebVid-10M을 이용하여 학습</p></li>
-<li><p><span class="math notranslate nohighlight">\(SR^t_l\)</span> → WebVid-10M, HD-VILA-100M을 이용하여 학습</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Zero-shot test 데이터</p>
-<ul>
-<li><p>UCF-101, MSR-VTT</p>
-<ul>
-<li><p>UCF-101: 액션 인식 연구를 위해 고안되었으며, 다양한 동작 및 환경에서 촬영된 비디오 클립 데이터셋</p></li>
-<li><p>MSR-VTT: 비디오와 해당 비디오에 대한 텍스트 설명 또는 캡션을 포함하는 데이터셋</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="automatic-metrics">
-<h4>Automatic Metrics<a class="headerlink" href="#automatic-metrics" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>UCF-101</p>
-<ul>
-<li><p>각 클래스에 대해 하나의 템플릿 문장을 작성하고 평가를 위해 수정한다.</p></li>
-<li><p>10K 샘플에 대해 Fréchet Video Distance(FVD)와 Inception Score(IS)를 측정한다.</p></li>
-<li><p>Train셋과 동일한 클래스 분포를 따르는 샘플을 생성한다.</p></li>
-</ul>
-</li>
-<li><p>MSR-VTT</p>
-<ul>
-<li><p>테스트 세트의 모든 59,794 캡션에 대한 FID와 CLIPSIM(비디오 프레임과 텍스트 간의 평균 CLIP 유사도)를 측정한다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="human-evaluation-set-and-metrics">
-<h4>Human Evaluation Set and Metrics<a class="headerlink" href="#human-evaluation-set-and-metrics" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>Amazon Mechanical Turk(AMT)에서 300개의 프롬프트로 이루어진 평가 세트를 수집하였다.</p></li>
-<li><p>Annotator들에게 T2V 시스템이 있다면 어떤 것을 생성하고 싶은지 물어봤다.</p></li>
-<li><p>불완전하거나, 너무 추상적이거나, 불쾌감을 주는 프롬프트를 필터링 하였다.</p></li>
-<li><p>5가지 카테고리(동물, 판타지, 사람, 자연 및 풍경, 음식 및 음료)를 식별하고 해당 카테고리에 맞는 프롬프트를 선택하였다.</p></li>
-<li><p>이러한 프롬프트는 해당 프롬프트로 동영상을 미리 생성해 보지 않은 상태에서 선택되었으며, 이후 고정된 상태로 유지했다.</p></li>
-<li><p>Human evaluation을 위해 Imagen의 DrawBench 프롬프트도 사용하였다.</p></li>
-<li><p>비디오 품질과 text-video faithfulness를 평가하였다.</p>
-<ul>
-<li><p>비디오 품질 → 두 개의 비디오를 랜덤 순서로 보여주고 어떤 비디오의 품질이 더 좋은지 annotator에게 물어본다.</p></li>
-<li><p>Text-video faithfulness → 텍스트를 추가로 보여주고 어떤 비디오가 텍스트와 더 잘 일치하는지 annotator에게 물어본다.</p></li>
-</ul>
-</li>
-<li><p>보간 모델과 FILM의 비디오 모션 사실감을 비교하기 위한 평가도 진행하였다.</p></li>
-<li><p>5명의 각기 다른 annotator의 다수 득표를 최종 결과로 사용하였다.</p></li>
-</ul>
-</section>
-</section>
-<section id="quantitative-results">
-<h3>4.2 Quantitative Results<a class="headerlink" href="#quantitative-results" title="Permalink to this heading">#</a></h3>
-<section id="automatic-evaluaton-on-msr-vtt">
-<h4>Automatic Evaluation on MSR-VTT<a class="headerlink" href="#automatic-evaluaton-on-msr-vtt" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>MSR-VTT에 대해 성능을 보고하는 GODIVA, NUWA 외에도, 중국어와 영어를 모두 입력으로 받는  CogVideo 모델에 대해서도 추론을 수행하였다.</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/4.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 583 </span><span class="caption-text">Automatic Evaluation on MSR-VTT</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>→ 가장 우수한 성능을 보인다.</p>
-</section>
-</section>
-<section id="automatic-evluation-on-ucf-101">
-<h3>Automatic Evaluation on UCF-101<a class="headerlink" href="#automatic-evluation-on-ucf-101" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id12">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/5.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 584 </span><span class="caption-text">Automatic Evaluation on UCF-101</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>→ Make-A-Video의 제로 샷 성능이 다른 방법보다 우수하다. Fine-tuning을 한 결과에서도 SOTA를 달성하였다.</p>
-<section id="human-evaluation">
-<h4>Human Evaluation<a class="headerlink" href="#human-evaluation" title="Permalink to this heading">#</a></h4>
-<ul class="simple">
-<li><p>DrawBench와 테스트셋에 대해서 CogVideo와 성능을 비교한다.</p></li>
-<li><p>또한, VDM의 웹 페이지에 표시된 28개의 동영상에 대해서도 평가한다.</p></li>
-<li><p>각 입력에 대해 8개의 동영상을 무작위로 생성하고, 8번 평가하여 평균 결과를 낸다.</p></li>
-<li><p>사람의 평가를 위해 76x256x256 해상도로 동영상을 생성한다.</p></li>
-</ul>
-<figure class="align-default" id="id13">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/6.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 585 </span><span class="caption-text">Human Evaluation</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>→ 평가자가 Make-A-Video 모델의 결과가 더 낫다고 투표한 퍼센트 비율. 대부분 평가자가 모든 벤치마크에서 Make-A-Video가 더 낫다고 평가하였다.</p>
-<ul class="simple">
-<li><p>Frame Interpolation Network와 FILM을 비교 평가하기</p>
-<ul>
-<li><p>DrawBench의 텍스트 프롬프트와 평가 세트에서 저프레임률 비디오(1 FPS)를 생성한 다음, 4FPS까지 업샘플링한다.</p></li>
-<li><p>평가자들은 eval set에 대해서는 62%,  DrawBench에 대해서는 54%로 Make-A-Video가 더 낫다고 평가하였다.</p></li>
-<li><p>프레임 간의 차이가 커서 물체가 어떻게 움직이는지에 대한 real-world 지식이 중요한 경우에는 본 논문의 방법이 더 뛰어난 것으로 관찰되었다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="qualitative-results">
-<h3>4.3 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id14">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/7.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 586 </span><span class="caption-text">T2V Generation 결과. 맨 위: VDM, 가운데: CogVideo, 맨 아래: Make-A-Video
-→ Make-A-Video가 모션의 일관성을 유지하면서 더 풍부한 콘텐츠를 생성할 수 있다.</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id15">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/8.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 587 </span><span class="caption-text">이미지에 masked frame interpolation 및 extrapolation network ↑F를 적용한 결과.
-가장 왼쪽에 입력 이미지가 주어지면, 이를 동영상으로 애니메이션화 함.
-사용자는 자신의 이미지를 사용하여 동영상을 생성할 수 있으며, 생성된 동영상을 개인화하고 직접 제어할 수 있음.</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id16">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/9.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 588 </span><span class="caption-text">두 이미지 사이의 interpolation 결과. 왼쪽: FILM, 오른쪽: 본 논문의 approach
-FILM →  실제 움직이는 object에 대한 이해 없이 프레임을 부드럽게 전환하기만 함.
-본 논문의 approach → 의미론적으로 더 의미있는 interpolation을 만듦.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id17">
-<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/101.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 589 </span><span class="caption-text">비디오 변형 예시. 위: 원본 비디오, 아래: 새로운 비디오</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>기타 결과: <a class="reference external" href="https://make-a-video.github.io/">https://make-a-video.github.io/</a></p></li>
-</ul>
-</section>
-</section>
-<section id="id3">
-<h2>5. 결론<a class="headerlink" href="#id3" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>주변 세계로부터 지식을 배우는 human intelligence처럼 generative system도 인간의 학습 방식을 모방할 수 있다면, 더욱 창의적이고 유용할 것이다.</p></li>
-<li><p>연구자들은 비지도 학습을 통해 훨씬 더 많은 동영상에서 세계의 dynamic을 학습함으로써 기존의 한계를 극복할 수 있다.</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="MimicBrush.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">MimicBrush: Zero-shot Image Editing with Reference Imitation</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="VideoLDM.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">VideoLDM</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Make-A-video 제안 배경</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Make-A-video 특성</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#previous-work">2. Previous Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-model">3.1. Text-To-Image Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatiotemporal-layers">3.2. Spatiotemporal Layers</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-3d-convolutional-layers">3.2.1 Pseudo-3D convolutional layers</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#psuedo-3d-attention-layers">3.2.2. Pseudo-3D attention layers</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#frame-interpolation-network">3.3 Frame Interpolation Network</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3.4 Training</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-and-settings">4.1 Dataset and Settings</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">Datasets</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-metrics">Automatic Metrics</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation-set-and-metrics">Human Evaluation Set and Metrics</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evaluaton-on-msr-vtt">Automatic Evaluation on MSR-VTT</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evluation-on-ucf-101">Automatic Evluation on UCF-101</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation">Human Evaluation</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.3 Qualitative Results</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">5. 결론</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Make A Video &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Make_A_Video';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="VideoLDM" href="VideoLDM.html" />
+    <link rel="prev" title="One-step Diffusion with Distribution Matching Distillation" href="one_step_diffusion_with_distribution_matching_distillation.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Make_A_Video.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Make_A_Video.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Make A Video</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Make-A-video 제안 배경</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Make-A-video 특성</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#previous-work">2. Previous Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-model">3.1. Text-To-Image Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatiotemporal-layers">3.2. Spatiotemporal Layers</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-3d-convolutional-layers">3.2.1 Pseudo-3D convolutional layers</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#psuedo-3d-attention-layers">3.2.2. Pseudo-3D attention layers</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#frame-interpolation-network">3.3 Frame Interpolation Network</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3.4 Training</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-and-settings">4.1 Dataset and Settings</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">Datasets</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-metrics">Automatic Metrics</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation-set-and-metrics">Human Evaluation Set and Metrics</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evaluaton-on-msr-vtt">Automatic Evaluation on MSR-VTT</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evluation-on-ucf-101">Automatic Evaluation on UCF-101</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation">Human Evaluation</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.3 Qualitative Results</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">5. 결론</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Make-A-Video: Text-to-Video Generation without Text-Video Data</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2209.14792">https://arxiv.org/abs/2209.14792</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
+<li><p><strong>Last updated on Nov. 26, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="make-a-video">
+<h1>Make A Video<a class="headerlink" href="#make-a-video" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>참고 코드: <a class="github reference external" href="https://github.com/lucidrains/make-a-video-pytorch">lucidrains/make-a-video-pytorch</a></p></li>
+</ul>
+<aside>
+💡 핵심 요약 
+<ul class="simple">
+<li><p>Text-to-Image(T2I)를 이용하여 Text-to-Video(T2V)를 수행함</p></li>
+<li><p>Make-a-Video의 장점</p>
+<ol class="arabic simple">
+<li><p>T2V 모델의 학습을 가속화 하였음</p></li>
+<li><p>Text-video 데이터가 필요하지 않음</p></li>
+<li><p>이미지 생성 모델의 방대함(표현의 다양성)을 그대로 유지함</p></li>
+</ol>
+</li>
+<li><p>방법론</p>
+<ol class="arabic simple">
+<li><p>Full temporal U-net과 attention tensor를 분해하여 공간(space)과 시간(time)으로 근사화 함</p></li>
+<li><p>다양한 어플리케이션에 적용하기 위한 spatial temporal pipeline을 설계함</p></li>
+</ol>
+</li>
+<li><p>관련 모듈</p>
+<ol class="arabic simple">
+<li><p>Pseudo-3D convolutional layer</p></li>
+<li><p>Pseudo-3D attention layer</p></li>
+<li><p>Frame interpolation network</p></li>
+</ol>
+</li>
+<li><p>결과: text-to-video 생성 태스크에서 SOTA 달성</p></li>
+</ul>
+</aside>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<section id="id1">
+<h3>Make-A-video 제안 배경<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>T2I 모델링을 할 수 있는 데이터는 인터넷을 통해 확보될 수 있으나, 비슷한 규모의 텍스트 비디오 데이터셋을 수집하기는 어렵다.</p></li>
+<li><p>T2I 모델이 존재하는데 T2V 모델을 처음부터 학습 시키는 것은 낭비일 수 있다.</p></li>
+<li><p>비지도 학습을 사용하여 더 많은 데이터를 학습할 수 있다.</p></li>
+</ul>
+</section>
+<section id="id2">
+<h3>Make-A-video 특성<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>T2I 모델을 활용하여, 레이블이 지정되지 않은 비디오 데이터에 대해 비지도 학습을 사용하여 학습한다 → 페어링된 텍스트-비디오 데이터 없이도 텍스트에서 비디오를 생성할 수 있다.</p></li>
+<li><p>텍스트 없이도 비지도 비디오만으로 세상의 다양한 개체가 어떻게 움직이고 상호 작용하는지 학습할 수 있다.</p></li>
+</ul>
+</section>
+<section id="contribution">
+<h3>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>디퓨전 기반의 T2I 모델을 T2V로 확장하는 효과적인 방법인 Make-A-Video를 소개한다.</p></li>
+<li><p>Text-to-image 를 prior로 사용하여 text-video 데이터의 필요성을 우회한다.</p></li>
+<li><p>고화질, 고프레임률 비디오를 생성하는 super-resolution 전략을 제안한다.</p></li>
+<li><p>Make-A-Video를 기존 T2V 시스템과 비교하여 평가한다. 또한, 제로샷 T2V human evaluation을 위해 300개의 프롬프트 테스트 세트를 수집하여 공개할 계획이다.</p></li>
+</ul>
+</section>
+</section>
+<section id="previous-work">
+<h2>2. Previous Work<a class="headerlink" href="#previous-work" title="Permalink to this heading">#</a></h2>
+</section>
+<section id="method">
+<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>Make-A-Video의 주요 요소</p>
+<ol class="arabic simple">
+<li><p>텍스트-이미지 쌍으로 학습된 base T2I 모델</p></li>
+<li><p>신경망의 블록을 시간 차원으로 확장하는 시공간 convolution 및 attention layer</p></li>
+<li><p>두 시공간 layer로 구성된 시공간 신경망과 높은 프레임 속도 생성을 위한 frame interpolation network</p></li>
+</ol>
+</li>
+<li><p>Make-A-Video의 최종 inference 수식</p>
+<figure class="align-default" id="id4">
+<img alt="make_a_video_00" class="bg-primary mb-1" src="../../_images/001.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 588 </span><span class="caption-text">최종 inference 수식</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(SR_h\)</span>: spatial super-resolution network</p></li>
+<li><p><span class="math notranslate nohighlight">\(SR^t_l\)</span>: spatiotemporal super-resolution network</p></li>
+<li><p><span class="math notranslate nohighlight">\(\uparrow_{F}\)</span>: frame interpolation network</p></li>
+<li><p><span class="math notranslate nohighlight">\(D^t\)</span>: spatiotemporal decoder</p></li>
+<li><p><span class="math notranslate nohighlight">\(P\)</span>: prior network</p></li>
+<li><p><span class="math notranslate nohighlight">\(\hat{x}\)</span>: BPE-encoded text</p></li>
+<li><p><span class="math notranslate nohighlight">\(C_x\)</span>: CLIP text encoder</p></li>
+<li><p><span class="math notranslate nohighlight">\(x\)</span>: input text</p></li>
+</ul>
+</li>
+</ul>
+<section id="text-to-image-model">
+<h3>3.1. Text-To-Image Model<a class="headerlink" href="#text-to-image-model" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p><a class="reference external" href="https://arxiv.org/abs/2205.11487">“Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding(Imagen)”</a>와 연구 내용을 공유하였다.</p></li>
+<li><p>Imagen</p>
+<figure class="align-default" id="id5">
+<img alt="make_a_video_01" class="bg-primary mb-1" src="../../_images/Untitled1.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 589 </span><span class="caption-text">Imagen 구조</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>고해상도 이미지를 만들기 위해 사용한 네트워크</p>
+<ul>
+<li><p>A prior Network <span class="math notranslate nohighlight">\(P\)</span>: 텍스트 임베딩 <span class="math notranslate nohighlight">\(x_e\)</span>와 BPE encoded text tokens <span class="math notranslate nohighlight">\(\hat{x}\)</span>이 주어졌을 때 이미지 임베딩 <span class="math notranslate nohighlight">\(y_e\)</span>를 생성하는 네트워크</p></li>
+<li><p>Decoder Network <span class="math notranslate nohighlight">\(D\)</span>: 이미지 임베딩 <span class="math notranslate nohighlight">\(y_e\)</span>로부터 저해상도 64X64 RGB 이미지 <span class="math notranslate nohighlight">\(\hat{y}_l\)</span>를 생성하는 네트워크</p></li>
+<li><p>Super-resolution network <span class="math notranslate nohighlight">\(SR_l\)</span>, <span class="math notranslate nohighlight">\(SR_h\)</span>: D에서 생성된 이미지 64X64 저해상도 이미지 <span class="math notranslate nohighlight">\(\hat{y}_l\)</span>를 256X256, 768X768 픽셀로 증가시켜 최종 이미지 <span class="math notranslate nohighlight">\(\hat{y}\)</span>를 만드는 네트워크</p>
+<figure class="align-default" id="id6">
+<img alt="make_a_video_02" class="bg-primary mb-1" src="../../_images/1.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 590 </span><span class="caption-text">text <span class="math notranslate nohighlight">\(x\)</span>가 prior <span class="math notranslate nohighlight">\(P\)</span>를 통해 image embedding으로 변환된다.
+fps: desired frame rate</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="spatiotemporal-layers">
+<h3>3.2. Spatiotemporal Layers<a class="headerlink" href="#spatiotemporal-layers" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>2차원 조건부 네트워크를 시간적 차원으로 확장하기 위해 다음의 구성 요소를 수정한다.</p>
+<ul class="simple">
+<li><p>Convolutional layers</p></li>
+<li><p>Attention layers</p></li>
+</ul>
+</li>
+<li><p>Fully-connected layers는 특별한 수정을 할 필요 없이 시간 정보만 추가해주면 된다.</p></li>
+<li><p>구성 요소 수정 결과 <span class="math notranslate nohighlight">\(D^t\)</span>는 64X64 사이즈의 16 RGB frame을 만들게 된다.</p></li>
+<li><p>Frame interpolation network <span class="math notranslate nohighlight">\(\uparrow_{F}\)</span>가 생성된 16개의 프레임 사이를 보간하여 프레임 속도를 증가시키고, 그 결과는 super-resolution 네트워크 <span class="math notranslate nohighlight">\(SR^t_l\)</span>로 전달된다.</p></li>
+<li><p>Super-resolution 네트워크에는 hallucinating information(환각 정보)가 포함 된다. 깜박이는 잔상이 생기지 않으려면, 환각이 프레임 전체에 걸쳐 일관성을 유지해야 한다.</p>
+<ul>
+<li><p>Hallucinating information</p>
+<p>실제로 존재하지 않는 정보나 세부 사항을 생성하거나 가상으로 추가하는 것</p>
+</li>
+</ul>
+</li>
+<li><p>프레임당 super resolution을 수행하는 것보다 spatiotemporal 모듈인 <span class="math notranslate nohighlight">\(SR^t_l\)</span>가 더 좋은 성능을 보였다.</p></li>
+<li><p>하지만, <span class="math notranslate nohighlight">\(SR_h\)</span>를 위와 같은 모듈로 만들기엔 메모리 및 컴퓨팅 제약과 고해상도 비디오 데이터의 부족으로 <span class="math notranslate nohighlight">\(SR_h\)</span>를 위와 같이 시간적 차원으로 확장하는 것은 어려웠다 → <span class="math notranslate nohighlight">\(SR_h\)</span>는 공간적 차원에서 작동한다. (각 프레임에 대해 동일한 노이즈 초기화를 사용하여 프레임 전반에 걸쳐 일관된 환각을 제공함)</p></li>
+</ul>
+<section id="pseudo-3d-convolutional-layers">
+<h4>3.2.1 Pseudo-3D convolutional layers<a class="headerlink" href="#pseudo-3d-convolutional-layers" title="Permalink to this heading">#</a></h4>
+<figure class="align-default" id="id7">
+<img alt="make_a_video_03" class="bg-primary mb-1" src="../../_images/2.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 591 </span><span class="caption-text">Architecture of Pseudo-3D convolutional layers</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>2D 컨벌루션 레이어 다음에 1D 컨벌루션을 쌓는다 (Cf:separable convolution)</p>
+<ul class="simple">
+<li><p>3D 컨벌루션의 계산 load를 줄일 수 있다.</p></li>
+<li><p>사전 학습된 2D 컨벌루션 레이어와 새로 초기화된 1D 컨벌루션 레이어 사이에 명확한 경계를 생성하여, spatial information을 유지한 채 temporal convolution을 처음부터 학습할 수 있게 한다.</p></li>
+</ul>
+</li>
+<li><p>Pseudo-3D convolutional layer</p>
+<figure class="align-default" id="id8">
+<img alt="make_a_video_04" class="bg-primary mb-1" src="../../_images/conv3d.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 592 </span><span class="caption-text">Pseudo-3D convolutional layer</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(h\)</span>: 입력 텐서 (dimension: <span class="math notranslate nohighlight">\(B\)</span>(batch),<span class="math notranslate nohighlight">\(C\)</span>(channels),<span class="math notranslate nohighlight">\(F\)</span>(frames),<span class="math notranslate nohighlight">\(H\)</span>(height),<span class="math notranslate nohighlight">\(W\)</span>(width))</p></li>
+<li><p><span class="math notranslate nohighlight">\(\circ T\)</span>: transpose operator (spatial ↔ temporal)</p></li>
+<li><p><span class="math notranslate nohighlight">\(Conv_{2D}\)</span>는 pretrained T2I 모델에서 초기화 되고, <span class="math notranslate nohighlight">\(Conv_{1D}\)</span>는 identity 함수로 초기화 된다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="psuedo-3d-attention-layers">
+<h4>3.2.2. Pseudo-3D attention layers<a class="headerlink" href="#psuedo-3d-attention-layers" title="Permalink to this heading">#</a></h4>
+<figure class="align-default" id="id9">
+<img alt="make_a_video_05" class="bg-primary mb-1" src="../../_images/3.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 593 </span><span class="caption-text">Architecture of Pseudo-3D attention layers</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><a class="reference external" href="https://arxiv.org/abs/2204.03458">“Video Diffusion Models”</a>에 영감을 받아 dimension decomposition 전략을 attention layer에 확장하였다.</p></li>
+<li><p>Pseudo-3D convolutional layer처럼 각각의 spatial attention layer를 쌓아, 전체 spatiotemporal attention layer를 근사화하는 temporal attention layer를 쌓는다.</p></li>
+<li><p>Pseudo-3D attention layer</p>
+<figure class="align-default" id="id10">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/attention3d.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 594 </span><span class="caption-text">Pseudo-3D attention layer</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(h\)</span>: 입력 텐서 (dimension: <span class="math notranslate nohighlight">\(B\)</span>(batch),<span class="math notranslate nohighlight">\(C\)</span>(channels),<span class="math notranslate nohighlight">\(F\)</span>(frames),<span class="math notranslate nohighlight">\(H\)</span>(height),<span class="math notranslate nohighlight">\(W\)</span>(width))</p></li>
+<li><p>flatten: spatial dimension 축에 대해 flatten하는 연산 (결과 dimension: <span class="math notranslate nohighlight">\(B\)</span>,<span class="math notranslate nohighlight">\(C\)</span>,<span class="math notranslate nohighlight">\(F\)</span>,<span class="math notranslate nohighlight">\(HW\)</span>)</p></li>
+<li><p><span class="math notranslate nohighlight">\(ATTN_{2D}\)</span>는 pretrained T2I 모델에서 초기화되고, <span class="math notranslate nohighlight">\(ATTN_{1D}\)</span>는 identity function으로 초기화 된다.</p></li>
+<li><p>Code</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">SpatioTemporalAttention</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
+        <span class="bp">self</span><span class="p">,</span>
+        <span class="n">dim</span><span class="p">,</span>
+        <span class="o">*</span><span class="p">,</span>
+        <span class="n">dim_head</span> <span class="o">=</span> <span class="mi">64</span><span class="p">,</span>
+        <span class="n">heads</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
+        <span class="n">add_feed_forward</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
+        <span class="n">ff_mult</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span>
+        <span class="n">pos_bias</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
+        <span class="n">flash</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
+        <span class="n">causal_time_attn</span> <span class="o">=</span> <span class="kc">False</span>
+    <span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="k">assert</span> <span class="ow">not</span> <span class="p">(</span><span class="n">flash</span> <span class="ow">and</span> <span class="n">pos_bias</span><span class="p">),</span> <span class="s1">&#39;learned positional attention bias is not compatible with flash attention&#39;</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">spatial_attn</span> <span class="o">=</span> <span class="n">Attention</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="n">dim_head</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">flash</span> <span class="o">=</span> <span class="n">flash</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span> <span class="o">=</span> <span class="n">ContinuousPositionBias</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">num_dims</span> <span class="o">=</span> <span class="mi">2</span><span class="p">)</span> <span class="k">if</span> <span class="n">pos_bias</span> <span class="k">else</span> <span class="kc">None</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">temporal_attn</span> <span class="o">=</span> <span class="n">Attention</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">dim_head</span> <span class="o">=</span> <span class="n">dim_head</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">flash</span> <span class="o">=</span> <span class="n">flash</span><span class="p">,</span> <span class="n">causal</span> <span class="o">=</span> <span class="n">causal_time_attn</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span> <span class="o">=</span> <span class="n">ContinuousPositionBias</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span> <span class="o">//</span> <span class="mi">2</span><span class="p">,</span> <span class="n">heads</span> <span class="o">=</span> <span class="n">heads</span><span class="p">,</span> <span class="n">num_dims</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="n">pos_bias</span> <span class="k">else</span> <span class="kc">None</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">has_feed_forward</span> <span class="o">=</span> <span class="n">add_feed_forward</span>
+        <span class="k">if</span> <span class="ow">not</span> <span class="n">add_feed_forward</span><span class="p">:</span>
+            <span class="k">return</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">ff</span> <span class="o">=</span> <span class="n">FeedForward</span><span class="p">(</span><span class="n">dim</span> <span class="o">=</span> <span class="n">dim</span><span class="p">,</span> <span class="n">mult</span> <span class="o">=</span> <span class="n">ff_mult</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span>
+        <span class="bp">self</span><span class="p">,</span>
+        <span class="n">x</span><span class="p">,</span>
+        <span class="n">enable_time</span> <span class="o">=</span> <span class="kc">True</span>
+    <span class="p">):</span>
+        <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="o">*</span><span class="n">_</span><span class="p">,</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
+        <span class="n">is_video</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">5</span>
+        <span class="n">enable_time</span> <span class="o">&amp;=</span> <span class="n">is_video</span>
+
+        <span class="k">if</span> <span class="n">is_video</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c f h w -&gt; (b f) (h w) c&#39;</span><span class="p">)</span> <span class="c1">#[bXf, hXw, c]</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c h w -&gt; b (h w) c&#39;</span><span class="p">)</span><span class="c1">#[b, hXw, c]</span>
+
+        <span class="n">space_rel_pos_bias</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span><span class="p">(</span><span class="n">h</span><span class="p">,</span> <span class="n">w</span><span class="p">)</span> <span class="k">if</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spatial_rel_pos_bias</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
+
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spatial_attn</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">rel_pos_bias</span> <span class="o">=</span> <span class="n">space_rel_pos_bias</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
+
+        <span class="k">if</span> <span class="n">is_video</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;(b f) (h w) c -&gt; b c f h w&#39;</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="n">b</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b (h w) c -&gt; b c h w&#39;</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">)</span>
+
+        <span class="k">if</span> <span class="n">enable_time</span><span class="p">:</span>
+
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;b c f h w -&gt; (b h w) f c&#39;</span><span class="p">)</span> <span class="c1">#[bXhXw, f, c] </span>
+
+            <span class="n">time_rel_pos_bias</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">temporal_rel_pos_bias</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
+
+            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">temporal_attn</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">rel_pos_bias</span> <span class="o">=</span> <span class="n">time_rel_pos_bias</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
+
+            <span class="n">x</span> <span class="o">=</span> <span class="n">rearrange</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="s1">&#39;(b h w) f c -&gt; b c f h w&#39;</span><span class="p">,</span> <span class="n">w</span> <span class="o">=</span> <span class="n">w</span><span class="p">,</span> <span class="n">h</span> <span class="o">=</span> <span class="n">h</span><span class="p">)</span>
+
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_feed_forward</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">enable_time</span> <span class="o">=</span> <span class="n">enable_time</span><span class="p">)</span> <span class="o">+</span> <span class="n">x</span>
+
+        <span class="k">return</span> <span class="n">x</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</li>
+<li><p>Frame rate conditioning</p>
+<ul class="simple">
+<li><p>비디오의 초당 프레임 수를 나타내는 추가 컨디셔닝 파라미터 <span class="math notranslate nohighlight">\(fps\)</span>를 추가한다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="frame-interpolation-network">
+<h3>3.3 Frame Interpolation Network<a class="headerlink" href="#frame-interpolation-network" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>↑F (Frame Interpolation Network)란?</p>
+<ul>
+<li><p>생성된 프레임 수를 증가시켜, 생성된 비디오를 더 부드럽게 만들고 비디오 길이를 연장 시킬 수 있는 네트워크</p></li>
+<li><p>프레임을 보간하고 extrapolation을 하는 네트워크</p>
+<ul>
+<li><p>Extrapolation: 주어진 데이터 또는 정보를 사용하여 미래의 값을 예측하거나 확장</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>↑F (Frame Interpolation Network) 동작</p>
+<ul>
+<li><p>Spatiotemporal decoder <span class="math notranslate nohighlight">\(D^t\)</span>에서 마스크 처리된 입력 프레임을 제로 패딩하고 비디오 업샘플링을 적용하여 masked frame interpolation을 파인 튜닝한다.</p></li>
+<li><p>파인 튜닝할 때 U-Net의 입력에 4개의 채널을 추가한다.</p>
+<ul>
+<li><p>RGB 마스킹 비디오 입력을 위한 3개의 채널과 마스킹되는 프레임을 나타내는 추가 바이너리 채널</p></li>
+</ul>
+</li>
+<li><p>다양한 frame-skips과 <span class="math notranslate nohighlight">\(fps\)</span>에 대해 파인튜닝하여 추론시 여러 temporal upsample rate를 제공한다.</p></li>
+</ul>
+</li>
+<li><p>본 논문의 모든 실험에서는 ↑F를 frame skip 5로 적용하여 16프레임 비디오를 76프레임((16-1)X5+1)으로 업샘플링 하였다.</p></li>
+<li><p>비디오 시작 또는 끝 프레임을 마스킹하여 비디오 추정 또는 이미지 애니메이션에도 사용할 수 있다.</p></li>
+</ul>
+</section>
+<section id="training">
+<h3>3.4 Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>위에서 설명한 구성 요소들은 독립적으로 학습 된다.</p></li>
+<li><p>훈련 과정</p>
+<ol class="arabic">
+<li><p>Prior <span class="math notranslate nohighlight">\(P\)</span> 훈련 (text-image 데이터 이용)</p>
+<p>→ 텍스트를 입력으로 받는 prior <span class="math notranslate nohighlight">\(P\)</span>는 text-image 데이터에 대해서만 학습 되고 비디오에 대해서는 파인 튜닝하지 않는다.</p>
+</li>
+<li><p>이미지를 이용한 학습</p>
+<p>→ Decoder, prior, 두개의 super-resolution 요소들은 먼저 텍스트 없이 이미지 만으로 학습 된다.</p>
+<p>→ Decoder는 Clip image embedding을 입력으로 받고, super-resolution 요소들은 학습 중에 입력으로 들어온 downsampled image를 입력으로 받는다.</p>
+</li>
+<li><p>비디오를 이용한 학습</p>
+<ul class="simple">
+<li><p>이미지에 대한 훈련이 끝나면 새로운 시간 레이어를 추가하고 초기화하여 레이블이 지정되지 않은 비디오 데이터에 대해 파인 튜닝한다.</p></li>
+<li><p>원본 비디오에서 16프레임이 샘플링 되며, 1에서 30 사이의 랜덤 <span class="math notranslate nohighlight">\(fps\)</span>를 사용한다.</p></li>
+<li><p>디코더를 학습하는 동안 훈련 초기에는 더 높은 <span class="math notranslate nohighlight">\(fps\)</span> 범위(모션이 적은)에서 시작하고, 이후에는 더 작은 <span class="math notranslate nohighlight">\(fps\)</span> 범위(모션이 많은)로 전환한다.</p></li>
+<li><p>Masked-frame interpolation 네트워크는 temporal 디코더로부터 파인 튜닝된다.</p></li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<section id="dataset-and-settings">
+<h3>4.1 Dataset and Settings<a class="headerlink" href="#dataset-and-settings" title="Permalink to this heading">#</a></h3>
+<section id="datasets">
+<h4>Datasets<a class="headerlink" href="#datasets" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>Image, Text</p>
+<ul>
+<li><p>LAION-5B 데이터셋의 일부 2.3B의 데이터를 사용하였다.</p></li>
+<li><p>NSFW 이미지, 텍스트의 유해한 단어 또는 워터마크 확률이 0.5보다 큰 이미지가 있는 샘플 쌍을 필터링하였다.</p>
+<ul>
+<li><p>NSFW: Not Safe For Work, 선정적이거나 음란하거나 폭력적인 내용을 포함한 콘텐츠</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Video</p>
+<ul>
+<li><p>WebVid-10M과, HD-VILA-100M 데이터셋의 일부 10M 데이터를 사용하였다.</p>
+<ul>
+<li><p>Decoder <span class="math notranslate nohighlight">\(D^t\)</span>, interpolation 모델 → WebVid-10M을 이용하여 학습</p></li>
+<li><p><span class="math notranslate nohighlight">\(SR^t_l\)</span> → WebVid-10M, HD-VILA-100M을 이용하여 학습</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Zero-shot test 데이터</p>
+<ul>
+<li><p>UCF-101, MSR-VTT</p>
+<ul>
+<li><p>UCF-101: 액션 인식 연구를 위해 고안되었으며, 다양한 동작 및 환경에서 촬영된 비디오 클립 데이터셋</p></li>
+<li><p>MSR-VTT: 비디오와 해당 비디오에 대한 텍스트 설명 또는 캡션을 포함하는 데이터셋</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="automatic-metrics">
+<h4>Automatic Metrics<a class="headerlink" href="#automatic-metrics" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>UCF-101</p>
+<ul>
+<li><p>각 클래스에 대해 하나의 템플릿 문장을 작성하고 평가를 위해 수정한다.</p></li>
+<li><p>10K 샘플에 대해 Fréchet Video Distance(FVD)와 Inception Score(IS)를 측정한다.</p></li>
+<li><p>Train셋과 동일한 클래스 분포를 따르는 샘플을 생성한다.</p></li>
+</ul>
+</li>
+<li><p>MSR-VTT</p>
+<ul>
+<li><p>테스트 세트의 모든 59,794 캡션에 대한 FID와 CLIPSIM(비디오 프레임과 텍스트 간의 평균 CLIP 유사도)를 측정한다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="human-evaluation-set-and-metrics">
+<h4>Human Evaluation Set and Metrics<a class="headerlink" href="#human-evaluation-set-and-metrics" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>Amazon Mechanical Turk(AMT)에서 300개의 프롬프트로 이루어진 평가 세트를 수집하였다.</p></li>
+<li><p>Annotator들에게 T2V 시스템이 있다면 어떤 것을 생성하고 싶은지 물어봤다.</p></li>
+<li><p>불완전하거나, 너무 추상적이거나, 불쾌감을 주는 프롬프트를 필터링 하였다.</p></li>
+<li><p>5가지 카테고리(동물, 판타지, 사람, 자연 및 풍경, 음식 및 음료)를 식별하고 해당 카테고리에 맞는 프롬프트를 선택하였다.</p></li>
+<li><p>이러한 프롬프트는 해당 프롬프트로 동영상을 미리 생성해 보지 않은 상태에서 선택 되었으며, 이후 고정된 상태로 유지했다.</p></li>
+<li><p>Human evaluation을 위해 Imagen의 DrawBench 프롬프트도 사용하였다.</p></li>
+<li><p>비디오 품질과 text-video faithfulness를 평가하였다.</p>
+<ul>
+<li><p>비디오 품질 → 두 개의 비디오를 랜덤 순서로 보여주고 어떤 비디오의 품질이 더 좋은지 annotator에게 물어본다.</p></li>
+<li><p>Text-video faithfulness → 텍스트를 추가로 보여주고 어떤 비디오가 텍스트와 더 잘 일치하는지 annotator에게 물어본다.</p></li>
+</ul>
+</li>
+<li><p>보간 모델과 FILM의 비디오 모션 사실감을 비교하기 위한 평가도 진행하였다.</p></li>
+<li><p>5명의 각기 다른 annotator의 다수 득표를 최종 결과로 사용하였다.</p></li>
+</ul>
+</section>
+</section>
+<section id="quantitative-results">
+<h3>4.2 Quantitative Results<a class="headerlink" href="#quantitative-results" title="Permalink to this heading">#</a></h3>
+<section id="automatic-evaluaton-on-msr-vtt">
+<h4>Automatic Evaluation on MSR-VTT<a class="headerlink" href="#automatic-evaluaton-on-msr-vtt" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>MSR-VTT에 대해 성능을 보고하는 GODIVA, NUWA 외에도, 중국어와 영어를 모두 입력으로 받는  CogVideo 모델에 대해서도 추론을 수행하였다.</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/4.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 595 </span><span class="caption-text">Automatic Evaluation on MSR-VTT</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>→ 가장 우수한 성능을 보인다.</p>
+</section>
+</section>
+<section id="automatic-evluation-on-ucf-101">
+<h3>Automatic Evaluation on UCF-101<a class="headerlink" href="#automatic-evluation-on-ucf-101" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id12">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/5.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 596 </span><span class="caption-text">Automatic Evaluation on UCF-101</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>→ Make-A-Video의 제로 샷 성능이 다른 방법보다 우수하다. Finetuning을 한 결과에서도 SOTA를 달성하였다.</p>
+<section id="human-evaluation">
+<h4>Human Evaluation<a class="headerlink" href="#human-evaluation" title="Permalink to this heading">#</a></h4>
+<ul class="simple">
+<li><p>DrawBench와 테스트셋에 대해서 CogVideo와 성능을 비교한다.</p></li>
+<li><p>또한, VDM의 웹 페이지에 표시된 28개의 동영상에 대해서도 평가한다.</p></li>
+<li><p>각 입력에 대해 8개의 동영상을 무작위로 생성하고, 8번 평가하여 평균 결과를 낸다.</p></li>
+<li><p>사람의 평가를 위해 76x256x256 해상도로 동영상을 생성한다.</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/6.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 597 </span><span class="caption-text">Human Evaluation</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>→ 평가자가 Make-A-Video 모델의 결과가 더 낫다고 투표한 퍼센트 비율. 대부분 평가자가 모든 벤치마크에서 Make-A-Video가 더 낫다고 평가하였다.</p>
+<ul class="simple">
+<li><p>Frame Interpolation Network와 FILM을 비교 평가하기</p>
+<ul>
+<li><p>DrawBench의 텍스트 프롬프트와 평가 세트에서 저프레임률 비디오(1 FPS)를 생성한 다음, 4FPS까지 업샘플링한다.</p></li>
+<li><p>평가자들은 eval set에 대해서는 62%,  DrawBench에 대해서는 54%로 Make-A-Video가 더 낫다고 평가하였다.</p></li>
+<li><p>프레임 간의 차이가 커서 물체가 어떻게 움직이는지에 대한 real-world 지식이 중요한 경우에는 본 논문의 방법이 더 뛰어난 것으로 관찰 되었다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="qualitative-results">
+<h3>4.3 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id14">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/7.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 598 </span><span class="caption-text">T2V Generation 결과. 맨 위: VDM, 가운데: CogVideo, 맨 아래: Make-A-Video
+→ Make-A-Video가 모션의 일관성을 유지하면서 더 풍부한 콘텐츠를 생성할 수 있다.</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id15">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/8.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 599 </span><span class="caption-text">이미지에 mask frame interpolation 및 extrapolation network ↑F를 적용한 결과.
+가장 왼쪽에 입력 이미지가 주어지면, 이를 동영상으로 애니메이션화 함.
+사용자는 자신의 이미지를 사용하여 동영상을 생성할 수 있으며, 생성된 동영상을 개인화하고 직접 제어할 수 있음.</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id16">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/9.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 600 </span><span class="caption-text">두 이미지 사이의 interpolation 결과. 왼쪽: FILM, 오른쪽: 본 논문의 approach
+FILM →  실제 움직이는 object에 대한 이해 없이 프레임을 부드럽게 전환하기만 함.
+본 논문의 approach → 의미론적으로 더 의미있는 interpolation을 만듦.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id17">
+<img alt="make_a_video_06" class="bg-primary mb-1" src="../../_images/101.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 601 </span><span class="caption-text">비디오 변형 예시. 위: 원본 비디오, 아래: 새로운 비디오</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기타 결과: <a class="reference external" href="https://make-a-video.github.io/">https://make-a-video.github.io/</a></p></li>
+</ul>
+</section>
+</section>
+<section id="id3">
+<h2>5. 결론<a class="headerlink" href="#id3" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>주변 세계로부터 지식을 배우는 human intelligence처럼 generative system도 인간의 학습 방식을 모방할 수 있다면, 더욱 창의적이고 유용할 것이다.</p></li>
+<li><p>연구자들은 비지도 학습을 통해 훨씬 더 많은 동영상에서 세계의 dynamic을 학습함으로써 기존의 한계를 극복할 수 있다.</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="one_step_diffusion_with_distribution_matching_distillation.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">One-step Diffusion with Distribution Matching Distillation</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="VideoLDM.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">VideoLDM</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Make-A-video 제안 배경</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">Make-A-video 특성</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#previous-work">2. Previous Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-model">3.1. Text-To-Image Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#spatiotemporal-layers">3.2. Spatiotemporal Layers</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-3d-convolutional-layers">3.2.1 Pseudo-3D convolutional layers</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#psuedo-3d-attention-layers">3.2.2. Pseudo-3D attention layers</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#frame-interpolation-network">3.3 Frame Interpolation Network</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training">3.4 Training</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset-and-settings">4.1 Dataset and Settings</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">Datasets</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-metrics">Automatic Metrics</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation-set-and-metrics">Human Evaluation Set and Metrics</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantitative-results">4.2 Quantitative Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evaluaton-on-msr-vtt">Automatic Evaluation on MSR-VTT</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-evluation-on-ucf-101">Automatic Evaluation on UCF-101</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#human-evaluation">Human Evaluation</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">4.3 Qualitative Results</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id3">5. 결론</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/MimicBrush.html b/docs/review/MimicBrush.html
old mode 100644
new mode 100755
index 8bf44afb..162ef98c
--- a/docs/review/MimicBrush.html
+++ b/docs/review/MimicBrush.html
@@ -1,991 +1,1011 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>MimicBrush: Zero-shot Image Editing with Reference Imitation &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/MimicBrush';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Make A Video" href="Make_A_Video.html" />
-    <link rel="prev" title="LCM-LoRA: A Universal Stable-Diffusion Acceleration Module" href="LCM-LoRA.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/MimicBrush.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/MimicBrush.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>MimicBrush: Zero-shot Image Editing with Reference Imitation</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#three-lines-summary">Three Lines Summary</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">2. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-pipeline">Overall Pipeline</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-structure">Model Structure</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">Training Strategy</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-benchmark">Evaluation Benchmark</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">Implementation Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons-with-other-works">Comparisons with Other Works</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">Qualitative Analysis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#review">Review</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> MimicBrush: Zero-shot Image Editing with Reference Imitation</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2406.07547">https://arxiv.org/pdf/2406.07547</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/ali-vilab/MimicBrush">Official</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Chanyeong Shin</p></li>
-<li><p><strong>Last updated on Nov. 05, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="mimicbrush-zero-shot-image-editing-with-reference-imitation">
-<h1>MimicBrush: Zero-shot Image Editing with Reference Imitation<a class="headerlink" href="#mimicbrush-zero-shot-image-editing-with-reference-imitation" title="Permalink to this heading">#</a></h1>
-<section id="three-lines-summary">
-<h2>Three Lines Summary<a class="headerlink" href="#three-lines-summary" title="Permalink to this heading">#</a></h2>
-<ol class="arabic simple">
-<li><p>Edited Image 로 어떻게 자연스럽게 변해야 할지에 관한 <strong>“imitative editing” 에 관한 아이디어</strong></p></li>
-<li><p><strong>Source Image 와 Reference Image 간의 correspondence</strong> 를 이용해 바뀌어야 할 부분을 잘 가져오는 방법인 MimicBrush 라고 명명한 generative training framework 제안</p></li>
-<li><p><strong>SOTA 인 동시에 앞으로의 imitative editing 연구를 위한 evaluation benchmark</strong> 제안</p></li>
-</ol>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Image editing 분야 자체가 굉장히 수많은 요구 조건과 시나리오를 만족하면서 수행되어야 했기에 기존의 방법들이 여전히 challenging 한 문제를 풀고 있음</p></li>
-<li><p>기존의 방법들은 현재 source image 와 함께 해당 mask 를 input 으로 넣어주고 있음 (이건 공통)</p>
-<ul>
-<li><p>Inpainting method</p>
-<ul>
-<li><p>editing 이라는 task 를 <strong>“Text Prompt”</strong> 하나로만 해결하고자 했는데 이건 원하는 결과를 뽑아내기에는 적절하지 않음</p></li>
-<li><p>My thoughts : 실제로 inpainting 은 refining 과 같이 자연스럽게 만들어주는 것 외에는 실무에서 잘 사용하지는 않았던 것 같음</p></li>
-</ul>
-</li>
-<li><p>Composition method</p>
-<ul>
-<li><p>ref image 와 ref mask/box 를 이용해 이를 해결하고자 하였는데, 아무래도 <strong>“individual object” 를 insertion</strong> 하는 작업처럼 느껴 모델이 어려워 할 만 했음</p>
-<ul>
-<li><p>shoe soles 이나 hair 와 같은 local components 나 로고나 texture 같은 local patterns</p></li>
-</ul>
-</li>
-<li><p>또한, Image 로부터 reference area 를 완벽하게 잘 추출하는 방법이 필요했음</p></li>
-<li><p>Local components 들은 또 전체 image 에 잘 어우러지게 하는 것도 고려해야 했고, 학습 과정에서 같은 object 인데 frame 에 따라 모양도 조금씩 달라서 이런 다양한 문제들을 풀어야 했음</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>앞선 문제들을 해결하기 위해서, editing 을 할 수 있는 novel pipeline 인 <strong>imitative editing</strong> 을 제안</p>
-<ul>
-<li><p>Ref image 에서 mask 는 사용하지 않고 source image 의 mask area 부분을 ref image 에서 어디에 해당하는지 자동으로 찾고 모방할 수 있도록 하였음</p></li>
-</ul>
-</li>
-<li><p>Imitative editing 을 하기 위해서 <strong>MimicBrush 라고 하는 dual diffusion U-Nets network framework</strong> 를 제안</p>
-<ul>
-<li><p>self-supervised manner 로 학습함 → 같은 video 에서 source / ref image 를 뽑아서 학습에 활용하는 방식</p></li>
-<li><p>당연히 같은 video 에서 추출했기 때문에 semantic correspondence 와 visual variations 를 가지고 있을 것</p></li>
-<li><p>masked source image 는 imitative U-Net 으로 / reference image 는 reference U-Net 으로 통과 → 이후  reference U-Net 의 attention K,V 를 imitative U-Net 에 injection 시키는 방식</p></li>
-</ul>
-</li>
-<li><p>이 Image editing 방식을 이용하여 학습된 model 은 구도, 조명, 카테고리 자체가 달라도 잘 변환이 되는 결과를 보였으며, ref image 의 visual concepts 의 detail 도 잘 유지한 채로 가져오는 것을 확인하였음</p></li>
-<li><p>조금 더 comprehensive 한 evaluation 을 위해서 imitative editing 의 benchmark 까지 제안</p>
-<ul>
-<li><p>part composition : 얼마나 part 구성이 자연스럽게 잘 되었는가?</p></li>
-<li><p>texture transfer : 느낌을 얼마나 잘 유지하면서 texture 가 변환됐는가?</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="method">
-<h2>2. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_1.png"><img alt="The training process of MimicBrush" class="bg-primary mb-1" src="../../_images/MimicBrush_1.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 570 </span><span class="caption-text">The training process of MimicBrush</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="overall-pipeline">
-<h3>Overall Pipeline<a class="headerlink" href="#overall-pipeline" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><strong>Dual diffusion models architecture + self-supervised manner</strong></p></li>
-<li><p>Video data 에는 consistent content 를 가지고 있고 visual variations 까지 있기 때문에 이를 활용</p>
-<ul>
-<li><p>랜덤하게 video clip 에서 two frames 를 뽑아서 학습 sample 로 활용</p></li>
-</ul>
-</li>
-<li><p>source image 는 masking 시키고, ref image 는 masked source image 를 recover 할 수 있도록 도움을 주도록 넣음</p></li>
-<li><p>결국 MimicBrush 는 dog’s face 와 같은 corresponding visual information 이 위치하는 것을 학습하고 source image 의 masked area 를 repaint 시킴</p></li>
-<li><p>또한 source image 의 빈 부분을 채우는 과정에서 자연스럽게 blending 시켜야 하기 때문에 visual content 를 같은 환경의 포즈, 조명, 시점으로 변환하는 것도 학습</p></li>
-<li><p>앞서 언급했던 것처럼 dual branch 의 U-Nets 구조를 활용 → imitative and reference U-Net</p>
-<ul>
-<li><p>attention layers 의 K,V 는 서로 share 하고(실질적으로는 concat) reference image 로부터 indications 을 찾아 masked source image 를 만들도록 함</p></li>
-</ul>
-</li>
-<li><p>추가로, source &amp; ref image 에 variation 을 증가시키기 위해 augmentation 도 적용</p></li>
-<li><p>또한, optional condition 으로 imitative U-Net 에 depth map 도 줌</p>
-<ul>
-<li><p>inference 단계에서 object 의 shape 가 잘 유지됐는지 depth map 을 활용할지 말지를 결정할 수도 있게 하였음</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="model-structure">
-<h3>Model Structure<a class="headerlink" href="#model-structure" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Imitative U-Net</p>
-<ul>
-<li><p>Base : SD-1.5 inpainting model</p></li>
-<li><p>Input : 13 channels tensor</p>
-<ul>
-<li><p>image latent → 4 channels</p></li>
-<li><p>binary mask → 1 channel</p></li>
-<li><p>background latent → 4 channels</p></li>
-<li><p>depth latent → 4 channels</p></li>
-</ul>
-</li>
-<li><p>(Figure 에는 나와있지 않지만) 기존 original U-Net 은 CLIP 을 text embedding 으로 활용하는데, 본 논문은 CLIP 을 reference image 으로부터 뽑아낸 image embedding 을 cross-attention 에 활용</p>
-<ul>
-<li><p>image embedding 이후에 projection layer 을 거쳐 들어감</p></li>
-</ul>
-</li>
-<li><p>학습은 imitative U-Net 과 CLIP projection layer 의 parameters 들이 학습..</p></li>
-</ul>
-</li>
-<li><p>Reference U-Net</p>
-<ul>
-<li><p>최근에 굉장히 많은 연구들이 reference image 로부터 fine-grained features 를 뽑아내기 위해 additional U-Net 을 활용하는 것이 훨씬 더 효율적이라는 것을 증명하였음</p></li>
-<li><p>Base : SD-1.5</p></li>
-<li><p>reference features 를 imitative U-Net 의 middle &amp; upper stages 에 K,V 를 injection (concat) 시킴</p>
-<ul>
-<li><p>이를 통해, imitative U-Net 이 reference image 의 content 를 활용해 source image 의 masking 된 부분을 완성시킴</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Depth Model</p>
-<ul>
-<li><p>Depth Anything 으로 unmasked source image 의 depth map 을 뽑도록 했음</p></li>
-<li><p>Depth model 자체는 freeze 시키고, trainable projector 를 넣어 depth map 을 depth latent 인 4-channel 로 projection 시킬 수 있도록 함</p></li>
-<li><p>학습 중에는 depth model 의 input 을 0.5 확률로 drop 시킴으로써 inference 중에 optional 하게 shape control 을 가능하도록 함</p>
-<ul>
-<li><p>My thoughts : 아마 texture transfer task 를 위함 일듯</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="training-strategy">
-<h3>Training Strategy<a class="headerlink" href="#training-strategy" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>MimicBrush 의 cross-image imitation ability (서로 다른 카테고리에서 잘 모방해오는 능력 정도로 이해하면 됨) 를 끌어올리기 위해서는 조금 더 적합한 training sample 을 모아서 학습할 수 있는 방법이 필요했음</p></li>
-<li><p>Training data 를 구축하는 과정에서 두 가지의 철학을 지키려 하였음</p>
-<ol class="arabic simple">
-<li><p>source / reference images 들 간의 correspondence relation 이 존재해야 한다.</p></li>
-<li><p>robustness 를 위해 source / reference image 사이의 large variations 을 기대할 수 있어야 한다.</p></li>
-</ol>
-</li>
-<li><p>Data selection</p>
-<ul>
-<li><p>학습 중에는 같은 비디오로부터 frame 2개를 sampling 해서 뽑았음</p></li>
-<li><p>SSIM 을 이용해 video frames 간의 similarity 를 측정했고 너무 그 값이 크거나 작으면 filtering 했음</p></li>
-</ul>
-</li>
-<li><p>Data augmentation</p>
-<ul>
-<li><p>source &amp; reference image 의 variation 을 증가시키기 위해서, 강력한 data augmentation 을 활용</p>
-<ul>
-<li><p>color jitter, rotation, resizing, flipping, 심지어는 random projection transformation 으로 더욱 강한 deformation 도 수행</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Masking strategy</p>
-<ul>
-<li><p>가장 간단하게는 image 를 N x N grid 로 나누고 랜덤하게 masking 시키는 방법인데 저자들은 이렇게 랜덤하게 가져가면 easy cases 들이 많은 portion 을 차지한다는 것을 발견</p>
-<ul>
-<li><p>ex. 배경 같은 큰 area 를 차지하는 것들은 계속 반복되는 content/textures 이기 때문에 도움이 안됨</p></li>
-</ul>
-</li>
-<li><p>SIFT matching 을 이용해서 source &amp; ref image 의 matching points 를 얻고 그 matched feature points 의 grids 들을 좀 더 masking 하도록 하였음</p></li>
-<li><p>video 보다 high-quality image 를 찾는 것이 더 쉬웠기 때문에 static image 한 장을 가지고 augmentation 시킨 다음, seg map 가지고 masking 시키는 방식으로도 활용 → robustness 를 증가시키는 효과를 불러일으킴</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="evaluation-benchmark">
-<h3>Evaluation Benchmark<a class="headerlink" href="#evaluation-benchmark" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_2.png"><img alt="Evaluation Benchmark" class="bg-primary mb-1" src="../../_images/MimicBrush_2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 571 </span><span class="caption-text">Evaluation Benchmark</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Imitative editing 은 굉장히 novel 한 task 이기 때문에 본 논문에서 성능을 evaluation 할 수 있는 benchmark 까지 제공</p></li>
-<li><p>Part composition</p>
-<ul>
-<li><p>source / ref image 간의 semantic correspondence 를 찾고 composition 시키는 task</p></li>
-<li><p>Inter-ID track</p>
-<ul>
-<li><p>Fashion, animal, product, scenario 등 다양한 data 를 각 topic 마다 30 장씩 모았음</p></li>
-<li><p>수동으로 source mask 를 다 그리고, 생성된 결과 GT 도 없기 때문에 직접 reference regions 과 text prompt 까지 다 annotation 시킴 = 노가다 했음</p></li>
-<li><p>reference region 과 생성된 region 사이의 similarity 도 DINO 와 CLIP image sim score 를 계산하도록 했고 edited image 와 text prompt 사이의 CLIP text similarity 도 report 해놓음</p></li>
-</ul>
-</li>
-<li><p>Inner-ID track</p>
-<ul>
-<li><p>DreamBooth 로부터 30 image paris 가져와서 source image masking 다 함</p></li>
-<li><p>GT 와는 SSIM, PSNR, LPIPS score 활용</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Texture transfer</p>
-<ul>
-<li><p>ref image 의 texture 나 pattern 을 얼마나 잘 transfer 했는지에 관한 task</p></li>
-<li><p>Additional condition 으로 depth map 을 활용</p></li>
-<li><p>Part composition 은 semantic correspondence 를 찾도록 하지만, 이 task 는 source shape 는 유지하면서 reference 의 texture 를 얼마나 잘 가져오는지를 판단하도록 objects 전체를 masking</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<section id="implementation-details">
-<h3>Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Hyperparameters</p>
-<ul>
-<li><p>512x512 image 로 resolution 다 맞추도록 해서 학습</p></li>
-<li><p>Adam optimizer &amp; lr : 1e-5</p></li>
-<li><p>grid number N : 3~10 에서 randomly choose</p></li>
-<li><p>masking 은 SIFT-matched features points 는 75% 고르도록 하고, 나머지는 50% 고르도록 하였음</p></li>
-<li><p>Reference U-Net 에는 CFG 10% 확률로 drop 하도록 하였고, inference 시에 guidance scale 은 5</p></li>
-</ul>
-</li>
-<li><p>Training data</p>
-<ul>
-<li><p>Pexels 같은 websites 에서 100k video</p></li>
-<li><p>diversity 증가시키기 위해서 SAM dataset 도 활용 → 여기서 static image augmentation 적용</p></li>
-<li><p>학습 중에는 Pexels 70% , SAM 30%</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="comparisons-with-other-works">
-<h3>Comparisons with Other Works<a class="headerlink" href="#comparisons-with-other-works" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_3.png"><img alt="Qualitative Result" class="bg-primary mb-1" src="../../_images/MimicBrush_3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 572 </span><span class="caption-text">Qualitative Result</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_4.png"><img alt="Quantitative Result" class="bg-primary mb-1" src="../../_images/MimicBrush_4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 573 </span><span class="caption-text">Quantitative Result</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ablation-studies">
-<h3>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_5.png"><img alt="Ablation Studies" class="bg-primary mb-1" src="../../_images/MimicBrush_5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 574 </span><span class="caption-text">Ablation Studies</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="qualitative-analysis">
-<h3>Qualitative Analysis<a class="headerlink" href="#qualitative-analysis" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_6.png"><img alt="Qualitative Analysis" class="bg-primary mb-1" src="../../_images/MimicBrush_6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 575 </span><span class="caption-text">Qualitative Analysis</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="limitations">
-<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Robust performance.. But, region 이 너무 작거나 multiple candidates 가 있게 되면 실패하는 경우가 생기긴 했음</p>
-<ul>
-<li><p>이럴 때는 zoom in 을 해서 하는 것을 추천한다고 함</p></li>
-</ul>
-</li>
-<li><p>여전히 유해한 content 들을 editing 하는 경우도 있기 때문에 이를 필터링 할 수 있는 방법이 고안되어야 한다고 주장</p></li>
-</ul>
-</section>
-<section id="review">
-<h2>Review<a class="headerlink" href="#review" title="Permalink to this heading">#</a></h2>
-<aside>
-🔖
-<p><strong>Three Line Review</strong></p>
-<ol class="arabic simple">
-<li><p>논문 자체의 색깔이 역시 실무에 적합한 페이퍼다 !</p></li>
-<li><p>Evaluation benchmark 까지 제안하면서 새로운 field 를 열려고 하는 시도가 인상 깊었다</p></li>
-<li><p>paper 자체는 24.06 으로 따끈따끈한데 왜 SD 1.5 썼는지 이해가 잘 안 됐던.. 다른 baseline 도 써봤으면 더 좋았을 것 같다</p></li>
-</ol>
-</aside>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="LCM-LoRA.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Make_A_Video.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Make A Video</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#three-lines-summary">Three Lines Summary</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">2. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-pipeline">Overall Pipeline</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-structure">Model Structure</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">Training Strategy</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-benchmark">Evaluation Benchmark</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">Implementation Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons-with-other-works">Comparisons with Other Works</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">Qualitative Analysis</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#review">Review</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>MimicBrush: Zero-shot Image Editing with Reference Imitation &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/MimicBrush';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="One-step Diffusion with Distribution Matching Distillation" href="one_step_diffusion_with_distribution_matching_distillation.html" />
+    <link rel="prev" title="LCM-LoRA: A Universal Stable-Diffusion Acceleration Module" href="LCM-LoRA.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Latent Diffusion Model</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/MimicBrush.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/MimicBrush.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>MimicBrush: Zero-shot Image Editing with Reference Imitation</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#three-lines-summary">Three Lines Summary</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">2. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-pipeline">Overall Pipeline</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-structure">Model Structure</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">Training Strategy</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-benchmark">Evaluation Benchmark</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">Implementation Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons-with-other-works">Comparisons with Other Works</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">Qualitative Analysis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#review">Review</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> MimicBrush: Zero-shot Image Editing with Reference Imitation</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2406.07547">https://arxiv.org/pdf/2406.07547</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/ali-vilab/MimicBrush">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Chanyeong Shin</p></li>
+<li><p><strong>Last updated on Nov. 05, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="mimicbrush-zero-shot-image-editing-with-reference-imitation">
+<h1>MimicBrush: Zero-shot Image Editing with Reference Imitation<a class="headerlink" href="#mimicbrush-zero-shot-image-editing-with-reference-imitation" title="Permalink to this heading">#</a></h1>
+<section id="three-lines-summary">
+<h2>Three Lines Summary<a class="headerlink" href="#three-lines-summary" title="Permalink to this heading">#</a></h2>
+<ol class="arabic simple">
+<li><p>Edited Image 로 어떻게 자연스럽게 변해야 할지에 관한 <strong>“imitative editing” 에 관한 아이디어</strong></p></li>
+<li><p><strong>Source Image 와 Reference Image 간의 correspondence</strong> 를 이용해 바뀌어야 할 부분을 잘 가져오는 방법인 MimicBrush 라고 명명한 generative training framework 제안</p></li>
+<li><p><strong>SOTA 인 동시에 앞으로의 imitative editing 연구를 위한 evaluation benchmark</strong> 제안</p></li>
+</ol>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Image editing 분야 자체가 굉장히 수많은 요구 조건과 시나리오를 만족하면서 수행되어야 했기에 기존의 방법들이 여전히 challenging 한 문제를 풀고 있음</p></li>
+<li><p>기존의 방법들은 현재 source image 와 함께 해당 mask 를 input 으로 넣어주고 있음 (이건 공통)</p>
+<ul>
+<li><p>Inpainting method</p>
+<ul>
+<li><p>editing 이라는 task 를 <strong>“Text Prompt”</strong> 하나로만 해결하고자 했는데 이건 원하는 결과를 뽑아내기에는 적절하지 않음</p></li>
+<li><p>My thoughts : 실제로 inpainting 은 refining 과 같이 자연스럽게 만들어주는 것 외에는 실무에서 잘 사용하지는 않았던 것 같음</p></li>
+</ul>
+</li>
+<li><p>Composition method</p>
+<ul>
+<li><p>ref image 와 ref mask/box 를 이용해 이를 해결하고자 하였는데, 아무래도 <strong>“individual object” 를 insertion</strong> 하는 작업처럼 느껴 모델이 어려워 할 만 했음</p>
+<ul>
+<li><p>shoe soles 이나 hair 와 같은 local components 나 로고나 texture 같은 local patterns</p></li>
+</ul>
+</li>
+<li><p>또한, Image 로부터 reference area 를 완벽하게 잘 추출하는 방법이 필요했음</p></li>
+<li><p>Local components 들은 또 전체 image 에 잘 어우러지게 하는 것도 고려해야 했고, 학습 과정에서 같은 object 인데 frame 에 따라 모양도 조금씩 달라서 이런 다양한 문제들을 풀어야 했음</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>앞선 문제들을 해결하기 위해서, editing 을 할 수 있는 novel pipeline 인 <strong>imitative editing</strong> 을 제안</p>
+<ul>
+<li><p>Ref image 에서 mask 는 사용하지 않고 source image 의 mask area 부분을 ref image 에서 어디에 해당하는지 자동으로 찾고 모방할 수 있도록 하였음</p></li>
+</ul>
+</li>
+<li><p>Imitative editing 을 하기 위해서 <strong>MimicBrush 라고 하는 dual diffusion U-Nets network framework</strong> 를 제안</p>
+<ul>
+<li><p>self-supervised manner 로 학습함 → 같은 video 에서 source / ref image 를 뽑아서 학습에 활용하는 방식</p></li>
+<li><p>당연히 같은 video 에서 추출했기 때문에 semantic correspondence 와 visual variations 를 가지고 있을 것</p></li>
+<li><p>masked source image 는 imitative U-Net 으로 / reference image 는 reference U-Net 으로 통과 → 이후  reference U-Net 의 attention K,V 를 imitative U-Net 에 injection 시키는 방식</p></li>
+</ul>
+</li>
+<li><p>이 Image editing 방식을 이용하여 학습된 model 은 구도, 조명, 카테고리 자체가 달라도 잘 변환이 되는 결과를 보였으며, ref image 의 visual concepts 의 detail 도 잘 유지한 채로 가져오는 것을 확인하였음</p></li>
+<li><p>조금 더 comprehensive 한 evaluation 을 위해서 imitative editing 의 benchmark 까지 제안</p>
+<ul>
+<li><p>part composition : 얼마나 part 구성이 자연스럽게 잘 되었는가?</p></li>
+<li><p>texture transfer : 느낌을 얼마나 잘 유지하면서 texture 가 변환됐는가?</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="method">
+<h2>2. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_1.png"><img alt="The training process of MimicBrush" class="bg-primary mb-1" src="../../_images/MimicBrush_1.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 570 </span><span class="caption-text">The training process of MimicBrush</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="overall-pipeline">
+<h3>Overall Pipeline<a class="headerlink" href="#overall-pipeline" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Dual diffusion models architecture + self-supervised manner</strong></p></li>
+<li><p>Video data 에는 consistent content 를 가지고 있고 visual variations 까지 있기 때문에 이를 활용</p>
+<ul>
+<li><p>랜덤하게 video clip 에서 two frames 를 뽑아서 학습 sample 로 활용</p></li>
+</ul>
+</li>
+<li><p>source image 는 masking 시키고, ref image 는 masked source image 를 recover 할 수 있도록 도움을 주도록 넣음</p></li>
+<li><p>결국 MimicBrush 는 dog’s face 와 같은 corresponding visual information 이 위치하는 것을 학습하고 source image 의 masked area 를 repaint 시킴</p></li>
+<li><p>또한 source image 의 빈 부분을 채우는 과정에서 자연스럽게 blending 시켜야 하기 때문에 visual content 를 같은 환경의 포즈, 조명, 시점으로 변환하는 것도 학습</p></li>
+<li><p>앞서 언급했던 것처럼 dual branch 의 U-Nets 구조를 활용 → imitative and reference U-Net</p>
+<ul>
+<li><p>attention layers 의 K,V 는 서로 share 하고(실질적으로는 concat) reference image 로부터 indications 을 찾아 masked source image 를 만들도록 함</p></li>
+</ul>
+</li>
+<li><p>추가로, source &amp; ref image 에 variation 을 증가시키기 위해 augmentation 도 적용</p></li>
+<li><p>또한, optional condition 으로 imitative U-Net 에 depth map 도 줌</p>
+<ul>
+<li><p>inference 단계에서 object 의 shape 가 잘 유지됐는지 depth map 을 활용할지 말지를 결정할 수도 있게 하였음</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="model-structure">
+<h3>Model Structure<a class="headerlink" href="#model-structure" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Imitative U-Net</p>
+<ul>
+<li><p>Base : SD-1.5 inpainting model</p></li>
+<li><p>Input : 13 channels tensor</p>
+<ul>
+<li><p>image latent → 4 channels</p></li>
+<li><p>binary mask → 1 channel</p></li>
+<li><p>background latent → 4 channels</p></li>
+<li><p>depth latent → 4 channels</p></li>
+</ul>
+</li>
+<li><p>(Figure 에는 나와있지 않지만) 기존 original U-Net 은 CLIP 을 text embedding 으로 활용하는데, 본 논문은 CLIP 을 reference image 로부터 뽑아낸 image embedding 을 cross-attention 에 활용</p>
+<ul>
+<li><p>image embedding 이후에 projection layer 를 거쳐 들어감</p></li>
+</ul>
+</li>
+<li><p>학습 시에는 imitative U-Net 과 CLIP projection layer 의 parameters 가 학습됨</p></li>
+</ul>
+</li>
+<li><p>Reference U-Net</p>
+<ul>
+<li><p>최근에 굉장히 많은 연구들이 reference image 로부터 fine-grained features 를 뽑아내기 위해 additional U-Net 을 활용하는 것이 훨씬 더 효율적이라는 것을 증명하였음</p></li>
+<li><p>Base : SD-1.5</p></li>
+<li><p>reference features 를 imitative U-Net 의 middle &amp; upper stages 에 K,V 를 injection (concat) 시킴</p>
+<ul>
+<li><p>이를 통해, imitative U-Net 이 reference image 의 content 를 활용해 source image 의 masking 된 부분을 완성시킴</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Depth Model</p>
+<ul>
+<li><p>Depth Anything 으로 unmasked source image 의 depth map 을 뽑도록 했음</p></li>
+<li><p>Depth model 자체는 freeze 시키고, trainable projector 를 넣어 depth map 을 depth latent 인 4-channel 로 projection 시킬 수 있도록 함</p></li>
+<li><p>학습 중에는 depth model 의 input 을 0.5 확률로 drop 시킴으로써 inference 중에 optional 하게 shape control 을 가능하도록 함</p>
+<ul>
+<li><p>My thoughts : 아마 texture transfer task 를 위함 일듯</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="training-strategy">
+<h3>Training Strategy<a class="headerlink" href="#training-strategy" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>MimicBrush 의 cross-image imitation ability (서로 다른 카테고리에서 잘 모방해오는 능력 정도로 이해하면 됨) 를 끌어올리기 위해서는 조금 더 적합한 training sample 을 모아서 학습할 수 있는 방법이 필요했음</p></li>
+<li><p>Training data 를 구축하는 과정에서 두 가지의 철학을 지키려 하였음</p>
+<ol class="arabic simple">
+<li><p>source / reference images 들 간의 correspondence relation 이 존재해야 한다.</p></li>
+<li><p>robustness 를 위해 source / reference image 사이의 large variations 을 기대할 수 있어야 한다.</p></li>
+</ol>
+</li>
+<li><p>Data selection</p>
+<ul>
+<li><p>학습 중에는 같은 비디오로부터 frame 2개를 sampling 해서 뽑았음</p></li>
+<li><p>SSIM 을 이용해 video frames 간의 similarity 를 측정했고 너무 그 값이 크거나 작으면 filtering 했음</p></li>
+</ul>
+</li>
+<li><p>Data augmentation</p>
+<ul>
+<li><p>source &amp; reference image 의 variation 을 증가시키기 위해서, 강력한 data augmentation 을 활용</p>
+<ul>
+<li><p>color jitter, rotation, resizing, flipping, 심지어는 random projection transformation 으로 더욱 강한 deformation 도 수행</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Masking strategy</p>
+<ul>
+<li><p>가장 간단하게는 image 를 N x N grid 로 나누고 랜덤하게 masking 시키는 방법인데 저자들은 이렇게 랜덤하게 가져가면 easy cases 들이 많은 portion 을 차지한다는 것을 발견</p>
+<ul>
+<li><p>ex. 배경 같은 큰 area 를 차지하는 것들은 계속 반복되는 content/textures 이기 때문에 도움이 안됨</p></li>
+</ul>
+</li>
+<li><p>SIFT matching 을 이용해서 source &amp; ref image 의 matching points 를 얻고 그 matched feature points 의 grids 들을 좀 더 masking 하도록 하였음</p></li>
+<li><p>video 보다 high-quality image 를 찾는 것이 더 쉬웠기 때문에 static image 한 장을 가지고 augmentation 시킨 다음, seg map 가지고 masking 시키는 방식으로도 활용 → robustness 를 증가시키는 효과를 불러일으킴</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="evaluation-benchmark">
+<h3>Evaluation Benchmark<a class="headerlink" href="#evaluation-benchmark" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_2.png"><img alt="Evaluation Benchmark" class="bg-primary mb-1" src="../../_images/MimicBrush_2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 571 </span><span class="caption-text">Evaluation Benchmark</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Imitative editing 은 굉장히 novel 한 task 이기 때문에 본 논문에서 성능을 evaluation 할 수 있는 benchmark 까지 제공</p></li>
+<li><p>Part composition</p>
+<ul>
+<li><p>source / ref image 간의 semantic correspondence 를 찾고 composition 시키는 task</p></li>
+<li><p>Inter-ID track</p>
+<ul>
+<li><p>Fashion, animal, product, scenario 등 다양한 data 를 각 topic 마다 30 장씩 모았음</p></li>
+<li><p>수동으로 source mask 를 다 그리고, 생성된 결과 GT 도 없기 때문에 직접 reference regions 과 text prompt 까지 다 annotation 시킴 = 노가다 했음</p></li>
+<li><p>reference region 과 생성된 region 사이의 similarity 도 DINO 와 CLIP image sim score 를 계산하도록 했고 edited image 와 text prompt 사이의 CLIP text similarity 도 report 해놓음</p></li>
+</ul>
+</li>
+<li><p>Inner-ID track</p>
+<ul>
+<li><p>DreamBooth 로부터 30 image pairs 가져와서 source image masking 다 함</p></li>
+<li><p>GT 와는 SSIM, PSNR, LPIPS score 활용</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Texture transfer</p>
+<ul>
+<li><p>ref image 의 texture 나 pattern 을 얼마나 잘 transfer 했는지에 관한 task</p></li>
+<li><p>Additional condition 으로 depth map 을 활용</p></li>
+<li><p>Part composition 은 semantic correspondence 를 찾도록 하지만, 이 task 는 source shape 는 유지하면서 reference 의 texture 를 얼마나 잘 가져오는지를 판단하도록 objects 전체를 masking</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<section id="implementation-details">
+<h3>Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Hyperparameters</p>
+<ul>
+<li><p>512x512 image 로 resolution 다 맞추도록 해서 학습</p></li>
+<li><p>Adam optimizer &amp; lr : 1e-5</p></li>
+<li><p>grid number N : 3~10 에서 randomly choose</p></li>
+<li><p>masking 은 SIFT-matched feature points 는 75% 고르도록 하고, 나머지는 50% 고르도록 하였음</p></li>
+<li><p>Reference U-Net 에는 CFG 10% 확률로 drop 하도록 하였고, inference 시에 guidance scale 은 5</p></li>
+</ul>
+</li>
+<li><p>Training data</p>
+<ul>
+<li><p>Pexels 같은 websites 에서 100k video</p></li>
+<li><p>diversity 증가시키기 위해서 SAM dataset 도 활용 → 여기서 static image augmentation 적용</p></li>
+<li><p>학습 중에는 Pexels 70% , SAM 30%</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="comparisons-with-other-works">
+<h3>Comparisons with Other Works<a class="headerlink" href="#comparisons-with-other-works" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_3.png"><img alt="Qualitative Result" class="bg-primary mb-1" src="../../_images/MimicBrush_3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 572 </span><span class="caption-text">Qualitative Result</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_4.png"><img alt="Quantitative Result" class="bg-primary mb-1" src="../../_images/MimicBrush_4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 573 </span><span class="caption-text">Quantitative Result</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-studies">
+<h3>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_5.png"><img alt="Ablation Studies" class="bg-primary mb-1" src="../../_images/MimicBrush_5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 574 </span><span class="caption-text">Ablation Studies</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="qualitative-analysis">
+<h3>Qualitative Analysis<a class="headerlink" href="#qualitative-analysis" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/MimicBrush_6.png"><img alt="Qualitative Analysis" class="bg-primary mb-1" src="../../_images/MimicBrush_6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 575 </span><span class="caption-text">Qualitative Analysis</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="limitations">
+<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Robust performance.. But, region 이 너무 작거나 multiple candidates 가 있게 되면 실패하는 경우가 생기긴 했음</p>
+<ul>
+<li><p>이럴 때는 zoom in 을 해서 하는 것을 추천한다고 함</p></li>
+</ul>
+</li>
+<li><p>여전히 유해한 content 들을 editing 하는 경우도 있기 때문에 이를 필터링 할 수 있는 방법이 고안되어야 한다고 주장</p></li>
+</ul>
+</section>
+<section id="review">
+<h2>Review<a class="headerlink" href="#review" title="Permalink to this heading">#</a></h2>
+<aside>
+🔖
+<p><strong>Three Line Review</strong></p>
+<ol class="arabic simple">
+<li><p>논문 자체의 색깔이 역시 실무에 적합한 페이퍼다 !</p></li>
+<li><p>Evaluation benchmark 까지 제안하면서 새로운 field 를 열려고 하는 시도가 인상 깊었다</p></li>
+<li><p>paper 자체는 24.06 으로 따끈따끈한데 왜 SD 1.5 썼는지 이해가 잘 안 됐던.. 다른 baseline 도 써봤으면 더 좋았을 것 같다</p></li>
+</ol>
+</aside>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="LCM-LoRA.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="one_step_diffusion_with_distribution_matching_distillation.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">One-step Diffusion with Distribution Matching Distillation</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#three-lines-summary">Three Lines Summary</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">2. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overall-pipeline">Overall Pipeline</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-structure">Model Structure</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-strategy">Training Strategy</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-benchmark">Evaluation Benchmark</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">Implementation Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons-with-other-works">Comparisons with Other Works</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-analysis">Qualitative Analysis</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#review">Review</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Muse.html b/docs/review/Muse.html
old mode 100644
new mode 100755
index 0f42eb73..1a50cc15
--- a/docs/review/Muse.html
+++ b/docs/review/Muse.html
@@ -1,1049 +1,1069 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Muse &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Muse';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Scaling up GANs for Text-to-Image Synthesis" href="GIGAGAN.html" />
-    <link rel="prev" title="Diffusion Models already have a Semantic Latent Space" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Muse.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Muse.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Muse</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Muse</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#masked-modeling">1. Masked modeling</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-architecture">2. Model Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pre-trained-text-encoders">2.1. Pre-trained Text Encoders</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#semantic-tokenization-using-vqgan">2.2. Semantic Tokenization using VQGAN</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#base-model">2.3. Base Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-model">2.4. Super-Resolution Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoder-finetuning">2.5. Decoder Finetuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.7. Classifier Free Guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#iterative-parallel-decoding-at-inference">2.8. Iterative Parallel Decoding at Inference</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">3. Results</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#q-a">Q&amp;A</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Muse: Text-To-Image Generation via Masked Generative Transformers</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2301.00704.pdf">https://arxiv.org/pdf/2301.00704.pdf</a></p></li>
-<li><p>Code: X</p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
-<li><p><strong>Last updated on Mar. 25. 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="muse">
-<h1>Muse<a class="headerlink" href="#muse" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_1.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 451 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Muse: T2I transformer model + Masked Modeling</strong></p>
-<ul>
-<li><p>diffusion, autoregressive model 보다 효과적인 성능을 냄</p></li>
-<li><p><strong>discrete token space 에서 masked modeling 방식으로 학습</strong></p>
-<ul>
-<li><p>pretrained LLM(<em><strong>T5-XXL</strong></em>) 으로 부터 추출된 text embedding이 주어지고, 랜덤하게 masked image token 을 예측하는 방식으로 학습</p></li>
-</ul>
-</li>
-<li><p>Imagen, DALL-E 2 와 비교할 때, sampling iteration이 적어 <strong>빠른 inference</strong> 수행 가능</p></li>
-<li><p>LLM 을 사용해 <strong>fine-grained 한 정보</strong>를 추출하여 high-fidelity 이미지 생성을 할 수 있고, 시각적 concept(object, spatial 관계, 자세, 등)을 더 잘 이해할 수 있음</p></li>
-<li><p>Muse-900M, CC3M 에서 SOTA 달성, FID 6.06</p></li>
-<li><p>Muse-3B, zero-shot COCO 에서 FID 7.88 달성, CLIP score 0.32</p></li>
-<li><p>따로 <strong>파인튜닝 없이</strong> inpainting, outpainting, mask-free editing 이 가능함</p></li>
-</ul>
-</li>
-</ul>
-<section id="masked-modeling">
-<h2>1. Masked modeling<a class="headerlink" href="#masked-modeling" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>[22.02] MaskGIT: Masked Generative Image Transformer</p>
-<ul>
-<li><p>CVPR 2022, Google Research</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/maskgit_1.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/maskgit_1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 452 </span><span class="caption-text">maskgit 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/maskgit_2.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/maskgit_2.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 453 </span><span class="caption-text">maskgit 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>VQGAN</strong> 사용, <strong>non-autoregressive</strong> 디코딩 방식</p></li>
-<li><p>inference 시에 모든 마스킹된 토큰을 예측하지만, 신뢰도가 높은 토큰만 실제 디코딩됨</p>
-<ul>
-<li><p>따라서 autoregressive 모델의 <strong>256 step → 8 step</strong> 으로 줄여 inference 속도가 향상</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="model-architecture">
-<h2>2. Model Architecture<a class="headerlink" href="#model-architecture" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_3.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_3.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 454 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p><strong>VQGAN tokenizer model 사용</strong></p>
-<ul class="simple">
-<li><p>input image 가 discrete token 으로 인코딩되고, 그 후 디코딩되어 input 이미지와 유사하게 잘 복원되는 모델</p></li>
-<li><p>두 개의 VQGAN 사용 (256x256 저해상도 + 512x512 고해상도)</p>
-<ul>
-<li><p>첫 학습은 <strong>256x256 저해상도</strong>(16x16 latent) 학습</p></li>
-<li><p>이후 <strong>512x512 고해상도</strong>(64x64 latent) 학습 진행</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Masked image model 사용</strong></p>
-<ul class="simple">
-<li><p>Muse 파라미터의 대부분이 masked image model 파라미터로 구성</p></li>
-<li><p>unmaked 토큰과 T5XXL text embedding 을 condition으로 masked 저해상도 토큰에 대해 예측 진행</p></li>
-</ul>
-</li>
-<li><p><strong>“Super-res” transformer model 사용</strong></p>
-<ul class="simple">
-<li><p>T5XXL text embedding 을 condition으로 저해상도 토큰을 고해상도 토큰으로 바꾸는데 사용</p></li>
-</ul>
-</li>
-</ol>
-<section id="pre-trained-text-encoders">
-<h3>2.1. Pre-trained Text Encoders<a class="headerlink" href="#pre-trained-text-encoders" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><a class="reference external" href="https://arxiv.org/abs/2205.11487">Imagen</a> 에서 pretrained LLM 사용하면 효과적인 high-quality 의 이미지 생성 가능</p></li>
-<li><p><strong>풍부한 visual, semantic 정보를 추출</strong>할 수 있는 T5-XXL 사용</p>
-<ul>
-<li><p>objects (nouns), actions (verbs), visual properties (adjectives), spatial relationships (prepositions)</p></li>
-<li><p>Muse 가 이러한 정보를 이미지 생성을 위한 LLM embedding 에서 잘 mapping 을 할 수 있을 것이라고 가정</p>
-<ul>
-<li><p><a class="reference external" href="https://arxiv.org/abs/2209.15162">Linearly mapping from image to text space</a> 에서 선행 연구 진행</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>인코딩 과정</p>
-<ol class="arabic simple">
-<li><p>4096 차원의 embedding vector를 얻음</p></li>
-<li><p>linearly projection 진행되어 base, super-res transformer에 입력되게 차원을 맞춤</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="semantic-tokenization-using-vqgan">
-<h3>2.2. Semantic Tokenization using VQGAN<a class="headerlink" href="#semantic-tokenization-using-vqgan" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>VQGAN</p>
-<ul>
-<li><p>encoder + decoder</p></li>
-<li><p>encoder feature 를 vector quantization 이 진행된 후, codebook 으로 부터 매핑을 통해 디코딩이 진행</p></li>
-</ul>
-</li>
-<li><p>다른 해상도의 이미지를 인코딩할 수 있도록 encoder와 decoder 모두 <strong>convolutional layer</strong> 로 구성</p></li>
-<li><p>256x256 픽셀 이미지에 맞는 VQGAN 모델(base model)과 512x512 픽셀 이미지에 맞는 VQGAN 모델(super-res model) 구성</p></li>
-<li><p><a class="reference external" href="https://arxiv.org/abs/2012.09841">Taming transformers for high-resolution image synthesis</a> 에서 <strong>인코딩된 discrete 토큰이 low level noise를 무시하면서 high level semantic 함을 더 잘 capture 한다는 것을 연구 진행</strong></p>
-<ul>
-<li><p>이 때문에, <strong>cross-entropy loss 를 통해 masked 토큰을 예측</strong>하는데 사용할 수 있게됨</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="base-model">
-<h3>2.3. Base Model<a class="headerlink" href="#base-model" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>base model</p>
-<ul>
-<li><p>projected T5 embedding + 이미지 토큰을 입력으로 한 <a class="reference external" href="https://arxiv.org/abs/2202.04200">masked transformer</a> 로 구성</p></li>
-<li><p>text embedding 은 unmasked, 이미지 토큰은 랜덤하게 masking 진행 → [MASK] 토큰으로 교체</p></li>
-</ul>
-</li>
-<li><p>이미지 토큰을 embedding 으로 선형적으로 mapping 진행(transformer 의 input/hidden 사이즈에 맞게) + positional embedding 도 포함</p></li>
-<li><p>transformer layer는 self-attention, cross-attention, MLP 블럭이 포함</p>
-<ul>
-<li><p>MLP 는 masked image embedding 을 logit 값으로 변경하는데 사용되고</p></li>
-<li><p>cross-entropy loss 는 ground truth 토큰과 함께 오차를 계산함</p></li>
-</ul>
-</li>
-<li><p>학습 때, base model은 각 step 마다 모든 masked tokens를 예측하지만,</p>
-<ul>
-<li><p>inference 에서는 퀄리티를 증가하기 위한 iterative 방식으로 mask 예측 진행</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="super-resolution-model">
-<h3>2.4. Super-Resolution Model<a class="headerlink" href="#super-resolution-model" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_4.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_4.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 455 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>바로 512x512 로 예측하도록 모델을 구성했을 때, <strong>low level detail 에 더 포커싱</strong>되어 학습이 진행됨. → 따라서 위의 그림과 같이 계층적으로 설계했음</p></li>
-<li><p>base model은 16x16 latent map 을 생성하고, super resolution 모델이 base latent map 을 <strong>64x64 latent map 으로 upsampling</strong> 함</p>
-<ul class="simple">
-<li><p>base 모델이 학습이 완료되면, 그 이후에 super resolution 모델 학습 진행</p></li>
-</ul>
-</li>
-<li><p>Architecture</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_6.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/table_6.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 456 </span><span class="caption-text">Table 6</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="decoder-finetuning">
-<h3>2.5. Decoder Finetuning<a class="headerlink" href="#decoder-finetuning" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>디테일을 높이기 위해 residual layer를 더 추가하고 channel 늘림</p>
-<ul>
-<li><p>residual layer: <strong>2개 → 4개</strong>, channel: <strong>128 → 256</strong></p></li>
-</ul>
-</li>
-<li><p>encoder weight, codebook, base, super-res transformer 모델은 freezing</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_13.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_13.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 457 </span><span class="caption-text">Figure 13</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>해당 그림에서는 표지판이 더 finetuned decoder 가 복원이 잘 됐음</p></li>
-</ul>
-</section>
-<section id="classifier-free-guidance">
-<h3>2.7. Classifier Free Guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>이미지 생성 퀄리티와 text-image alignment 향상을 위해 도입</p></li>
-<li><p>학습 때, 랜덤하게 10% 만 text conditioning 을 제거</p>
-<ul>
-<li><p>inference</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\ell_g=(1+t) \ell_c-t \ell_u\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(l_c\)</span>: conditional logit / <span class="math notranslate nohighlight">\(l_u\)</span>: unconditional logit / <span class="math notranslate nohighlight">\(t\)</span>: guidance scale</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>CFG 는 diversity ↔ fidelity 의 trade-off 관계</strong></p>
-<ul>
-<li><p>Muse 에서는 t 를 선형적으로 증가시키는 샘플링 과정을 거쳐 diversity 의 한계를 극복</p></li>
-<li><p>초반에는 guidance 가 없거나 낮게 해서 logit 값을 설정하고, 후반에는 conditional prompt 가 가능하게 많은 가중치를 주게 된다.</p></li>
-<li><p>unconditional logit → negative prompt 로도 사용 가능</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="iterative-parallel-decoding-at-inference">
-<h3>2.8. Iterative Parallel Decoding at Inference<a class="headerlink" href="#iterative-parallel-decoding-at-inference" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Muse 의 시간 효율성</p>
-<ul>
-<li><p>parallel decoding 으로 인해 <strong>한 번의 foward 연산으로 multiple token 을 예측</strong>하는 방식으로 동작함</p>
-<ul>
-<li><p>Markovian 속성: 많은 토큰이 주어진 다른 토큰에 대해 conditionally independent 함
-→ parallel decoding 가능</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><a class="reference external" href="https://arxiv.org/abs/2202.04200">Maskgit</a> 논문 에서 Decoding 은 cosine schedule 에 의해 수행됨</p>
-<ul>
-<li><p>해당 step 에서 예측되는 가장 높은 신뢰도의 masked 토큰을 선택해 decoding 진행됨</p></li>
-<li><p>그 후 decoding 된 것은 masking 이 해제되는 방식</p></li>
-</ul>
-</li>
-<li><p>이러한 절차를 따라서, Muse 에서는 base 모델의 256 토큰은 24 step 을 사용하고, super-res 모델의 4096 토큰은 8 step 만 사용</p>
-<ul>
-<li><p><a class="reference external" href="https://arxiv.org/pdf/2206.10789.pdf">Scaling Autoregressive Models for Content-Rich Text-to-Image Generation</a> 에서는 256 or 4096 step 이 필요하고,</p></li>
-<li><p>diffusion 모델에서는 수백번의 step 이 필요한 것에 비해 Muse 가 빠른 inference 를 수행 가능</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_5.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_5.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 458 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="results">
-<h2>3. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Imagen dataset</p>
-<ul>
-<li><p>460M text-image pairs</p></li>
-</ul>
-</li>
-<li><p>train step: 1M</p></li>
-<li><p>train time: 1 week</p></li>
-<li><p>batch size: 512 on 512-core TPU-v4 chips</p></li>
-<li><p>Adafactor optimizer</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_6.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_6.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 459 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>cardinality: 동일한 객체를 여러 번 생성할 때, Muse 는 크기, 색상, 회전된 모습</p></li>
-</ul>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_7.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_7.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 460 </span><span class="caption-text">Figure 7</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>정량적 평가</p></li>
-</ul>
-<p>:::{figure-md}
-<img title="" src="../../pics/Muse/table_6.png" alt="fig_1" class="bg-primary mb-1" width="600">        Table 6
-        :::</p>
-<ul>
-<li><p>FID(diversity) ↔ CLIP score(image-text alignment)</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_8.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_8.png" style="width: 317px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 461 </span><span class="caption-text">Figure 8</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>inpainting, outpainting</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_10.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_10.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 462 </span><span class="caption-text">Figure 10</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="contribution">
-<h1>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h1>
-<ol class="arabic simple">
-<li><p><strong>FID, CLIP score</strong> 기반으로 text-to-image 모델에 대한 SOTA 를 달성</p>
-<ul class="simple">
-<li><p>이미지 생성 퀄리티, 다양성, text prompt와의 alignment 측정했음</p></li>
-</ul>
-</li>
-<li><p>quantized 이미지 토큰과 <strong>parallel decoding</strong> 으로 인해 <strong>빠른 inference</strong> 가 가능</p></li>
-<li><p>inpainting, outpainting, mask-free editing 을 포함한 <strong>zero-shot editing</strong> 가능</p></li>
-</ol>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="q-a">
-<h1>Q&amp;A<a class="headerlink" href="#q-a" title="Permalink to this heading">#</a></h1>
-<ol class="arabic simple">
-<li><p>Muse 와 같은 transformer 기반의 generation 모델에서는 어떻게 <strong>diversity</strong> 한 결과를 가져올 수 있나요?</p>
-<ol class="arabic simple">
-<li><p>아무래도 Muse 는 random latent 에서 생성하는 것이 아니라 text-to-image 모델이라, text 에 따라서 다양한 이미지 생성 결과가 나타날 수 있을 것 같습니다.</p></li>
-</ol>
-</li>
-<li><p>Muse 는 결국 GAN 모델인가요?</p>
-<ol class="arabic simple">
-<li><p>기준점이 어떻냐에 따라 GAN 이다, 아니다, 라고 정하기 어려울 것 같습니다. VQGAN을 사용해서 GAN이라고 생각할 수 도 있고, GAN 처럼 random latent 결과에 따라 이미지 생성이 달라질 수 있는 관점에서 생각하면 아니다라고 말할 수 있을 것 같습니다.</p></li>
-</ol>
-</li>
-<li><p>Token 은 어떤 의미를 갖나요?</p>
-<ol class="arabic simple">
-<li><p>VQGAN에서 input 이미지를 인코딩하고, vector-quantization 과정을 거쳐 압축 후, codebook의 값을 가져와 feature를 구성하는데요, 이때 feature에 포함되어 있는 하나의 포인트에 해당하는 것이 token이라고 생각하시면 될 것 같습니다.</p></li>
-</ol>
-</li>
-<li><p>텍스트 프롬프트를 넣었을때 실제 이미지 생성은 어떻게 이뤄지나요? Inference에서는 입력 이미지가 없는데 base transformer에 입력 이미지에 대한 masked token대신 뭐가 들어가게 되나요?</p>
-<ol class="arabic simple">
-<li><p>실제 inference 과정에서는 input 이미지가 없기 때문에 모두 마스크된 형태로 입력되게 됩니다. text prompt 의 condition 에 따라 각 step을 거쳐 decoding 이 수행됩니다.</p></li>
-</ol>
-</li>
-<li><p>text embedding이 어떻게 objective function 수식에 들어가나요?</p>
-<ol class="arabic simple">
-<li><p>base transformer 에 대해 text embedding 값이 key, value로 입력되어 cross-attention 이 수행되게 됩니다. 그렇게 예측된 feature와 GT의 feature 끼리 cross entropy loss를 통해 마스크 예측할 수 있는 base transformer 가 학습이 됩니다.</p></li>
-</ol>
-</li>
-</ol>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Diffusion_models_already_have_a_Semantic_Latent_Space.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Diffusion Models already have a Semantic Latent Space</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="GIGAGAN.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Scaling up GANs for Text-to-Image Synthesis</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Muse</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#masked-modeling">1. Masked modeling</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-architecture">2. Model Architecture</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pre-trained-text-encoders">2.1. Pre-trained Text Encoders</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#semantic-tokenization-using-vqgan">2.2. Semantic Tokenization using VQGAN</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#base-model">2.3. Base Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-model">2.4. Super-Resolution Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoder-finetuning">2.5. Decoder Finetuning</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.7. Classifier Free Guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#iterative-parallel-decoding-at-inference">2.8. Iterative Parallel Decoding at Inference</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">3. Results</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#q-a">Q&amp;A</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Muse &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Muse';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Scaling up GANs for Text-to-Image Synthesis" href="GIGAGAN.html" />
+    <link rel="prev" title="Diffusion Models already have a Semantic Latent Space" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Muse.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Muse.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Muse</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Muse</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#masked-modeling">1. Masked modeling</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-architecture">2. Model Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pre-trained-text-encoders">2.1. Pre-trained Text Encoders</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#semantic-tokenization-using-vqgan">2.2. Semantic Tokenization using VQGAN</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#base-model">2.3. Base Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-model">2.4. Super-Resolution Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoder-finetuning">2.5. Decoder Finetuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.7. Classifier Free Guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#iterative-parallel-decoding-at-inference">2.8. Iterative Parallel Decoding at Inference</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">3. Results</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#q-a">Q&amp;A</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Muse: Text-To-Image Generation via Masked Generative Transformers</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2301.00704.pdf">https://arxiv.org/pdf/2301.00704.pdf</a></p></li>
+<li><p>Code: X</p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
+<li><p><strong>Last updated on Mar. 25. 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="muse">
+<h1>Muse<a class="headerlink" href="#muse" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_1.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 451 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Muse: T2I transformer model + Masked Modeling</strong></p>
+<ul>
+<li><p>diffusion, autoregressive model 보다 효과적인 성능을 냄</p></li>
+<li><p><strong>discrete token space 에서 masked modeling 방식으로 학습</strong></p>
+<ul>
+<li><p>pretrained LLM(<em><strong>T5-XXL</strong></em>) 으로 부터 추출된 text embedding이 주어지고, 랜덤하게 masked image token 을 예측하는 방식으로 학습</p></li>
+</ul>
+</li>
+<li><p>Imagen, DALL-E 2 와 비교할 때, sampling iteration이 적어 <strong>빠른 inference</strong> 수행 가능</p></li>
+<li><p>LLM 을 사용해 <strong>fine-grained 한 정보</strong>를 추출하여 high-fidelity 이미지 생성을 할 수 있고, 시각적 concept(object, spatial 관계, 자세, 등)을 더 잘 이해할 수 있음</p></li>
+<li><p>Muse-900M, CC3M 에서 SOTA 달성, FID 6.06</p></li>
+<li><p>Muse-3B, zero-shot COCO 에서 FID 7.88 달성, CLIP score 0.32</p></li>
+<li><p>따로 <strong>파인튜닝 없이</strong> inpainting, outpainting, mask-free editing 이 가능함</p></li>
+</ul>
+</li>
+</ul>
+<section id="masked-modeling">
+<h2>1. Masked modeling<a class="headerlink" href="#masked-modeling" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>[22.02] MaskGIT: Masked Generative Image Transformer</p>
+<ul>
+<li><p>CVPR 2022, Google Research</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/maskgit_1.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/maskgit_1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 452 </span><span class="caption-text">maskgit 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/maskgit_2.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/maskgit_2.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 453 </span><span class="caption-text">maskgit 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>VQGAN</strong> 사용, <strong>non-autoregressive</strong> 디코딩 방식</p></li>
+<li><p>inference 시에 모든 마스킹된 토큰을 예측하지만, 신뢰도가 높은 토큰만 실제 디코딩됨</p>
+<ul>
+<li><p>따라서 autoregressive 모델의 <strong>256 step → 8 step</strong> 으로 줄여 inference 속도가 향상</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="model-architecture">
+<h2>2. Model Architecture<a class="headerlink" href="#model-architecture" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_3.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_3.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 454 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p><strong>VQGAN tokenizer model 사용</strong></p>
+<ul class="simple">
+<li><p>input image 가 discrete token 으로 인코딩되고, 그 후 디코딩되어 input 이미지와 유사하게 잘 복원되는 모델</p></li>
+<li><p>두 개의 VQGAN 사용 (256x256 저해상도 + 512x512 고해상도)</p>
+<ul>
+<li><p>첫 학습은 <strong>256x256 저해상도</strong>(16x16 latent) 학습</p></li>
+<li><p>이후 <strong>512x512 고해상도</strong>(64x64 latent) 학습 진행</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Masked image model 사용</strong></p>
+<ul class="simple">
+<li><p>Muse 파라미터의 대부분이 masked image model 파라미터로 구성</p></li>
+<li><p>unmasked 토큰과 T5XXL text embedding 을 condition으로 masked 저해상도 토큰에 대해 예측 진행</p></li>
+</ul>
+</li>
+<li><p><strong>“Super-res” transformer model 사용</strong></p>
+<ul class="simple">
+<li><p>T5XXL text embedding 을 condition으로 저해상도 토큰을 고해상도 토큰으로 바꾸는데 사용</p></li>
+</ul>
+</li>
+</ol>
+<section id="pre-trained-text-encoders">
+<h3>2.1. Pre-trained Text Encoders<a class="headerlink" href="#pre-trained-text-encoders" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><a class="reference external" href="https://arxiv.org/abs/2205.11487">Imagen</a> 에서 pretrained LLM 사용하면 효과적인 high-quality 의 이미지 생성 가능</p></li>
+<li><p><strong>풍부한 visual, semantic 정보를 추출</strong>할 수 있는 T5-XXL 사용</p>
+<ul>
+<li><p>objects (nouns), actions (verbs), visual properties (adjectives), spatial relationships (prepositions)</p></li>
+<li><p>Muse 가 이러한 정보를 이미지 생성을 위한 LLM embedding 에서 잘 mapping 을 할 수 있을 것이라고 가정</p>
+<ul>
+<li><p><a class="reference external" href="https://arxiv.org/abs/2209.15162">Linearly mapping from image to text space</a> 에서 선행 연구 진행</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>인코딩 과정</p>
+<ol class="arabic simple">
+<li><p>4096 차원의 embedding vector를 얻음</p></li>
+<li><p>linear projection 을 거쳐 base, super-res transformer 에 입력되도록 차원을 맞춤</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="semantic-tokenization-using-vqgan">
+<h3>2.2. Semantic Tokenization using VQGAN<a class="headerlink" href="#semantic-tokenization-using-vqgan" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>VQGAN</p>
+<ul>
+<li><p>encoder + decoder</p></li>
+<li><p>encoder feature 를 vector quantization 이 진행된 후, codebook 으로 부터 매핑을 통해 디코딩이 진행</p></li>
+</ul>
+</li>
+<li><p>다른 해상도의 이미지를 인코딩할 수 있도록 encoder와 decoder 모두 <strong>convolutional layer</strong> 로 구성</p></li>
+<li><p>256x256 픽셀 이미지에 맞는 VQGAN 모델(base model)과 512x512 픽셀 이미지에 맞는 VQGAN 모델(super-res model) 구성</p></li>
+<li><p><a class="reference external" href="https://arxiv.org/abs/2012.09841">Taming transformers for high-resolution image synthesis</a> 에서 <strong>인코딩된 discrete 토큰이 low level noise를 무시하면서 high level semantic 함을 더 잘 capture 한다는 것을 연구 진행</strong></p>
+<ul>
+<li><p>이 때문에, <strong>cross-entropy loss 를 통해 masked 토큰을 예측</strong>하는데 사용할 수 있게됨</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="base-model">
+<h3>2.3. Base Model<a class="headerlink" href="#base-model" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>base model</p>
+<ul>
+<li><p>projected T5 embedding + 이미지 토큰을 입력으로 한 <a class="reference external" href="https://arxiv.org/abs/2202.04200">masked transformer</a> 로 구성</p></li>
+<li><p>text embedding 은 unmasked, 이미지 토큰은 랜덤하게 masking 진행 → [MASK] 토큰으로 교체</p></li>
+</ul>
+</li>
+<li><p>이미지 토큰을 embedding 으로 선형적으로 mapping 진행(transformer 의 input/hidden 사이즈에 맞게) + positional embedding 도 포함</p></li>
+<li><p>transformer layer는 self-attention, cross-attention, MLP 블럭이 포함</p>
+<ul>
+<li><p>MLP 는 masked image embedding 을 logit 값으로 변경하는데 사용되고</p></li>
+<li><p>cross-entropy loss 는 ground truth 토큰과 함께 오차를 계산함</p></li>
+</ul>
+</li>
+<li><p>학습 때, base model은 각 step 마다 모든 masked tokens를 예측하지만,</p>
+<ul>
+<li><p>inference 에서는 퀄리티를 증가하기 위한 iterative 방식으로 mask 예측 진행</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="super-resolution-model">
+<h3>2.4. Super-Resolution Model<a class="headerlink" href="#super-resolution-model" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_4.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_4.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 455 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>바로 512x512 로 예측하도록 모델을 구성했을 때, <strong>low level detail 에 더 포커싱</strong>되어 학습이 진행됨. → 따라서 위의 그림과 같이 계층적으로 설계했음</p></li>
+<li><p>base model은 16x16 latent map 을 생성하고, super resolution 모델이 base latent map 을 <strong>64x64 latent map 으로 upsampling</strong> 함</p>
+<ul class="simple">
+<li><p>base 모델이 학습이 완료되면, 그 이후에 super resolution 모델 학습 진행</p></li>
+</ul>
+</li>
+<li><p>Architecture</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_6.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/table_6.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 456 </span><span class="caption-text">Table 6</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="decoder-finetuning">
+<h3>2.5. Decoder Finetuning<a class="headerlink" href="#decoder-finetuning" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>디테일을 높이기 위해 residual layer를 더 추가하고 channel 늘림</p>
+<ul>
+<li><p>residual layer: <strong>2개 → 4개</strong>, channel: <strong>128 → 256</strong></p></li>
+</ul>
+</li>
+<li><p>encoder weight, codebook, base, super-res transformer 모델은 freezing</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_13.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_13.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 457 </span><span class="caption-text">Figure 13</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>해당 그림에서는 finetuned decoder 가 표지판을 더 잘 복원했음</p></li>
+</ul>
+</section>
+<section id="classifier-free-guidance">
+<h3>2.7. Classifier Free Guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>이미지 생성 퀄리티와 text-image alignment 향상을 위해 도입</p></li>
+<li><p>학습 때, 랜덤하게 10% 만 text conditioning 을 제거</p>
+<ul>
+<li><p>inference</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\ell_g=(1+t) \ell_c-t \ell_u\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\ell_c\)</span>: conditional logit / <span class="math notranslate nohighlight">\(\ell_u\)</span>: unconditional logit / <span class="math notranslate nohighlight">\(t\)</span>: guidance scale</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>CFG 는 diversity ↔ fidelity 의 trade-off 관계</strong></p>
+<ul>
+<li><p>Muse 에서는 t 를 선형적으로 증가시키는 샘플링 과정을 거쳐 diversity 의 한계를 극복</p></li>
+<li><p>초반에는 guidance 를 없애거나 낮게 설정해 logit 값을 계산하고, 후반에는 conditional prompt 의 영향이 커지도록 더 많은 가중치를 주게 된다.</p></li>
+<li><p>unconditional logit → negative prompt 로도 사용 가능</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="iterative-parallel-decoding-at-inference">
+<h3>2.8. Iterative Parallel Decoding at Inference<a class="headerlink" href="#iterative-parallel-decoding-at-inference" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Muse 의 시간 효율성</p>
+<ul>
+<li><p>parallel decoding 으로 인해 <strong>한 번의 forward 연산으로 multiple token 을 예측</strong>하는 방식으로 동작함</p>
+<ul>
+<li><p>Markovian 속성: 많은 토큰이 주어진 다른 토큰에 대해 conditionally independent 함
+→ parallel decoding 가능</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><a class="reference external" href="https://arxiv.org/abs/2202.04200">Maskgit</a> 논문 에서 Decoding 은 cosine schedule 에 의해 수행됨</p>
+<ul>
+<li><p>해당 step 에서 예측되는 가장 높은 신뢰도의 masked 토큰을 선택해 decoding 진행됨</p></li>
+<li><p>그 후 decoding 된 것은 masking 이 해제되는 방식</p></li>
+</ul>
+</li>
+<li><p>이러한 절차를 따라서, Muse 에서는 base 모델의 256 토큰은 24 step 을 사용하고, super-res 모델의 4096 토큰은 8 step 만 사용</p>
+<ul>
+<li><p><a class="reference external" href="https://arxiv.org/pdf/2206.10789.pdf">Scaling Autoregressive Models for Content-Rich Text-to-Image Generation</a> 에서는 256 or 4096 step 이 필요하고,</p></li>
+<li><p>diffusion 모델에서는 수백번의 step 이 필요한 것에 비해 Muse 가 빠른 inference 를 수행 가능</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_5.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_5.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 458 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="results">
+<h2>3. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Imagen dataset</p>
+<ul>
+<li><p>460M text-image pairs</p></li>
+</ul>
+</li>
+<li><p>train step: 1M</p></li>
+<li><p>train time: 1 week</p></li>
+<li><p>batch size: 512 on 512-core TPU-v4 chips</p></li>
+<li><p>Adafactor optimizer</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_6.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_6.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 459 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>cardinality: 동일한 객체를 여러 번 생성할 때, Muse 는 크기, 색상, 회전된 모습을 서로 다르게 생성함</p></li>
+</ul>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_7.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_7.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 460 </span><span class="caption-text">Figure 7</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>정량적 평가</p></li>
+</ul>
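+<figure class="align-default">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_6.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/table_6.png" style="width: 600px;" /></a>
+<figcaption><p><span class="caption-text">Table 6</span></p></figcaption></figure>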
+<ul>
+<li><p>FID(diversity) ↔ CLIP score(image-text alignment)</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_8.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_8.png" style="width: 317px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 461 </span><span class="caption-text">Figure 8</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>inpainting, outpainting</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_10.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_10.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 462 </span><span class="caption-text">Figure 10</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="contribution">
+<h1>Contribution<a class="headerlink" href="#contribution" title="Permalink to this heading">#</a></h1>
+<ol class="arabic simple">
+<li><p><strong>FID, CLIP score</strong> 기반으로 text-to-image 모델에 대한 SOTA 를 달성</p>
+<ul class="simple">
+<li><p>이미지 생성 퀄리티, 다양성, text prompt와의 alignment 측정했음</p></li>
+</ul>
+</li>
+<li><p>quantized 이미지 토큰과 <strong>parallel decoding</strong> 으로 인해 <strong>빠른 inference</strong> 가 가능</p></li>
+<li><p>inpainting, outpainting, mask-free editing 을 포함한 <strong>zero-shot editing</strong> 가능</p></li>
+</ol>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="q-a">
+<h1>Q&amp;A<a class="headerlink" href="#q-a" title="Permalink to this heading">#</a></h1>
+<ol class="arabic simple">
+<li><p>Muse 와 같은 transformer 기반의 generation 모델에서는 어떻게 <strong>diversity</strong> 한 결과를 가져올 수 있나요?</p>
+<ol class="arabic simple">
+<li><p>아무래도 Muse 는 random latent 에서 생성하는 것이 아니라 text-to-image 모델이라, text 에 따라서 다양한 이미지 생성 결과가 나타날 수 있을 것 같습니다.</p></li>
+</ol>
+</li>
+<li><p>Muse 는 결국 GAN 모델인가요?</p>
+<ol class="arabic simple">
+<li><p>기준점이 어떻냐에 따라 GAN 이다, 아니다, 라고 정하기 어려울 것 같습니다. VQGAN을 사용해서 GAN이라고 생각할 수도 있고, GAN 처럼 random latent 결과에 따라 이미지 생성이 달라질 수 있는 관점에서 생각하면 아니다라고 말할 수 있을 것 같습니다.</p></li>
+</ol>
+</li>
+<li><p>Token 은 어떤 의미를 갖나요?</p>
+<ol class="arabic simple">
+<li><p>VQGAN에서 input 이미지를 인코딩하고, vector-quantization 과정을 거쳐 압축 후, codebook의 값을 가져와 feature를 구성하는데요, 이때 feature에 포함되어 있는 하나의 포인트에 해당하는 것이 token이라고 생각하시면 될 것 같습니다.</p></li>
+</ol>
+</li>
+<li><p>텍스트 프롬프트를 넣었을때 실제 이미지 생성은 어떻게 이뤄지나요? Inference에서는 입력 이미지가 없는데 base transformer에 입력 이미지에 대한 masked token대신 뭐가 들어가게 되나요?</p>
+<ol class="arabic simple">
+<li><p>실제 inference 과정에서는 input 이미지가 없기 때문에 모두 마스크된 형태로 입력되게 됩니다. text prompt 의 condition 에 따라 각 step을 거쳐 decoding 이 수행됩니다.</p></li>
+</ol>
+</li>
+<li><p>text embedding이 어떻게 objective function 수식에 들어가나요?</p>
+<ol class="arabic simple">
+<li><p>base transformer 에 대해 text embedding 값이 key, value로 입력되어 cross-attention 이 수행되게 됩니다. 그렇게 예측된 feature와 GT의 feature 끼리 cross entropy loss를 통해 마스크 예측할 수 있는 base transformer 가 학습이 됩니다.</p></li>
+</ol>
+</li>
+</ol>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Diffusion_models_already_have_a_Semantic_Latent_Space.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Diffusion Models already have a Semantic Latent Space</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="GIGAGAN.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Scaling up GANs for Text-to-Image Synthesis</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Muse</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#masked-modeling">1. Masked modeling</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-architecture">2. Model Architecture</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pre-trained-text-encoders">2.1. Pre-trained Text Encoders</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#semantic-tokenization-using-vqgan">2.2. Semantic Tokenization using VQGAN</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#base-model">2.3. Base Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-model">2.4. Super-Resolution Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoder-finetuning">2.5. Decoder Finetuning</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">2.7. Classifier Free Guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#iterative-parallel-decoding-at-inference">2.8. Iterative Parallel Decoding at Inference</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">3. Results</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#contribution">Contribution</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#q-a">Q&amp;A</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/NeRF.html b/docs/review/NeRF.html
old mode 100644
new mode 100755
index 487bccd1..dbe8c112
--- a/docs/review/NeRF.html
+++ b/docs/review/NeRF.html
@@ -1,1248 +1,1268 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/NeRF';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="3D Gaussian Splatting for Real-Time Radiance Field Rendering" href="3DGS.html" />
-    <link rel="prev" title="DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion" href="DreamPose.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/NeRF.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/NeRF.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-field-scene-representation">3. Neural Radiance Field Scene Representation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-rendering-with-radiance-fields">4. Volume Rendering with Radiance Fields</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimizing-a-neural-radiance-field">5. Optimizing a Neural Radiance Field</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#positional-encoding">5.1 Positional encoding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hierarchical-volume-sampling">5.2 Hierarchical volume sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.3 Implementation details</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments-detail">5.4 Experiments detail</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">6.1 Datasets</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.2 Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">6.3 Discussion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.4 Ablation studies</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix-a-additional-implementation-details">(Appendix) A. Additional Implementation Details</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2003.08934">https://arxiv.org/abs/2003.08934</a></p></li>
-<li><p>Project: <a class="github reference external" href="https://github.com/bmild/nerf">bmild/nerf</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jeongin Lee</p></li>
-<li><p><strong>Last updated on May. 22, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="nerf-representing-scenes-as-neural-radiance-fields-for-view-synthesis">
-<h1>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis<a class="headerlink" href="#nerf-representing-scenes-as-neural-radiance-fields-for-view-synthesis" title="Permalink to this heading">#</a></h1>
-<p><a class="reference external" href="https://www.youtube.com/watch?v=JuH79E8rdKc"><img alt="NeRF" src="http://img.youtube.com/vi/JuH79E8rdKc/0.jpg" /></a></p>
-<ul class="simple">
-<li><p>기존의 3D object 자체를 구성하여 렌더링하는 explicit method → 저장 용량이 많이 소요</p></li>
-<li><p>NeRF 는 3D object 자체를 구성하지 않는, <strong>synthesizing novel views</strong>
-좌표를 mlp에 넣어 픽셀 별 색상 및 밀도 값을 얻는 implicit method</p></li>
-<li><p><strong>synthesizing novel views</strong><br />
-특정한 장면(Scene)에서 여러 각도로 찍은 일부의 사진들을 가지고 완전 새로운 각도의 모습을 유추하는 task</p></li>
-</ul>
-<section id="abstract">
-<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p><strong>NeRF</strong></p>
-<ul class="simple">
-<li><p>한정된 수의 입력 뷰 이미지들을 사용</p></li>
-<li><p>continous volumetric scene 함수 최적화를 통해  <strong>synthesizing novel views</strong> 에서 SOTA 달성</p></li>
-</ul>
-</li>
-<li><p><strong>Algorithm</strong></p>
-<ul class="simple">
-<li><p><strong>FC layer 사용 (non-convolutional)</strong></p>
-<ul>
-<li><p><strong>input</strong>  : 5 차원 좌표 (공간적 위치<span class="math notranslate nohighlight">\((x, y, z)\)</span> &amp; 바라보는 방향<span class="math notranslate nohighlight">\((\theta, \phi))\)</span></p></li>
-<li><p><strong>output</strong> : volume density와 해당 방향에 대한 색상 값</p></li>
-</ul>
-</li>
-<li><p>5 차원 좌표 입력 → 카메라 광선을 따라 RGB 값, Volume density 예측
-→ 고전적 Volume rendering 기술을 사용하여 image 로 합성</p></li>
-</ul>
-</li>
-<li><p>복잡한 구조 및 외형을 갖는 scene 에 대한 <strong>Novel views rendering</strong> 을 위해 <strong>NeRF</strong> 를 최적화하는 방법을 제시 (+ Positional Encoding, Hierarchical volume sampling)</p></li>
-<li><p>실험을 통해 기존 작업을 능가하는 결과를 입증</p></li>
-<li><p><strong>Keywords :</strong> scene representation, view synthesis, image-based rendering,
-volume rendering, 3D deep learning</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled2.png"><img alt="NeRF" class="bg-primary mb-1" src="../../_images/Untitled2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 648 </span><span class="caption-text">method that optimizes a continuous 5D neural radiance field representation \  (source: {<a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>})</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>캡처된 이미지들의 렌더링 오차를 최소화하기 위해 연속적인 <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> scene 함수의 파라미터를 직접 최적화하여 View synthesis 분야의 오랜 문제를 새로운 방식으로 해결함</p>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>정적 장면 → 연속적인 <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> 함수로 표현</strong></p>
-<ul>
-<li><p>FC layer = Regression Function  :
-a single <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> coord <span class="math notranslate nohighlight">\((x, y, z, \theta, \phi)\)</span> → density, view-dependent RGB color</p></li>
-</ul>
-</li>
-<li><p><strong>Output</strong></p>
-<ul>
-<li><p>공간 상의 각 지점 <span class="math notranslate nohighlight">\((x, y, z)\)</span>에서 각 방향 <span class="math notranslate nohighlight">\((\theta, \phi)\)</span> 으로 방출된 색상</p></li>
-<li><p>각 지점 <span class="math notranslate nohighlight">\((x, y, z)\)</span> 의 밀도(density) = <span class="math notranslate nohighlight">\(\sigma\)</span></p>
-<ul>
-<li><p>밀도의 누적값을 통해 얼마나 많은 빛이 <span class="math notranslate nohighlight">\((𝑥,𝑦,𝑧)\)</span> 를 통과하는 광선에 의해 누적되는지를 표현</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>특정 시점으로부터의 NeRF 렌더링</strong></p>
-<ol class="arabic simple">
-<li><p>광선을 따라 이동하여 샘플링된 <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> 포인트 집합을 생성</p></li>
-<li><p>해당 포인트들과 이에 해당하는 <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> 시점 방향을 신경망에 대한 입력으로 사용하여 색상과 밀도의 집합을 생성</p></li>
-<li><p>고전적 Volume rendering 기술을 사용하여 <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> image 로 합성</p></li>
-</ol>
-</li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>Optimization</strong></p>
-<ul>
-<li><p>미분 가능, gradient descent 를 통한 최적화</p></li>
-<li><p>각 관찰된 이미지와 렌더링된 해당 <strong>views</strong>사이의 오차를 최소화</p></li>
-<li><p>다양한 views 에서 오차 최소화를 통해 실제 장면의 cotents 가 포함된 위치에 <strong>높은 밀도</strong>와 <strong>정확한 색상</strong>을 할당하여 장면의 일관된 모델을 예측</p></li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>NeRF 최적화의 Basic implementation의 한계 및 대안</strong></p>
-<ol class="arabic simple">
-<li><p><strong>복잡한 장면에 대해서 충분히 고해상도 표현으로 수렴되지 않음</strong></p>
-<ul>
-<li><p>positional encoding 으로 입력 5D 좌표를 변환</p></li>
-<li><p>MLP가 더 높은 주파수의 함수를 나타낼 수 있음.</p></li>
-</ul>
-</li>
-<li><p><strong>카메라 광선당 요구되는 샘플링 수가 비효율적</strong></p>
-<ul>
-<li><p>계층적 샘플링 절차를 제안</p></li>
-<li><p>고주파수의 장면 표현을 적절하게 샘플링하기 위해 필요한 쿼리 수를 감소시킴</p></li>
-</ul>
-</li>
-</ol>
-</li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>본 논문의 접근 방식은 volumetric 표현의 이점을 상속</strong></p>
-<ul>
-<li><p>복잡한 실세계의 기하학적 형태와 외형을 표현 가능</p></li>
-<li><p>투영된 이미지를 사용한 Gradient-based 최적화에 적합</p></li>
-<li><p>고해상도에서 복잡한 장면을 모델링할 때 이산화된 복셀 그리드의 엄청난 저장 비용을 극복</p></li>
-<li><p><strong>Voxel (Volume + Pixel)</strong>
-3차원 공간에서 체적의 기본 단위 (2차원의 경우에선 pixe)
-위치 정보와 함께 밀도, 색상, 투과성 등의 속성을 가질 수 있음</p></li>
-<li><p><strong>Volumne Rendering</strong>
-3차원 공간에서 정의된 데이터(체적 데이터)를 2차원 이미지로 변환하는 과정
-예시) CT, MRI</p></li>
-<li><p><strong>Volumetric Data (체적 데이터)</strong>
-3차원 공간에서 샘플링된 데이터</p></li>
-</ul>
-</li>
-</ul>
-<hr class="docutils" />
-<ul class="simple">
-<li><p><strong>Technical contributions</strong></p>
-<ul>
-<li><p>복잡한 기하학과 소재를 가진 연속적인 장면을 5차원 NeRF 로 나타내는 접근 방법, 기본 MLP 네트워크로 매개변수화</p></li>
-<li><p>고전적인 볼륨 렌더링 기법을 기반으로 한 미분 가능한 렌더링 절차를 사용하여 이러한 표현을 표준 RGB 이미지로부터 최적화하는 방법을 제안</p></li>
-<li><p>hierarchical sampling strategy : MLP’s capacity 를 시각적인 장면 내용이 있는 공간으로 할당 (물체가 있을 확률이 높은 부분을 모델이 집중적으로 학습)</p></li>
-<li><p>Positional encoding : 입력 5차원 좌표를 고차원 공간으로 매핑하기 위해 NeRF를 성공적으로 최적화하여 고주파의 장면 콘텐츠를 표현가능</p></li>
-</ul>
-</li>
-<li><p>최초의 <strong>continuous neural scene representation</strong> 제안</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled11.png"><img alt="NeRF overview" class="bg-primary mb-1" src="../../_images/Untitled11.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 649 </span><span class="caption-text">An overview of our neural radiance field scene representation and differentiable rendering procedure \  (source: {<a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>})</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><strong>Neural 3D shape representations</strong></p></li>
-<li><p><strong>View synthesis and image-based rendering</strong></p></li>
-</ul>
-</section>
-<section id="neural-radiance-field-scene-representation">
-<h2>3. Neural Radiance Field Scene Representation<a class="headerlink" href="#neural-radiance-field-scene-representation" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>5차원 벡터 함수 (MLP) <span class="math notranslate nohighlight">\(F_{\Theta}:(\mathbf{x}, \mathbf{d}) \rightarrow(\mathbf{c}, \sigma)\)</span></p>
-<ul class="simple">
-<li><p><strong>input</strong> : <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> location <span class="math notranslate nohighlight">\(\mathbf{x}=(x, y, z)\)</span> , <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> viewing direction <span class="math notranslate nohighlight">\(\mathbf{d}=(\theta, \phi)\)</span></p>
-<ul>
-<li><p><strong>(practically) direction</strong> as a <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> Cartesian unit vector <span class="math notranslate nohighlight">\(\mathbf{d}\)</span></p></li>
-<li><p>벡터 <span class="math notranslate nohighlight">\(\mathbf{d} =(𝑑_𝑥,𝑑_𝑦,𝑑_𝑧)\)</span> 는 방향을 나타내며, 이는 단위 벡터(길이가 1)로 정규화</p></li>
-</ul>
-</li>
-<li><p><strong>output</strong> : emitted color <span class="math notranslate nohighlight">\(\mathbf{c}=(r, g, b)\)</span>, volume density <span class="math notranslate nohighlight">\(\sigma\)</span></p></li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(\mathbf{x}\)</span> → <span class="math notranslate nohighlight">\(\sigma\)</span> , <span class="math notranslate nohighlight">\((\mathbf{x, d})\)</span> → RGB 색상 <span class="math notranslate nohighlight">\(\mathbf{c}\)</span> 를 예측하도록 권장 (색상은 view dependent 이므로)</p>
-<ol class="arabic simple">
-<li><p>MLP <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 는 먼저 8개의 fully-connected layer (ReLU, 256개 채널 사용) 로
-입력 3D 좌표 <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> →  <span class="math notranslate nohighlight">\(\sigma\)</span> , 256차원 feature 벡터를 출력</p></li>
-<li><p><strong>1</strong> 에서 출력된 feature 벡터는 카메라 광선의 시점 방향과 concat</p></li>
-<li><p>뷰에 따른 RGB 색상을 출력하는 하나의 추가 fully-connected layer (ReLU,128개 채널 사용)로 전달됨</p></li>
-</ol>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled3.png"><img alt="NeRF architecture" class="bg-primary mb-1" src="../../_images/Untitled3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 650 </span><span class="caption-text">fully-connected network architecture (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>View 를 고려하여 색상을 예측해야 하는 이유 : non-Lambertian effects</strong></p>
-<ul class="simple">
-<li><p><strong>Lambertian 효과</strong></p>
-<ul>
-<li><p>물체의 표면에서 나오는 광선이 균일하게 반사되는 현상</p></li>
-<li><p>표면의 방향과 상관없이 광선이 표면에서 나오는 각도에 따라 반사되는 광량이 일정하다는 원리를 기반</p></li>
-</ul>
-</li>
-<li><p>Fig. 3 : 입력 시선 방향을 사용하여 non-Lambertian effects 를 표현한 예시</p></li>
-</ul>
-<ul class="simple">
-<li><p>Fig. 4 : view dependence 를 고려하지 않고 (only <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> input) 학습된 모델은 반사성(specularity)을 표현하는데 어려움이 있음</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="volume-rendering-with-radiance-fields">
-<h2>4. Volume Rendering with Radiance Fields<a class="headerlink" href="#volume-rendering-with-radiance-fields" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p><strong>5D NeRF 는 장면을 volume density 와 특정 포인트에서 방출된 빛(색상)으로 표현</strong></p></li>
-<li><p><strong>볼륨 렌더링 : scene 을 통과하는 모든 광선의 색상을 렌더링</strong></p>
-<ul>
-<li><p>NeRF 로부터 View 를 렌더링하려면 원하는 가상 카메라의 각 픽셀을 거쳐 추적된 카메라 광선에 대해 적분값 <span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> 을 추정해야 함</p></li>
-<li><p><span class="math notranslate nohighlight">\(\mathbf{r}(t)=\mathbf{o}+t \mathbf{d}\)</span> : 카메라 광선</p></li>
-<li><p><span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> : near bound <span class="math notranslate nohighlight">\(t_n\)</span> , far bound <span class="math notranslate nohighlight">\(t_f\)</span> 에서 카메라 광선 <span class="math notranslate nohighlight">\(\mathbf{r}(t)\)</span> 의 예측된 색상</p></li>
-<li><p><span class="math notranslate nohighlight">\(T(t)\)</span> : ray 를 따라 <span class="math notranslate nohighlight">\(t_n\)</span> 부터 <span class="math notranslate nohighlight">\(t\)</span> 까지 누적된 투과율(transmittance)</p></li>
-</ul>
-</li>
-</ul>
-<div class="math notranslate nohighlight">
-\[
-C(\mathbf{r})=\int_{t_n}^{t_f} T(t) \sigma(\mathbf{r}(t)) \mathbf{c}(\mathbf{r}(t), \mathbf{d}) d t, \text { where } T(t)=\exp \left(-\int_{t_n}^t \sigma(\mathbf{r}(s)) d s\right)
-\]</div>
-<ul>
-<li><p><strong>Quadrature (구적법) 을 통해 연속적 적분값을 수치적으로 추정</strong></p>
-<ul class="simple">
-<li><p>이산화된 voxel grids 렌더링에 사용되는 <strong>결정론적 구적법</strong>의 한계</p></li>
-<li><p>일반적으로 이산화된 복셀 그리드를 렌더링하는 데 사용되는 결정론적 구적법은 MLP가 <strong>고정된 이산 위치 집합</strong>에서만 쿼리되기 때문에 표현의 해상도를 제한</p></li>
-</ul>
-</li>
-<li><p>➡️ <strong>대안으로 Stratified sampling (층화 표집) 접근법을 사용.</strong></p></li>
-<li><p><span class="math notranslate nohighlight">\(\left[t_n, t_f\right]\)</span> 를 <span class="math notranslate nohighlight">\(N\)</span> 개의 균일한 간격의 bin으로 분할한 Partition 생성</p></li>
-<li><p>각 bin 내에서 하나의 샘플을 무작위로 추출</p>
-<div class="math notranslate nohighlight">
-\[
-    t_i \sim \mathcal{U}\left[t_n+\frac{i-1}{N}\left(t_f-t_n\right), t_n+\frac{i}{N}\left(t_f-t_n\right)\right].
-    \]</div>
-</li>
-<li><p>여전히 적분값 추정을 위해 이산화된 표본들을 사용하더라도,
-층화 표집(stratified sampling) 방법을 통해 continuous scene 표현이 가능</p></li>
-<li><p>다양한 position sample에 대해 최적화가 가능하므로, 최적화 과정에서 MLP가 연속적인 위치들에서 평가되도록 하는 효과</p></li>
-<li><p>위의 샘플링 방법을 통해 뽑은 샘플들로  <a class="reference external" href="https://courses.cs.duke.edu/spring03/cps296.8/papers/max95opticalModelsForDirectVolumeRendering.pdf">[26]에서 리뷰</a>된 볼륨 렌더링에서 논의된 구적법으로 <span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> 을 추정 (적분을 sample sum 으로)</p>
-<div class="math notranslate nohighlight">
-\[\begin{split}
-    \hat{C}(\mathbf{r})=\sum_{i=1}^N T_i\left(1-\exp \left(-\sigma_i \delta_i\right)\right) \mathbf{c}_i, \\ \text { where } T_i=\exp \left(-\sum_{j=1}^{i-1} \sigma_j \delta_j\right),
-    \end{split}\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\delta_i=t_{i+1}-t_i\)</span> is the distance between adjacent samples (<span class="math notranslate nohighlight">\(dt\)</span> 를 대체)</p></li>
-<li><p><span class="math notranslate nohighlight">\(\left(\mathbf{c}_i, \sigma_i\right)\)</span> 의 집합으로부터 <span class="math notranslate nohighlight">\(\hat{C}(\mathbf{r})\)</span> 을 계산하는 함수는 쉽게 미분 가능하며
-<span class="math notranslate nohighlight">\(\alpha_i=1-\exp \left(-\sigma_i \delta_i\right)\)</span> 를 사용한 전통적인 <strong>alpha compositing</strong></p></li>
-<li><p><strong>alpha compositing</strong> (알파 합성)</p>
-<ul>
-<li><p>여러 이미지 또는 픽셀을 결합하여 하나의 이미지로 만드는 기술</p></li>
-<li><p>ex) 투명한 이미지(유리, 그림자)를 배경 이미지 위에 겹칠 때 알파 컴포지팅을 사용하여 자연스러운 합성 수행</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="optimizing-a-neural-radiance-field">
-<h2>5. Optimizing a Neural Radiance Field<a class="headerlink" href="#optimizing-a-neural-radiance-field" title="Permalink to this heading">#</a></h2>
-<p><strong>[REMIND]</strong></p>
-<ul class="simple">
-<li><p>지금까지 <strong>NeRF 로 scene 을 모델링하는 것, 이 표현으로 새로운 views 를 렌더링 하는 것</strong> 에 필요한 핵심적인 구성요소를 다룸</p>
-<ul>
-<li><p>하지만 해당 요소들로 SOTA 성능을 달성하기에는 한계 존재</p></li>
-<li><p>고해상도 + 복잡한 scene 을 표현 가능하게 하는 두개의 개선점을 도입</p></li>
-</ul>
-</li>
-</ul>
-<ol class="arabic simple">
-<li><p>Positional encoding of the input coordinates
-that assists the MLP in representing high-frequency functions</p></li>
-<li><p>hierarchical sampling procedure
-that allows us to efficiently sample this high-frequency representation.</p></li>
-</ol>
-<section id="positional-encoding">
-<h3>5.1 Positional encoding<a class="headerlink" href="#positional-encoding" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>Neural network <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 가 <strong><span class="math notranslate nohighlight">\((x, y, z, \theta, \phi)\)</span> input coordinates</strong> 에서 직접 연산하는 경우, 색상과 형태에서 고주파 변동을 표현하는데 성능이 좋지 않았음</p></li>
-<li><p><a class="reference external" href="https://arxiv.org/abs/1806.08734">[35] On the spectral bias of neural networks</a> 논문 결과와 동일,</p>
-<ul class="simple">
-<li><p>깊은 신경망이 저주파 함수를 학습하는 쪽으로 편향되었음을 보여줌</p></li>
-<li><p>신경망을 통과하기 전 고주파 함수를 사용하여 <strong>입력을 고차원 공간으로 맵핑</strong>하는 것은 고주파 변동이 포함된 데이터를 더 잘 적합 가능하게 함을 제시</p></li>
-<li><p>저자들은 Neural scene representations 에서 위의 결과를 이용</p></li>
-</ul>
-</li>
-<li><p><strong>→ <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 를 두개의 함수로  구성 <span class="math notranslate nohighlight">\(F_{\Theta}=F_{\Theta}^{\prime} \circ \gamma\)</span>  성능을 상당히 개선 (<span class="math notranslate nohighlight">\(\gamma\)</span> : 학습 X)</strong></p>
-<div class="math notranslate nohighlight">
-\[
-    \gamma(p)=\left(\sin \left(2^0 \pi p\right), \cos \left(2^0 \pi p\right), \cdots, \sin \left(2^{L-1} \pi p\right), \cos \left(2^{L-1} \pi p\right)\right) .
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\gamma\)</span> : mapping <span class="math notranslate nohighlight">\(\mathbb{R}\)</span> → <span class="math notranslate nohighlight">\(\mathbb{R}^{2 L}\)</span>, <span class="math notranslate nohighlight">\(F_{\Theta}^{\prime}\)</span> : Regular MLP</p></li>
-<li><p><span class="math notranslate nohighlight">\(\gamma(\cdot)\)</span> : <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> 의 세 개 좌표값과 Cartesian 시점 방향 벡터 <span class="math notranslate nohighlight">\(\mathbf{d}\)</span> 의 세 성분 각각에, <span class="math notranslate nohighlight">\([-1,1]\)</span> 사이로 정규화한 후 개별적으로 적용됨</p></li>
-<li><p>Experiments : <span class="math notranslate nohighlight">\(L=10\)</span> for <span class="math notranslate nohighlight">\(\gamma(\mathbf{x})\)</span> and <span class="math notranslate nohighlight">\(L=4\)</span> for <span class="math notranslate nohighlight">\(\gamma(\mathbf{d})\)</span></p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="hierarchical-volume-sampling">
-<h3>5.2 Hierarchical volume sampling<a class="headerlink" href="#hierarchical-volume-sampling" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p><strong>Stratified Sampling</strong></p>
-<ul class="simple">
-<li><p>비효율적</p></li>
-<li><p>렌더링된 이미지에 기여하지 않는 여유 공간(비어있는 부분)과 막혀있는(가려진) 영역이 여전히 반복적으로 샘플링됨.</p></li>
-</ul>
-</li>
-<li><p><strong>Hierarchical volume sampling</strong></p>
-<ul class="simple">
-<li><p>최종 렌더링에 대한 예상 효과에 비례하여 샘플을 할당</p></li>
-<li><p>렌더링 효율성을 증가시킴</p></li>
-</ul>
-<p><strong>➡️ Content가 더 있을 것 같은 곳을 더 뽑자 !</strong></p>
-</li>
-<li><p>scene 표현을 위해 단순히 단일 네트워크를 사용하는 것 대신에 우리는 동시에 2개의 네트워크를 최적화</p>
-<p><strong>Step 1. Coarse</strong></p>
-<p><strong>Step 2.  Fine</strong></p>
-</li>
-</ul>
-<hr class="docutils" />
-<ol class="arabic">
-<li><p><strong>Coarse</strong></p>
-<p><strong>Stratified sampling</strong> → <span class="math notranslate nohighlight">\(N_c\)</span> 개의 위치 집합을 샘플링, 이 위치에서 <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> 을 예측하여 <strong>Coarse network</strong> 를  평가</p>
-</li>
-<li><p><strong>Fine</strong></p>
-<ol class="arabic">
-<li><p>1에서 주어진 Coarse 네트워크의 출력을 바탕으로 더 많은 정보에 기반한 포인트 샘플링을 생성 (볼륨에서 장면과 관련된 부분에 샘플이 집중되도록 함)</p></li>
-<li><p>Coarse 네트워크에서의 알파 합성 색상 <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span>을 광선을 따라 샘플링된 모든 컬러 <span class="math notranslate nohighlight">\(c_i\)</span>들의 가중합 형태로 다시 씀</p>
-<div class="math notranslate nohighlight">
-\[
-        \hat{C}_c(\mathbf{r})=\sum_{i=1}^{N_c} w_i c_i, \quad w_i=T_i\left(1-\exp \left(-\sigma_i \delta_i\right)\right) .
-        \]</div>
-</li>
-</ol>
-</li>
-<li><p><strong>piecewise-constant PDF</strong></p>
-<p>Normalizing weight 를 통해 생성</p>
-</li>
-</ol>
-<div class="math notranslate nohighlight">
-\[
-\hat{w}_i= \dfrac{w_i}{\sum_{j=1}^{N_c} w_j}
-\]</div>
-<ul class="simple">
-<li><p>역변환 샘플링을 통해 확률 밀도함수 값에 기반한 2번째 샘플집합의 샘플 <span class="math notranslate nohighlight">\(N_f\)</span> 개를 샘플링</p></li>
-<li><p>첫 번째와 두 번째 샘플 집합의 합집합에서 fine 네트워크를 평가</p></li>
-<li><p>모든 <span class="math notranslate nohighlight">\(N_c+N_f\)</span> 샘플을 사용하여 광선의 최종 렌더링된 색상 <span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> 를 계산</p></li>
-<li><p>이 절차에서는 관측 가능한 content가 포함될 것으로 예상되는 영역에 더 많은 샘플을 할당</p></li>
-</ul>
-</section>
-<section id="implementation-details">
-<h3>5.3 Implementation details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p><strong>각 Scene 에 대해 네트워크 를 별도로 최적화</strong></p>
-<p>scene이 캡처된 RGB 이미지, extrinsic parameter(해당 카메라 포즈), intrinsic parameter, 장면 경계로 구성된 데이터셋이 필요</p>
-<ul>
-<li><p><strong>extrinsic parameter, intrinsic parameter</strong></p>
-<ul class="simple">
-<li><p><strong>Extrinsic Parameter</strong><br />
-3D 공간 내에서 카메라가 어디에 위치(3D Translation)하고 있고, 어디를 바라보고 있는지(3D Rotation)에 대한 Parameter</p></li>
-<li><p><strong>Intrinsic Parameter</strong>
-카메라 렌즈와 센서 위치에 의해서 결정되어지는 항목으로, 이미지 패널이 얼마나 이동(2D Translation)하고, 얼마나 확대하고(2D Scaling), 얼마나 기울어졌는지(2D Shear)에 대한 Parameter</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled6.png"><img alt="NeRF intrinsic_extrinsic" class="bg-primary mb-1" src="../../_images/Untitled6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 651 </span><span class="caption-text">intrinsic parameter and extrinsic parameter</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>카메라 영상 : 3차원 공간상의 점들을 2차원 이미지 평면에 투사(perspective projection)</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Training</strong></p>
-<ol class="arabic simple">
-<li><p>각 최적화 iteration에서 데이터셋의 모든 픽셀 집합에서 카메라 광선 batch를 무작위로 샘플링</p></li>
-<li><p>계층적 샘플링을 따라 coarse 네트워크의 <span class="math notranslate nohighlight">\(N_c\)</span> 개의 샘플과 fine 네트워크의 <span class="math notranslate nohighlight">\(N_c + N_f\)</span> 개의 샘플을 쿼리</p></li>
-<li><p>volume rendering 절차를 사용하여 두 샘플 집합 모두에서 광선의 색상을 렌더링</p></li>
-</ol>
-</li>
-<li><p><strong>Loss</strong>
-coarse 렌더링과 fine 렌더링의 색상 vs 실제 픽셀 색상 간의 총 제곱 오차</p>
-<div class="math notranslate nohighlight">
-\[
-    \mathcal{L}=\sum_{\mathbf{r} \in \mathcal{R}}\left[\left\|\hat{C}_c(\mathbf{r})-C(\mathbf{r})\right\|_2^2+\left\|\hat{C}_f(\mathbf{r})-C(\mathbf{r})\right\|_2^2\right]
-    \]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(\mathcal{R}\)</span> : 각 batch 의 광선의 집합</p></li>
-<li><p><span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span>  : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Ground Truth RGB colors</p></li>
-<li><p><span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Coarse volume predicted RGB colors</p></li>
-<li><p><span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Fine volume predicted RGB colors</p></li>
-<li><p>최종 렌더링은  <span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> 이지만, <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> 의 Loss 역시 최소화</p>
-<ul>
-<li><p>Coarse 네트워크의 weight 분포가 fine network 의 샘플링의 기반이 되기 때문</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="experiments-detail">
-<h2>5.4 Experiments detail<a class="headerlink" href="#experiments-detail" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>a batch size of 4096 rays</p></li>
-<li><p>sampling coordinates :</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(N_c=64\)</span> in the coarse volume</p></li>
-<li><p><span class="math notranslate nohighlight">\(N_f=128\)</span> in the fine volume</p></li>
-</ul>
-</li>
-<li><p>Optimizer : Adam, lr : <span class="math notranslate nohighlight">\(5 \times 10^{-4}\)</span> →  <span class="math notranslate nohighlight">\(5 \times 10^{-5}\)</span> (exponentially decay learning rate)</p>
-<ul>
-<li><p>Default : <span class="math notranslate nohighlight">\(\beta_1=0.9, \beta_2=0.999\)</span>,</p></li>
-</ul>
-</li>
-<li><p>iteration: 한 장면 당 10~30만 iter (NVIDIA V100 GPU 1개로 1~2일 소요)</p></li>
-</ul>
-</section>
-<section id="results">
-<h2>6. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<section id="datasets">
-<h3>6.1 Datasets<a class="headerlink" href="#datasets" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><strong>Synthetic renderings of object</strong></p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled8.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled8.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 652 </span><span class="caption-text">Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p><strong>Diffuse Synthetic</strong> <span class="math notranslate nohighlight">\(360 \degree\)</span></p>
-<ol class="arabic simple">
-<li><p>총 4개의 Lambertian 물체가 간단한 geometry로 구성</p></li>
-<li><p>object : <strong>512×512</strong></p></li>
-<li><p>상반구에 대한 viewpoint 를 렌더링</p></li>
-<li><p>Train : 479, Test : 1000</p></li>
-</ol>
-</li>
-<li><p><strong>Realistic Synthetic</strong> <span class="math notranslate nohighlight">\(360 \degree\)</span>, <strong>Forward-Facing</strong></p>
-<ol class="arabic simple">
-<li><p>총 8개의 non-Lambertian 물체,</p></li>
-<li><p>각각의 pathtraced image 를 포함한 형태의 데이터 셋을 구성</p></li>
-<li><p>object : <strong>800×800</strong></p></li>
-<li><p>6 Scenes : 상반구에 대한 viewpoint 를 렌더링, 2 Scenes :  구 전체에 대한 viewpoint 를 렌더링</p></li>
-<li><p>Train : 100, Test : 200</p></li>
-</ol>
-</li>
-<li><p><strong>Real Forward-Facing</strong></p>
-<ol class="arabic simple">
-<li><p>복잡한 형태의 현실 scene을 앞쪽에서 본 모습을 사용</p></li>
-<li><p>총 8개의 scene (5 scenes : LLFF paper, 3 scenes : 직접 캡처)</p></li>
-<li><p>object : <strong><span class="math notranslate nohighlight">\(1008 \times 756\)</span></strong></p></li>
-<li><p>Train : Test = 7 : 1</p></li>
-</ol>
-</li>
-</ol>
-</section>
-<section id="comparisons">
-<h3>6.2 Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><strong>Models</strong></p>
-<ul>
-<li><p><strong>Neural Volumes (NV)</strong></p></li>
-<li><p><strong>Scene Representation Networks (SRN)</strong></p></li>
-<li><p><strong>Local Light Field Fusion (LLFF)</strong></p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="discussion">
-<h3>6.3 Discussion<a class="headerlink" href="#discussion" title="Permalink to this heading">#</a></h3>
-<ol class="arabic simple">
-<li><p>comparison : Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</p></li>
-</ol>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(\text{NeRF}\)</span> : 미세 디테일, 기하학적 구조, 외양, non-Lambertian 반사 반영</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{LLFF}\)</span> :  ghosting artifact (ship, lego)</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{SRN}\)</span> : blurry and distorted rendering</p></li>
-<li><p><span class="math notranslate nohighlight">\(\text{NV}\)</span> : detail 및 기하적 구조 반영 실패</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled9.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled9.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 653 </span><span class="caption-text">Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Ghosting :</strong> 렌더링에서의 객체 겹침 혹은 번짐</p></li>
-<li><p><strong>Lambertian :</strong> 모든 각도에서 동일한 밝기</p></li>
-<li><p><strong>Non-Lambertian :</strong> 각도에 따라 밝기와 색상 변화 / 광택, 반사, 투명도 등을 가짐</p></li>
-</ul>
-<ol class="arabic" start="2">
-<li><p>comparison : reconstruction partially occluded regions</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled10.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled10.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 654 </span><span class="caption-text">NeRF also correctly reconstructs partially occluded regions</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ol>
-</section>
-<section id="ablation-studies">
-<h3>6.4 Ablation studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>Realistic Synthetic 360도 scene</p></li>
-<li><p>위치 인코딩(PE), 시점 의존성(VD), 계층적 샘플링(H)</p></li>
-<li><p>최대 주파수 <span class="math notranslate nohighlight">\(L\)</span> 의 선택</p>
-<ul class="simple">
-<li><p>5→10 (성능 향상), 10→15 (성능 감소)</p></li>
-<li><p><span class="math notranslate nohighlight">\(2^L\)</span> 이 샘플링 된 입력 이미지에서 존재하는 최대 주파수(본 데이터는 1024)를 초과할 때  추가적인 성능 향상에 제한</p></li>
-</ul>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled111.png"><img alt="ablation study" class="bg-primary mb-1" src="../../_images/Untitled111.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 655 </span><span class="caption-text">ablation study</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-</section>
-<hr class="docutils" />
-<section id="appendix-a-additional-implementation-details">
-<h2>(Appendix) A. Additional Implementation Details<a class="headerlink" href="#appendix-a-additional-implementation-details" title="Permalink to this heading">#</a></h2>
-<ol class="arabic">
-<li><p><strong>Volume Bounds</strong>
-For experiments with synthetic images, we scale the scene so that it lies within a <strong>cube of
-side length 2 centered at the origin</strong>, and only query the representation within this bounding volume. we use normalized device coordinates <strong>to map the depth range of these points into [−1, 1]</strong>.</p></li>
-<li><p><strong>Training Details</strong>
-adding random Gaussian noise with zero mean and unit variance to the <strong>output σ values</strong> during optimization</p></li>
-<li><p><strong>Rendering Details</strong></p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled3.png"><img alt="NeRF architecture" class="bg-primary mb-1" src="../../_images/Untitled3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 656 </span><span class="caption-text">fully-connected network architecture (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ol>
-<ul class="simple">
-<li><p>Coarse network  64 + fine network 128 = 192</p></li>
-<li><p>fully-connected network 구조</p></li>
-<li><p>positional encoding이 더해진 형태의 위치 정보 <strong><span class="math notranslate nohighlight">\((\gamma(x))\)</span></strong> 를 input으로 투입</p></li>
-<li><p>256 채널과 ReLU로 엮인 총 8개의 네트워크를 통과하게 된다. 해당 논문에서는 DeepSDF 구조를 따르고, skip connection을 5번째 layer의 activation에  투입</p></li>
-<li><p>추가 레이어는 volume density 를 output으로 산출</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DreamPose.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="3DGS.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">3D Gaussian Splatting for Real-Time Radiance Field Rendering</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-field-scene-representation">3. Neural Radiance Field Scene Representation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-rendering-with-radiance-fields">4. Volume Rendering with Radiance Fields</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimizing-a-neural-radiance-field">5. Optimizing a Neural Radiance Field</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#positional-encoding">5.1 Positional encoding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hierarchical-volume-sampling">5.2 Hierarchical volume sampling</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.3 Implementation details</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments-detail">5.4 Experiments detail</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">6.1 Datasets</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.2 Comparisons</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">6.3 Discussion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.4 Ablation studies</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix-a-additional-implementation-details">(Appendix) A. Additional Implementation Details</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/NeRF';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="3D Gaussian Splatting for Real-Time Radiance Field Rendering" href="3DGS.html" />
+    <link rel="prev" title="DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion" href="DreamPose.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/NeRF.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/NeRF.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-field-scene-representation">3. Neural Radiance Field Scene Representation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-rendering-with-radiance-fields">4. Volume Rendering with Radiance Fields</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimizing-a-neural-radiance-field">5. Optimizing a Neural Radiance Field</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#positional-encoding">5.1 Positional encoding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hierarchical-volume-sampling">5.2 Hierarchical volume sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.3 Implementation details</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments-detail">5.4 Experiments detail</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">6.1 Datasets</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.2 Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">6.3 Discussion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.4 Ablation studies</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix-a-additional-implementation-details">(Appendix) A. Additional Implementation Details</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2003.08934">https://arxiv.org/abs/2003.08934</a></p></li>
+<li><p>Project: <a class="github reference external" href="https://github.com/bmild/nerf">bmild/nerf</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jeongin Lee</p></li>
+<li><p><strong>Last updated on May. 22, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="nerf-representing-scenes-as-neural-radiance-fields-for-view-synthesis">
+<h1>NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis<a class="headerlink" href="#nerf-representing-scenes-as-neural-radiance-fields-for-view-synthesis" title="Permalink to this heading">#</a></h1>
+<p><a class="reference external" href="https://www.youtube.com/watch?v=JuH79E8rdKc"><img alt="NeRF" src="https://img.youtube.com/vi/JuH79E8rdKc/0.jpg" /></a></p>
+<ul class="simple">
+<li><p>기존의 3D object 자체를 구성하여 렌더링하는 explicit method → 저장 용량이 많이 소요</p></li>
+<li><p>NeRF 는 3D object 자체를 구성하지 않는, <strong>synthesizing novel views</strong>
+좌표를 mlp에 넣어 픽셀 별 색상 및 밀도 값을 얻는 implicit method</p></li>
+<li><p><strong>synthesizing novel views</strong><br />
+특정한 장면(Scene)에서 여러 각도로 찍은 일부의 사진들을 가지고 완전 새로운 각도의 모습을 유추하는 task</p></li>
+</ul>
+<section id="abstract">
+<h2>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p><strong>NeRF</strong></p>
+<ul class="simple">
+<li><p>한정된 수의 입력 뷰 이미지들을 사용</p></li>
+<li><p>continuous volumetric scene 함수 최적화를 통해 <strong>synthesizing novel views</strong> 에서 SOTA 달성</p></li>
+</ul>
+</li>
+<li><p><strong>Algorithm</strong></p>
+<ul class="simple">
+<li><p><strong>FC layer 사용 (non-convolutional)</strong></p>
+<ul>
+<li><p><strong>input</strong>  : 5 차원 좌표 (공간적 위치<span class="math notranslate nohighlight">\((x, y, z)\)</span> &amp; 바라보는 방향<span class="math notranslate nohighlight">\((\theta, \phi)\)</span>)</p></li>
+<li><p><strong>output</strong> : volume density와 해당 방향에 대한 색상 값</p></li>
+</ul>
+</li>
+<li><p>5 차원 좌표 입력 → 카메라 광선을 따라 RGB 값, Volume density 예측
+→ 고전적 Volume rendering 기술을 사용하여 image 로 합성</p></li>
+</ul>
+</li>
+<li><p>복잡한 구조 및 외형을 갖는 scene 에 대한 <strong>Novel views rendering</strong> 을 위해 <strong>NeRF</strong> 를 최적화하는 방법을 제시 (+ Positional Encoding, Hierarchical volume sampling)</p></li>
+<li><p>실험을 통해 기존 작업을 능가하는 결과를 입증</p></li>
+<li><p><strong>Keywords :</strong> scene representation, view synthesis, image-based rendering,
+volume rendering, 3D deep learning</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled2.png"><img alt="NeRF" class="bg-primary mb-1" src="../../_images/Untitled2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 660 </span><span class="caption-text">method that optimizes a continuous 5D neural radiance field representation (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>캡처된 이미지들의 렌더링 오차를 최소화하기 위해 연속적인 <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> scene 함수의 파라미터를 직접 최적화하여 View synthesis 분야의 오랜 문제를 새로운 방식으로 해결함</p>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>정적 장면 → 연속적인 <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> 함수로 표현</strong></p>
+<ul>
+<li><p>FC layer = Regression Function  :
+a single <span class="math notranslate nohighlight">\(5 \mathrm{D}\)</span> coord <span class="math notranslate nohighlight">\((x, y, z, \theta, \phi)\)</span> → density, view-dependent RGB color</p></li>
+</ul>
+</li>
+<li><p><strong>Output</strong></p>
+<ul>
+<li><p>공간 상의 각 지점 <span class="math notranslate nohighlight">\((x, y, z)\)</span>에서 각 방향 <span class="math notranslate nohighlight">\((\theta, \phi)\)</span> 으로 방출된 색상</p></li>
+<li><p>각 지점 <span class="math notranslate nohighlight">\((x, y, z)\)</span> 의 밀도(density) = <span class="math notranslate nohighlight">\(\sigma\)</span></p>
+<ul>
+<li><p>밀도의 누적값을 통해 얼마나 많은 빛이 <span class="math notranslate nohighlight">\((x, y, z)\)</span> 를 통과하는 광선에 의해 누적되는지를 표현</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>특정 시점으로부터의 NeRF 렌더링</strong></p>
+<ol class="arabic simple">
+<li><p>광선을 따라 이동하여 샘플링된 <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> 포인트 집합을 생성</p></li>
+<li><p>해당 포인트들과 이에 해당하는 <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> 시점 방향을 신경망에 대한 입력으로 사용하여 색상과 밀도의 집합을 생성</p></li>
+<li><p>고전적 Volume rendering 기술을 사용하여 <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> image 로 합성</p></li>
+</ol>
+</li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>Optimization</strong></p>
+<ul>
+<li><p>미분 가능, gradient descent 를 통한 최적화</p></li>
+<li><p>각 관찰된 이미지와 렌더링된 해당 <strong>views</strong> 사이의 오차를 최소화</p></li>
+<li><p>다양한 views 에서 오차 최소화를 통해 실제 장면의 contents 가 포함된 위치에 <strong>높은 밀도</strong>와 <strong>정확한 색상</strong>을 할당하여 장면의 일관된 모델을 예측</p></li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>NeRF 최적화의 Basic implementation의 한계 및 대안</strong></p>
+<ol class="arabic simple">
+<li><p><strong>복잡한 장면에 대해서 충분히 고해상도 표현으로 수렴되지 않음</strong></p>
+<ul>
+<li><p>positional encoding 으로 입력 5D 좌표를 변환</p></li>
+<li><p>MLP가 더 높은 주파수의 함수를 나타낼 수 있음.</p></li>
+</ul>
+</li>
+<li><p><strong>카메라 광선당 요구되는 샘플링 수가 비효율적</strong></p>
+<ul>
+<li><p>계층적 샘플링 절차를 제안</p></li>
+<li><p>고주파수의 장면 표현을 적절하게 샘플링하기 위해 필요한 쿼리 수를 감소시킴</p></li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>본 논문의 접근 방식은 volumetric 표현의 이점을 상속</strong></p>
+<ul>
+<li><p>복잡한 실세계의 기하학적 형태와 외형을 표현 가능</p></li>
+<li><p>투영된 이미지를 사용한 Gradient-based 최적화에 적합</p></li>
+<li><p>고해상도에서 복잡한 장면을 모델링할 때 이산화된 복셀 그리드의 엄청난 저장 비용을 극복</p></li>
+<li><p><strong>Voxel (Volume + Pixel)</strong>
+3차원 공간에서 체적의 기본 단위 (2차원의 경우에선 pixel)
+위치 정보와 함께 밀도, 색상, 투과성 등의 속성을 가질 수 있음</p></li>
+<li><p><strong>Volume Rendering</strong>
+3차원 공간에서 정의된 데이터(체적 데이터)를 2차원 이미지로 변환하는 과정
+예시) CT, MRI</p></li>
+<li><p><strong>Volumetric Data (체적 데이터)</strong>
+3차원 공간에서 샘플링된 데이터</p></li>
+</ul>
+</li>
+</ul>
+<hr class="docutils" />
+<ul class="simple">
+<li><p><strong>Technical contributions</strong></p>
+<ul>
+<li><p>복잡한 기하학과 소재를 가진 연속적인 장면을 5차원 NeRF 로 나타내는 접근 방법, 기본 MLP 네트워크로 매개변수화</p></li>
+<li><p>고전적인 볼륨 렌더링 기법을 기반으로 한 미분 가능한 렌더링 절차를 사용하여 이러한 표현을 표준 RGB 이미지로부터 최적화하는 방법을 제안</p></li>
+<li><p>hierarchical sampling strategy : MLP’s capacity 를 시각적인 장면 내용이 있는 공간으로 할당 (물체가 있을 확률이 높은 부분을 모델이 집중적으로 학습)</p></li>
+<li><p>Positional encoding : 입력 5차원 좌표를 고차원 공간으로 매핑함으로써 NeRF 를 성공적으로 최적화하여 고주파의 장면 콘텐츠를 표현 가능</p></li>
+</ul>
+</li>
+<li><p>최초의 <strong>continuous neural scene representation</strong> 제안</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled11.png"><img alt="NeRF overview" class="bg-primary mb-1" src="../../_images/Untitled11.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 661 </span><span class="caption-text">An overview of our neural radiance field scene representation and differentiable rendering procedure (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>Neural 3D shape representations</strong></p></li>
+<li><p><strong>View synthesis and image-based rendering</strong></p></li>
+</ul>
+</section>
+<section id="neural-radiance-field-scene-representation">
+<h2>3. Neural Radiance Field Scene Representation<a class="headerlink" href="#neural-radiance-field-scene-representation" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>5차원 벡터 함수 (MLP) <span class="math notranslate nohighlight">\(F_{\Theta}:(\mathbf{x}, \mathbf{d}) \rightarrow(\mathbf{c}, \sigma)\)</span></p>
+<ul class="simple">
+<li><p><strong>input</strong> : <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> location <span class="math notranslate nohighlight">\(\mathbf{x}=(x, y, z)\)</span> , <span class="math notranslate nohighlight">\(2 \mathrm{D}\)</span> viewing direction <span class="math notranslate nohighlight">\(\mathbf{d}=(\theta, \phi)\)</span></p>
+<ul>
+<li><p><strong>(practically) direction</strong> as a <span class="math notranslate nohighlight">\(3 \mathrm{D}\)</span> Cartesian unit vector <span class="math notranslate nohighlight">\(\mathbf{d}\)</span></p></li>
+<li><p>벡터 <span class="math notranslate nohighlight">\(\mathbf{d} =(d_x, d_y, d_z)\)</span> 는 방향을 나타내며, 이는 단위 벡터(길이가 1)로 정규화</p></li>
+</ul>
+</li>
+<li><p><strong>output</strong> : emitted color <span class="math notranslate nohighlight">\(\mathbf{c}=(r, g, b)\)</span>, volume density <span class="math notranslate nohighlight">\(\sigma\)</span></p></li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(\mathbf{x}\)</span> → <span class="math notranslate nohighlight">\(\sigma\)</span> , <span class="math notranslate nohighlight">\((\mathbf{x, d})\)</span> → RGB 색상 <span class="math notranslate nohighlight">\(\mathbf{c}\)</span> 를 예측하도록 권장 (색상은 view dependent 이므로)</p>
+<ol class="arabic simple">
+<li><p>MLP <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 는 먼저 8개의 fully-connected layer (ReLU, 256개 채널 사용) 로
+입력 3D 좌표 <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> →  <span class="math notranslate nohighlight">\(\sigma\)</span> , 256차원 feature 벡터를 출력</p></li>
+<li><p><strong>1</strong> 의 feature 벡터는 카메라 광선의 시점 방향과 concat</p></li>
+<li><p>뷰에 따른 RGB 색상을 출력하는 하나의 추가 fully-connected layer (ReLU,128개 채널 사용)로 전달됨</p></li>
+</ol>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled3.png"><img alt="NeRF architecture" class="bg-primary mb-1" src="../../_images/Untitled3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 662 </span><span class="caption-text">fully-connected network architecture (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>View 를 고려하여 색상을 예측해야 하는 이유 : non-Lambertian effects</strong></p>
+<ul class="simple">
+<li><p><strong>Lambertian 효과</strong></p>
+<ul>
+<li><p>물체의 표면에서 나오는 광선이 균일하게 반사되는 현상</p></li>
+<li><p>표면을 바라보는 각도와 상관없이 표면에서 반사되는 광량(밝기)이 일정하다는 원리를 기반</p></li>
+</ul>
+</li>
+<li><p>Fig. 3 : 입력 시선 방향을 사용하여 non-Lambertian effects 를 표현한 예시</p></li>
+</ul>
+<ul class="simple">
+<li><p>Fig. 4 : view dependence 를 고려하지 않고 (only <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> input) 학습된 모델은 반사성(specularity)을 표현하는데 어려움이 있음</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="volume-rendering-with-radiance-fields">
+<h2>4. Volume Rendering with Radiance Fields<a class="headerlink" href="#volume-rendering-with-radiance-fields" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>5D NeRF 는 장면을 volume density 와 특정 포인트에서 방출된 빛(색상)으로 표현</strong></p></li>
+<li><p><strong>볼륨 렌더링 : scene 을 통과하는 모든 광선의 색상을 렌더링</strong></p>
+<ul>
+<li><p>NeRF 로부터 View 를 렌더링하려면 원하는 가상 카메라의 각 픽셀을 거쳐 추적된 카메라 광선에 대해 적분값 <span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> 을 추정해야 함</p></li>
+<li><p><span class="math notranslate nohighlight">\(\mathbf{r}(t)=\mathbf{o}+t \mathbf{d}\)</span> : 카메라 광선</p></li>
+<li><p><span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> : near bound <span class="math notranslate nohighlight">\(t_n\)</span> , far bound <span class="math notranslate nohighlight">\(t_f\)</span> 에서 카메라 광선 <span class="math notranslate nohighlight">\(\mathbf{r}(t)\)</span> 의 예측된 색상</p></li>
+<li><p><span class="math notranslate nohighlight">\(T(t)\)</span> : ray 를 따라 <span class="math notranslate nohighlight">\(t_n\)</span> 부터 <span class="math notranslate nohighlight">\(t\)</span> 까지 누적된 투과율(transmittance)</p></li>
+</ul>
+</li>
+</ul>
+<div class="math notranslate nohighlight">
+\[
+C(\mathbf{r})=\int_{t_n}^{t_f} T(t) \sigma(\mathbf{r}(t)) \mathbf{c}(\mathbf{r}(t), \mathbf{d}) d t, \text { where } T(t)=\exp \left(-\int_{t_n}^t \sigma(\mathbf{r}(s)) d s\right)
+\]</div>
+<ul>
+<li><p><strong>Quadrature (구적법) 을 통해 연속적 적분값을 수치적으로 추정</strong></p>
+<ul class="simple">
+<li><p>이산화된 voxel grids 렌더링에 사용되는 <strong>결정론적 구적법</strong>의 한계</p></li>
+<li><p>일반적으로 이산화된 복셀 그리드를 렌더링하는 데 사용되는 결정론적 구적법은 MLP가 <strong>고정된 이산 위치 집합</strong>에서만 쿼리되기 때문에 표현의 해상도를 제한</p></li>
+</ul>
+</li>
+<li><p>➡️ <strong>대안으로 Stratified sampling (계층적 표집) 접근법을 사용.</strong></p></li>
+<li><p><span class="math notranslate nohighlight">\(\left[t_n, t_f\right]\)</span> 를 <span class="math notranslate nohighlight">\(N\)</span> 개의 균일한 간격의 bin으로 분할한 Partition 생성</p></li>
+<li><p>각 bin 내에서 하나의 샘플을 무작위로 추출</p>
+<div class="math notranslate nohighlight">
+\[
+    t_i \sim \mathcal{U}\left[t_n+\frac{i-1}{N}\left(t_f-t_n\right), t_n+\frac{i}{N}\left(t_f-t_n\right)\right].
+    \]</div>
+</li>
+<li><p>여전히 적분값 추정을 위해 이산화된 표본들을 사용하더라도,
+계층적 표집 방법을 통해 continuous scene 표현이 가능</p></li>
+<li><p>다양한 position sample에 대해 최적화가 가능하므로, 최적화 과정에서 MLP가 연속적인 위치들에서 평가되도록 하는 효과</p></li>
+<li><p>위의 샘플링 방법을 통해 뽑은 샘플들로  <a class="reference external" href="https://courses.cs.duke.edu/spring03/cps296.8/papers/max95opticalModelsForDirectVolumeRendering.pdf">[26]에서 리뷰</a>된 볼륨 렌더링에서 논의된 구적법으로 <span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span> 을 추정 (적분을 sample sum 으로)</p>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+    \hat{C}(\mathbf{r})=\sum_{i=1}^N T_i\left(1-\exp \left(-\sigma_i \delta_i\right)\right) \mathbf{c}_i, \\ \text { where } T_i=\exp \left(-\sum_{j=1}^{i-1} \sigma_j \delta_j\right),
+    \end{split}\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\delta_i=t_{i+1}-t_i\)</span> is the distance between adjacent samples (<span class="math notranslate nohighlight">\(dt\)</span> 를 대체)</p></li>
+<li><p><span class="math notranslate nohighlight">\(\left(\mathbf{c}_i, \sigma_i\right)\)</span> 의 집합으로부터 <span class="math notranslate nohighlight">\(\hat{C}(\mathbf{r})\)</span> 을 계산하는 함수는 쉽게 미분 가능하며,
+<span class="math notranslate nohighlight">\(\alpha_i=1-\exp \left(-\sigma_i \delta_i\right)\)</span> 를 사용한 전통적인 <strong>alpha compositing</strong> 으로 귀결됨</p></li>
+<li><p><strong>alpha compositing</strong> (알파 합성)</p>
+<ul>
+<li><p>여러 이미지 또는 픽셀을 결합하여 하나의 이미지로 만드는 기술</p></li>
+<li><p>ex) 투명한 이미지(유리, 그림자)를 배경 이미지 위에 겹칠 때 알파 컴포지팅을 사용하여 자연스러운 합성 수행</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="optimizing-a-neural-radiance-field">
+<h2>5. Optimizing a Neural Radiance Field<a class="headerlink" href="#optimizing-a-neural-radiance-field" title="Permalink to this heading">#</a></h2>
+<p><strong>[REMIND]</strong></p>
+<ul class="simple">
+<li><p>지금까지 <strong>NeRF 로 scene 을 모델링하는 것, 이 표현으로 새로운 views 를 렌더링 하는 것</strong> 에 필요한 핵심적인 구성요소를 다룸</p>
+<ul>
+<li><p>하지만 해당 요소들로 SOTA 성능을 달성하기에는 한계 존재</p></li>
+<li><p>고해상도 + 복잡한 scene 을 표현 가능하게 하는 두개의 개선점을 도입</p></li>
+</ul>
+</li>
+</ul>
+<ol class="arabic simple">
+<li><p>Positional encoding of the input coordinates
+that assists the MLP in representing high-frequency functions</p></li>
+<li><p>hierarchical sampling procedure
+that allows us to efficiently sample this high-frequency representation.</p></li>
+</ol>
+<section id="positional-encoding">
+<h3>5.1 Positional encoding<a class="headerlink" href="#positional-encoding" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>Neural network <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 가 <strong><span class="math notranslate nohighlight">\((x, y, z, \theta, \phi)\)</span> input coordinates</strong> 에서 직접 연산하는 경우, 색상과 형태에서 고주파 변동을 표현하는데 성능이 좋지 않았음</p></li>
+<li><p><a class="reference external" href="https://arxiv.org/abs/1806.08734">[35] On the spectral bias of neural networks</a> 논문 결과와 동일,</p>
+<ul class="simple">
+<li><p>깊은 신경망이 저주파 함수를 학습하는 쪽으로 편향되었음을 보여줌</p></li>
+<li><p>신경망을 통과하기 전 고주파 함수를 사용하여 <strong>입력을 고차원 공간으로 맵핑</strong>하는 것은 고주파 변동이 포함된 데이터를 더 잘 적합 가능하게 함을 제시</p></li>
+<li><p>저자들은 Neural scene representations 에서 위의 결과를 이용</p></li>
+</ul>
+</li>
+<li><p><strong>→ <span class="math notranslate nohighlight">\(F_{\Theta}\)</span> 를 두개의 함수로  구성 <span class="math notranslate nohighlight">\(F_{\Theta}=F_{\Theta}^{\prime} \circ \gamma\)</span>  성능을 상당히 개선 (<span class="math notranslate nohighlight">\(\gamma\)</span> : 학습 X)</strong></p>
+<div class="math notranslate nohighlight">
+\[
+    \gamma(p)=\left(\sin \left(2^0 \pi p\right), \cos \left(2^0 \pi p\right), \cdots, \sin \left(2^{L-1} \pi p\right), \cos \left(2^{L-1} \pi p\right)\right) .
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\gamma\)</span> : mapping <span class="math notranslate nohighlight">\(\mathbb{R}\)</span> → <span class="math notranslate nohighlight">\(\mathbb{R}^{2 L}\)</span>, <span class="math notranslate nohighlight">\(F_{\Theta}^{\prime}\)</span> : Regular MLP</p></li>
+<li><p><span class="math notranslate nohighlight">\(\gamma(\cdot)\)</span> : <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> 의 세 개의 좌표값과 Cartesian 시점 방향 벡터 <span class="math notranslate nohighlight">\(\mathbf{d}\)</span> 의 세 성분을 <span class="math notranslate nohighlight">\([-1,1]\)</span> 사이로 정규화한 후, 각 성분에 개별적으로 적용됨</p></li>
+<li><p>Experiments : <span class="math notranslate nohighlight">\(L=10\)</span> for <span class="math notranslate nohighlight">\(\gamma(\mathbf{x})\)</span> and <span class="math notranslate nohighlight">\(L=4\)</span> for <span class="math notranslate nohighlight">\(\gamma(\mathbf{d})\)</span></p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="hierarchical-volume-sampling">
+<h3>5.2 Hierarchical volume sampling<a class="headerlink" href="#hierarchical-volume-sampling" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p><strong>Stratified Sampling</strong></p>
+<ul class="simple">
+<li><p>비효율적</p></li>
+<li><p>렌더링된 이미지에 기여하지 않는 여유 공간(비어있는 부분)과 막혀있는(가려진) 영역이 여전히 반복적으로 샘플링됨.</p></li>
+</ul>
+</li>
+<li><p><strong>Hierarchical volume sampling</strong></p>
+<ul class="simple">
+<li><p>최종 렌더링에 대한 예상 효과에 비례하여 샘플을 할당</p></li>
+<li><p>렌더링 효율성을 증가시킴</p></li>
+</ul>
+<p><strong>➡️ Content가 더 있을 것 같은 곳을 더 뽑자 !</strong></p>
+</li>
+<li><p>scene 표현을 위해 단순히 단일 네트워크를 사용하는 것 대신에 우리는 동시에 2개의 네트워크를 최적화</p>
+<p><strong>Step 1. Coarse</strong></p>
+<p><strong>Step 2.  Fine</strong></p>
+</li>
+</ul>
+<hr class="docutils" />
+<ol class="arabic">
+<li><p><strong>Coarse</strong></p>
+<p><strong>Stratified sampling</strong> → <span class="math notranslate nohighlight">\(N_c\)</span> 개의 위치 집합을 샘플링, 이 위치에서 <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> 을 예측하여 <strong>Coarse network</strong> 를 평가</p>
+</li>
+<li><p><strong>Fine</strong></p>
+<ol class="arabic">
+<li><p>1에서 주어진 Coarse 네트워크의 출력을 바탕으로 더 많은 정보에 기반한 포인트 샘플링을 생성</p></li>
+<li><p>Coarse 네트워크에서의 알파 합성 색상 <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span>을 광선을 따라 샘플링된 모든 컬러 <span class="math notranslate nohighlight">\(c_i\)</span>들의 가중합 형태로 다시 씀</p>
+<div class="math notranslate nohighlight">
+\[
+        \hat{C}_c(\mathbf{r})=\sum_{i=1}^{N_c} w_i c_i, \quad w_i=T_i\left(1-\exp \left(-\sigma_i \delta_i\right)\right) .
+        \]</div>
+</li>
+</ol>
+</li>
+<li><p><strong>piecewise-constant PDF</strong></p>
+<p>Normalizing weight 를 통해 생성</p>
+</li>
+</ol>
+<div class="math notranslate nohighlight">
+\[
+\hat{w}_i= \dfrac{w_i}{\sum_{j=1}^{N_c} w_j}
+\]</div>
+<ul class="simple">
+<li><p>역변환 샘플링을 통해 확률 밀도함수 값에 기반한 2번째 샘플집합의 샘플 <span class="math notranslate nohighlight">\(N_f\)</span> 개를 샘플링</p></li>
+<li><p>첫 번째와 두 번째 샘플 집합의 합집합에서 fine 네트워크를 평가</p></li>
+<li><p>모든 <span class="math notranslate nohighlight">\(N_c+N_f\)</span> 샘플을 사용하여 광선의 최종 렌더링된 색상 <span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> 를 계산</p></li>
+<li><p>이 절차에서는 관측 가능한 content가 포함될 것으로 예상되는 영역에 더 많은 샘플을 할당</p></li>
+</ul>
+</section>
+<section id="implementation-details">
+<h3>5.3 Implementation details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p><strong>각 Scene 에 대해 네트워크 를 별도로 최적화</strong></p>
+<p>scene이 캡처된 RGB 이미지, extrinsic parameter(해당 카메라 포즈), intrinsic parameter, 장면 경계로 구성된 데이터셋이 필요</p>
+<ul>
+<li><p><strong>extrinsic parameter, intrinsic parameter</strong></p>
+<ul class="simple">
+<li><p><strong>Extrinsic Parameter</strong><br />
+3D 공간 내에서 카메라가 어디에 위치(3D Translation)하고 있고, 어디를 바라보고 있는지(3D Rotation)에 대한 Parameter</p></li>
+<li><p><strong>Intrinsic Parameter</strong>
+카메라 렌즈와 센서 위치에 의해서 결정되는 항목으로, 이미지 패널이 얼마나 이동(2D Translation)하고, 얼마나 확대하고(2D Scaling), 얼마나 기울어졌는지(2D Shear)에 대한 Parameter</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled6.png"><img alt="NeRF intrinsic_extrinsic" class="bg-primary mb-1" src="../../_images/Untitled6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 663 </span><span class="caption-text">intrinsic parameter and extrinsic parameter</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>카메라 영상 : 3차원 공간상의 점들을 2차원 이미지 평면에 투사(perspective projection)</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Training</strong></p>
+<ol class="arabic simple">
+<li><p>각 최적화 iteration에서 데이터셋의 모든 픽셀 집합에서 카메라 광선 batch를 무작위로 샘플링</p></li>
+<li><p>계층적 샘플링을 따라 coarse 네트워크의 <span class="math notranslate nohighlight">\(N_c\)</span> 개의 샘플과 fine 네트워크의 <span class="math notranslate nohighlight">\(N_c + N_f\)</span> 개의 샘플을 쿼리</p></li>
+<li><p>volume rendering 절차를 사용하여 두 샘플 집합 모두에서 광선의 색상을 렌더링</p></li>
+</ol>
+</li>
+<li><p><strong>Loss</strong>
+coarse 렌더링과 fine 렌더링의 색상 vs 실제 픽셀 색상 간의 총 제곱 오차</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal{L}=\sum_{\mathbf{r} \in \mathcal{R}}\left[\left\|\hat{C}_c(\mathbf{r})-C(\mathbf{r})\right\|_2^2+\left\|\hat{C}_f(\mathbf{r})-C(\mathbf{r})\right\|_2^2\right]
+    \]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(\mathcal{R}\)</span> : 각 batch 의 광선의 집합</p></li>
+<li><p><span class="math notranslate nohighlight">\(C(\mathbf{r})\)</span>  : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Ground Truth RGB colors</p></li>
+<li><p><span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Coarse volume predicted RGB colors</p></li>
+<li><p><span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> : Ray <span class="math notranslate nohighlight">\(\mathbf{r}\)</span> 에 대한 Fine volume predicted RGB colors</p></li>
+<li><p>최종 렌더링은  <span class="math notranslate nohighlight">\(\hat{C}_f(\mathbf{r})\)</span> 이지만, <span class="math notranslate nohighlight">\(\hat{C}_c(\mathbf{r})\)</span> 의 Loss 역시 최소화</p>
+<ul>
+<li><p>Coarse 네트워크의 weight 분포가 fine network 의 샘플링의 기반이 되기 때문</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="experiments-detail">
+<h2>5.4 Experiments detail<a class="headerlink" href="#experiments-detail" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>a batch size of 4096 rays</p></li>
+<li><p>sampling coordinates :</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(N_c=64\)</span> in the coarse volume</p></li>
+<li><p><span class="math notranslate nohighlight">\(N_f=128\)</span> in the fine volume</p></li>
+</ul>
+</li>
+<li><p>Optimizer : Adam, lr : <span class="math notranslate nohighlight">\(5 \times 10^{-4}\)</span> →  <span class="math notranslate nohighlight">\(5 \times 10^{-5}\)</span> (exponentially decay learning rate)</p>
+<ul>
+<li><p>Default : <span class="math notranslate nohighlight">\(\beta_1=0.9, \beta_2=0.999\)</span>,</p></li>
+</ul>
+</li>
+<li><p>iteration: 한 장면 당 10~30만 iter (NVIDIA V100 GPU 1개로 1~2일 소요)</p></li>
+</ul>
+</section>
+<section id="results">
+<h2>6. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<section id="datasets">
+<h3>6.1 Datasets<a class="headerlink" href="#datasets" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Synthetic renderings of object</strong></p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled8.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled8.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 664 </span><span class="caption-text">Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p><strong>Diffuse Synthetic</strong> <span class="math notranslate nohighlight">\(360^{\circ}\)</span></p>
+<ol class="arabic simple">
+<li><p>총 4개의 Lambertian 물체가 간단한 geometry로 구성</p></li>
+<li><p>object : <strong>512×512</strong></p></li>
+<li><p>상반구에 대한 viewpoint 를 렌더링</p></li>
+<li><p>Train : 479, Test : 1000</p></li>
+</ol>
+</li>
+<li><p><strong>Realistic Synthetic</strong> <span class="math notranslate nohighlight">\(360^{\circ}\)</span>, <strong>Forward-Facing</strong></p>
+<ol class="arabic simple">
+<li><p>총 8개의 non-Lambertian 물체</p></li>
+<li><p>각각의 pathtraced image 를 포함한 형태의 데이터 셋을 구성</p></li>
+<li><p>object : <strong>800×800</strong></p></li>
+<li><p>6 Scenes : 상반구에 대한 viewpoint 를 렌더링, 2 Scenes :  구 전체에 대한 viewpoint 를 렌더링</p></li>
+<li><p>Train : 100, Test : 200</p></li>
+</ol>
+</li>
+<li><p><strong>Real Forward-Facing</strong></p>
+<ol class="arabic simple">
+<li><p>복잡한 형태의 현실 scene을 앞쪽에서 본 모습을 사용</p></li>
+<li><p>총 8개의 scene (5 scenes : LLFF paper, 3 scenes : 직접 캡처)</p></li>
+<li><p>object : <strong><span class="math notranslate nohighlight">\(1008 \times 756\)</span></strong></p></li>
+<li><p>Train : Test = 7 : 1</p></li>
+</ol>
+</li>
+</ol>
+</section>
+<section id="comparisons">
+<h3>6.2 Comparisons<a class="headerlink" href="#comparisons" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Models</strong></p>
+<ul>
+<li><p><strong>Neural Volumes (NV)</strong></p></li>
+<li><p><strong>Scene Representation Networks (SRN)</strong></p></li>
+<li><p><strong>Local Light Field Fusion (LLFF)</strong></p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="discussion">
+<h3>6.3 Discussion<a class="headerlink" href="#discussion" title="Permalink to this heading">#</a></h3>
+<ol class="arabic simple">
+<li><p>comparison : Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</p></li>
+</ol>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(\text{NeRF}\)</span> : 미세 디테일, 기하학적 구조, 외양, non-Lambertian 반사 반영</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{LLFF}\)</span> :  ghosting artifact (ship, lego)</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{SRN}\)</span> : blurry and distorted rendering</p></li>
+<li><p><span class="math notranslate nohighlight">\(\text{NV}\)</span> : detail 및 기하적 구조 반영 실패</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled9.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled9.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 665 </span><span class="caption-text">Diffuse Synthetic : Lambertian, Realistic Synthetic : non-Lambertian</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Ghosting :</strong> 렌더링에서의 객체 겹침 혹은 번짐</p></li>
+<li><p><strong>Lambertian :</strong> 모든 각도에서 동일한 밝기</p></li>
+<li><p><strong>Non-Lambertian :</strong> 각도에 따라 밝기와 색상 변화 / 광택, 반사, 투명도 등을 가짐</p></li>
+</ul>
+<ol class="arabic" start="2">
+<li><p>comparison : reconstruction partially occluded regions</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled10.png"><img alt="Diffuse Synthetic" class="bg-primary mb-1" src="../../_images/Untitled10.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 666 </span><span class="caption-text">NeRF also correctly reconstructs partially occluded regions</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ol>
+</section>
+<section id="ablation-studies">
+<h3>6.4 Ablation studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>Realistic Synthetic 360도 scene</p></li>
+<li><p>위치 인코딩(PE), 시점 의존성(VD), 계층적 샘플링(H)</p></li>
+<li><p>최대 주파수 <span class="math notranslate nohighlight">\(L\)</span> 의 선택</p>
+<ul class="simple">
+<li><p>5→10 (성능 향상), 10→15 (성능 감소)</p></li>
+<li><p><span class="math notranslate nohighlight">\(2^L\)</span> 이 샘플링 된 입력 이미지에서 존재하는 최대 주파수(본 데이터는 1024)를 초과할 때  추가적인 성능 향상에 제한</p></li>
+</ul>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled111.png"><img alt="ablation study" class="bg-primary mb-1" src="../../_images/Untitled111.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 667 </span><span class="caption-text">ablation study</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+</section>
+<hr class="docutils" />
+<section id="appendix-a-additional-implementation-details">
+<h2>(Appendix) A. Additional Implementation Details<a class="headerlink" href="#appendix-a-additional-implementation-details" title="Permalink to this heading">#</a></h2>
+<ol class="arabic">
+<li><p><strong>Volume Bounds</strong>
+For experiments with synthetic images, we scale the scene so that it lies within a <strong>cube of
+side length 2 centered at the origin</strong>, and only query the representation within this bounding volume. For real images, we use normalized device coordinates <strong>to map the depth range of these points into [−1, 1]</strong>.</p></li>
+<li><p><strong>Training Details</strong>
+adding random Gaussian noise with zero mean and unit variance to the <strong>output σ values</strong> during optimization</p></li>
+<li><p><strong>Rendering Details</strong></p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/Untitled3.png"><img alt="NeRF architecture" class="bg-primary mb-1" src="../../_images/Untitled3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 668 </span><span class="caption-text">fully-connected network architecture (source: <a class="reference external" href="https://arxiv.org/pdf/2003.08934v2">https://arxiv.org/pdf/2003.08934v2</a>)</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ol>
+<ul class="simple">
+<li><p>Coarse network  64 + fine network 128 = 192</p></li>
+<li><p>fully-connected network 구조</p></li>
+<li><p>positional encoding이 더해진 형태의 위치 정보 <strong><span class="math notranslate nohighlight">\((\gamma(x))\)</span></strong> 를 input으로 투입</p></li>
+<li><p>256 채널과 ReLU로 엮인 총 8개의 layer를 통과하게 된다. 해당 논문에서는 DeepSDF 구조를 따르고, skip connection을 5번째 layer의 activation에 투입</p></li>
+<li><p>추가 레이어는 volume density 를 output으로 산출</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DreamPose.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="3DGS.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">3D Gaussian Splatting for Real-Time Radiance Field Rendering</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-field-scene-representation">3. Neural Radiance Field Scene Representation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#volume-rendering-with-radiance-fields">4. Volume Rendering with Radiance Fields</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#optimizing-a-neural-radiance-field">5. Optimizing a Neural Radiance Field</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#positional-encoding">5.1 Positional encoding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hierarchical-volume-sampling">5.2 Hierarchical volume sampling</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">5.3 Implementation details</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments-detail">5.4 Experiments detail</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">6. Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#datasets">6.1 Datasets</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparisons">6.2 Comparisons</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#discussion">6.3 Discussion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">6.4 Ablation studies</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix-a-additional-implementation-details">(Appendix) A. Additional Implementation Details</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Point_E.html b/docs/review/Point_E.html
old mode 100644
new mode 100755
index 828b641e..8c7764a1
--- a/docs/review/Point_E.html
+++ b/docs/review/Point_E.html
@@ -1,1208 +1,1228 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022) &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Point_E';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Shap-E" href="Shap-E.html" />
-    <link rel="prev" title="3D Gaussian Splatting for Real-Time Radiance Field Rendering" href="3DGS.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Point_E.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Point_E.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4-1. Dataset</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#view-synthesis-glide-model">4.2 View Synthesis GLIDE Model</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-diffusion">4.3  <strong>Point Cloud Diffusion</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-upsampler">4.4 <strong>Point Cloud Upsampler</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#producing-meshes">4.5 Producing Meshes</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">5. Results</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-scaling-and-ablations">5.1 <strong>Model Scaling and Ablations</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.2 Qualitative Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods"><strong>5.3 Comparison to Other Methods</strong></a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work"><strong>6. Limitations and Future Work</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2212.08751">https://arxiv.org/abs/2212.08751</a></p></li>
-<li><p>Project: <a class="reference external" href="https://openai.com/index/point-e/">https://openai.com/index/point-e/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
-<li><p><strong>Last updated on Sep. 11, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="point-e-a-system-for-generating-3d-point-clouds-from-complex-prompts-arxiv-2022">
-<h1>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)<a class="headerlink" href="#point-e-a-system-for-generating-3d-point-clouds-from-complex-prompts-arxiv-2022" title="Permalink to this heading">#</a></h1>
-<aside>
-💡 핵심 요약
-<ul class="simple">
-<li><p>관련 태스크: text-to-3D, point cloud generation</p></li>
-<li><p>본 논문의 접근 방식</p>
-<ul>
-<li><p>Text-to-image 모델과 image-to-3D 모델을 결합하여 두 방법의 장점을 합쳤다.</p>
-<ul>
-<li><p>Text-to-image: 대규모 데이터가 존재하여 복잡하고 다양한 텍스트 프롬프트에 대해 적용 가능하다.</p></li>
-<li><p>Image-to-3D: 소규모의 이미지 데이터와 3D 데이터 쌍에 대해 학습하여 3D 포인트 클라우드를 생성한다.</p></li>
-</ul>
-</li>
-<li><p>세 단계의 프로세스로 구성됨</p>
-<ul>
-<li><p>Text → 합성 뷰: GLIDE 모델</p></li>
-<li><p>합성 뷰 → 저해상도 포인트 클라우드: 트랜스포머 기반의 디퓨전 모델</p></li>
-<li><p>저해상도 포인트 클라우드 → 고해상도 포인트 클라우드: 트랜스포머 기반의 디퓨전 모델</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>결과</p>
-<ul>
-<li><p>샘플 품질 측면에서 SOTA 성능이 아니지만, 샘플링 속도가 10~100배(1~2 orders of magnitude) 더 빠르다.</p></li>
-<li><p>텍스트 프롬프트에 의해 조건화된 다양하고 복잡한 3D 형상을 효율적으로 생성할 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</aside>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="abstract">
-<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>제안 배경</p>
-<ul>
-<li><p>최근 텍스트 조건부 3D 객체 생성 기술(text-conditional 3D object generation)이 놀라운 발전을 보이고 있다.</p></li>
-<li><p>하지만, SOTA 모델들은 여전히 하나의 샘플을 만들기 위해 여러 GPU 시간을 요구하고 있다.</p></li>
-<li><p>본 논문에서는 단일 GPU에서 1~2분만에 3D 모델을 생성하는 3D 객체 생성을 위한 방법을 탐색한다.</p></li>
-</ul>
-</li>
-<li><p>접근법</p>
-<ul>
-<li><p>텍스트-이미지 디퓨전 모델을 사용하여 단일 합성 뷰를 생성한 다음 두 번째 디퓨전 모델을 사용하여 3D 포인트 클라우드를 생성한다.</p></li>
-</ul>
-</li>
-<li><p>결과</p>
-<ul>
-<li><p>샘플 품질 측면에서 SOTA 성능이 아니지만, 샘플링 속도가 10~100배(1~2 orders of magnitude) 더 빠르다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<ul>
-<li><p>Text-to-image 생성 모델에서 text-to-video/3D로의 발전</p>
-<ul class="simple">
-<li><p>최근 text-to-image 생성 모델이 폭발적으로 증가함에 따라 몇 초만에 자연어에서 고품질 이미지를 생성하고 수정할 수 있게 되었다.</p></li>
-<li><p>이러한 결과에 영감을 받아 최근 연구에서는 비디오나 3D 객체와 같은 다른 도메인에서의 텍스트 조건부 생성을 탐색하고 있다.</p></li>
-<li><p>본 논문도 text-to-3D 생성 문제에 중점을 둔다.</p></li>
-</ul>
-</li>
-<li><p>최근 text-to-3D 합성의 분류</p>
-<ul class="simple">
-<li><p>최근 text-to-3D 합성은 일반적으로 다음의 두 카테고리 중 하나에 속한다.</p>
-<ol class="arabic simple">
-<li><p>쌍을 이룬(paired)(ex: text, 3D) 데이터 또는 레이블이 없는(unlabeled) 3D 데이터에서 생성 모델을 직접(directly) 학습 시키는 방법</p>
-<ol class="arabic simple">
-<li><p>장점: 기존 생성 모델링 접근 방식을 활용하여 샘플을 효율적으로 생성할 수 있다.</p></li>
-<li><p>단점: 대규모 3D 데이터셋이 없기 때문에 다양하고 복잡한 텍스트 프롬프트로 확장하기 어렵다. → 데이터셋의 한계, 확장성의 어려움</p></li>
-</ol>
-</li>
-<li><p>사전 학습된 text-to-image 모델을 활용하여 미분가능한(differentiable) 3D 표현법들(representations)을 최적화하는 방법</p>
-<ol class="arabic simple">
-<li><p>장점: 복잡하고 다양한 텍스트 프롬프트를 처리할 수 있다.</p></li>
-<li><p>단점:</p>
-<ol class="arabic simple">
-<li><p>각 샘플에 대해 최적화 과정을 거쳐야 하기 때문에 계산 비용이 많이 들고 시간이 오래 걸릴 수 있다.  샘플을 생성하는 데 비용이 많이 드는 최적화 프로세스가 필요하다.</p></li>
-<li><p>강력한 3D prior가 없기 때문에 의미 있거나 일관된 3D 개체에 해당하지 않는 local minima에 빠질 수 있다.</p></li>
-</ol>
-</li>
-</ol>
-</li>
-</ol>
-</li>
-</ul>
-</li>
-<li><p>본 논문의 접근법</p>
-<figure class="align-default" id="id1">
-<img alt="Point_E_01" class="bg-primary mb-1" src="pics/Point_E/01.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 669 </span><span class="caption-text">Point-E 파이프라인 개요</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Text-to-image 모델과 image-to-3D 모델을 결합하여 두 카테고리의 장점을 합치는 것을 목표로 한다.</p>
-<ul>
-<li><p>본 논문의 text-to-image 모델</p>
-<ul>
-<li><p>대규모(텍스트, 이미지)쌍 데이터를 활용하여 다양하고 복잡한 프롬프트를 따를 수 있게 한다.</p></li>
-<li><p>3D 렌더링에 대해 파인튜닝된 GLIDE 버전을 사용한다.</p></li>
-</ul>
-</li>
-<li><p>본 논문의 image-to-3D 모델</p>
-<ul>
-<li><p>소규모의(이미지,3D)쌍 데이터로 학습된다.</p></li>
-<li><p>RGB 포인트 클라우드를 생성하는 디퓨전 모델의 스택을 사용한다. (새로운 transformer 기반 아키텍처 사용)</p></li>
-<li><p>생성된 포인트 클라우드에서 메쉬를 생성하기 위해 회귀 기반(regression-based) 접근 방식을 사용한다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>먼저 text-to-image 모델을 사용하여 이미지를 샘플링하고, 샘플링된 이미지를 조건으로 넣어 3D 객체를 샘플링한다.</p></li>
-<li><p>이 두 단계 모두 몇 초 내에 수행될 수 있으며, 비용이 많이 드는 최적화 과정을 필요로 하지 않는다.</p></li>
-</ul>
-</li>
-<li><p>본 논문의 결과</p>
-<ul class="simple">
-<li><p>간단한 텍스트 프롬프트뿐만 아니라 복잡한 텍스트 프롬프트와도 일치하는 컬러 3D 포인트 클라우드를 생성할 수 있었다.</p></li>
-</ul>
-<p>→ 포인트 클라우드를 효율적으로 생성한다는 의미에서 본 논문의 시스템을 Point E라고 명칭하였다.</p>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="background">
-<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>디퓨전 모델 개요:</p>
-<ul>
-<li><p>점진적으로 Gaussian 노이즈를 추가하는 과정을 통해 데이터를 변형한다.</p></li>
-<li><p>본 논문에서는 Ho et al. (2020)의 Gaussian 확산 설정을 따른다.</p></li>
-</ul>
-</li>
-<li><p>노이즈 프로세스</p>
-<ul>
-<li><p>노이즈 프로세스는 시간 단계 t마다 신호에 Gaussian 노이즈를 추가한다.</p></li>
-<li><p>최종 단계에서는 샘플이 거의 정보를 포함하지 않게 된다.</p></li>
-</ul>
-</li>
-<li><p>역 노이즈 프로세스</p>
-<ul>
-<li><p>랜덤 가우시안 노이즈 <span class="math notranslate nohighlight">\(x_T\)</span>에서 시작하여 점진적으로 노이즈 프로세스를 역으로 진행하여 잡음이 없는 샘플 <span class="math notranslate nohighlight">\(x_0\)</span>에 도달할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>모델 학습</p>
-<ul>
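-<li><p><span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>를 신경망 <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>로 근사하여 학습한다.</p></li>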
-<li><p>Nichol &amp; Dhariwal (2021)은 평균뿐만 아니라 분산도 예측하여 더 나은 성능을 얻었다.</p></li>
-</ul>
-</li>
-<li><p>샘플링</p>
-<ul>
-<li><p>디퓨전 샘플링은 미분 방정식 관점에서 설명될 수 있으며, 이를 통해 다양한 SDE 및 ODE 해석기를 사용하여 이러한 모델에서 샘플링할 수 있다.</p></li>
-<li><p>본 논문에서는 Karras et al. (2022)의 2차 ODE 해석기를 사용한다.</p></li>
-</ul>
-</li>
-<li><p>가이드 전략</p>
-<ul>
-<li><p>Dhariwal &amp; Nichol (2021)은 분류기 가이던스(classifier guidance)를 도입하여 생성 충실도를 높였다.</p></li>
-<li><p>Ho &amp; Salimans (2021)은 분류기 없는 가이던스(classifier-free guidance)를 도입하여 조건부 정보를 무작위로 삭제한다.</p></li>
-<li><p>본 논문에서는 학습 시 드롭 확률 0.1을 사용하여 이 기술을 적용한다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>3. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>포인트 클라우드 생성 모델:</p>
-<ul>
-<li><p>다양한 연구들이 GAN, VAE, 플로우 모델, 디퓨전 모델 등을 사용하여 포인트 클라우드를 생성한다.</p></li>
-<li><p>본 논문의 연구와 가장 유사한 PVD 모델은 단일 디퓨전 모델을 사용하여 포인트 클라우드를 직접 생성한다.</p></li>
-<li><p>본 논문에서는 더 간단한 트랜스포머 기반 아키텍처를 사용하며, RGB 채널을 함께 생성하는 점에서 차별점을 가진다.</p></li>
-</ul>
-</li>
-<li><p>다른 3D 표현을 사용한 모델 생성:</p>
-<ul>
-<li><p>2D 이미지 데이터셋에서 3D 인식 GAN을 학습하거나, 직접 3D 메쉬를 생성하는 연구들이  있다.</p></li>
-<li><p>이러한 연구들은 주로 새로운 뷰 합성 문제에 초점을 맞추지만, 전체 360도 뷰를 재구성하려고 하지는 않는다.</p></li>
-</ul>
-</li>
-<li><p>텍스트 조건부 3D 생성:</p>
-<ul>
-<li><p>몇몇 연구들은 텍스트-이미지 매칭 목표에 따라 3D 표현을 최적화하는 접근 방식을 탐구한다.</p></li>
-<li><p>이러한 접근 방식들은 다양한 복잡한 객체나 장면을 생성할 수 있지만, 최적화 절차가 매우 시간이 많이 걸린다.</p></li>
-</ul>
-</li>
-<li><p>텍스트-3D 데이터 활용:</p>
-<ul>
-<li><p>텍스트-3D 쌍 데이터를 사용하여 텍스트 조건부 3D 모델을 생성하는 연구들도 있다.</p></li>
-<li><p>많은 연구들이 단순한 프롬프트나 좁은 객체 카테고리에 한정되지만, 본 논문에서는 사전 학습된 텍스트-이미지 모델을 활용하여 이러한 문제를 해결한다.</p></li>
-</ul>
-</li>
-<li><p>이미지 기반 3D 재구성:</p>
-<ul>
-<li><p>단일 또는 소수의 이미지에서 3D 모델을 재구성하는 회귀 기반 및 생성 접근 방식이 있다.</p></li>
-<li><p>이들 접근 방식은 불충분한 문제를 다루면서도 유망한 결과를 보여주고 있다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="method">
-<h1>4. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>텍스트를 조건으로 받아 단일 생성 모델로 포인트 클라우드를 직접 생성하는 대신 생성 프로세스를 세 단계로 나눈다.</p>
-<ol class="arabic simple">
-<li><p>‘텍스트 캡션’을 조건으로 받아 ‘합성 뷰’를 생성한다.  → <em>4.2 내용</em></p>
-<ol class="arabic simple">
-<li><p>GLIDE 모델 사용</p></li>
-<li><p>렌더링된 3D 모델로 파인튜닝</p></li>
-</ol>
-</li>
-<li><p>‘합성 뷰’를 조건으로 받아 ‘대략적인(coarse) 포인트 클라우드(1024개의 포인트)’를 생성한다 → <em>4.3 내용</em></p>
-<ol class="arabic simple">
-<li><p>조건부 순열 불변 디퓨전 모델(conditional, permutation invariant diffusion model) 사용</p></li>
-</ol>
-</li>
-<li><p>‘저해상도 포인트 클라우드와 합성 뷰’를 조건으로 받아 ‘고해상도 포인트 클라우드(4096 포인트)’를 생성한다. → <em>4.4 내용</em></p>
-<ol class="arabic simple">
-<li><p>2에서 사용된 모델과 유사하지만 저해상도 포인트 클라우드를 조건으로 하는 더 작은 디퓨전 모델을 사용</p></li>
-</ol>
-</li>
-</ol>
-</li>
-<li><p>수백만 개의 3D 모델과 관련 메타데이터로 구성된 데이터셋에서 모델을 훈련시킨다.</p></li>
-<li><p>데이터셋을 렌더링된 뷰, 텍스트 설명, 그리고 각 점에 대한 RGB 색상을 포함하는 3D 포인트 클라우드로 처리한다.</p></li>
-</ul>
-<section id="dataset">
-<h2>4-1. Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>수백만 개의 3D 모델은 데이터 형식과 품질이 데이터셋 전반에 걸쳐 매우 다양했고, 더 높은 데이터 품질을 보장하기 위해 다양한 후처리 단계가 필요했다.</p></li>
-<li><p>후처리 단계</p>
-<ol class="arabic simple">
-<li><p>Blender를 사용하여 모든 데이터를 하나의 일반적인 형식(RGBAD 이미지)으로 변환한다.</p>
-<ol class="arabic simple">
-<li><p>Blender: 다양한 3D 형식을 지원하며 최적화된 렌더링 엔진을 제공하는 프로그램</p></li>
-<li><p>RGBAD 이미지: RGB 이미지에 깊이(Depth)와 알파(Alpha) 채널이 추가된 형식의 이미지</p></li>
-<li><p>20개의 랜덤한 카메라 각도에서 각 3D 모델을 경계 상자(bounding cube)로 정규화하고 표준 조명 설정을 구성한 후, blender의 내장된 실시간 렌더링 엔진을 사용하여 RGBAD 이미지를 내보냈다.</p></li>
-</ol>
-</li>
-<li><p>각 객체를 렌더링을 사용해 색상이 있는 포인트 클라우드로 변환한다.</p>
-<ol class="arabic simple">
-<li><p>각 RGBAD 이미지의 각 픽셀에 대한 점을 계산하여 각 객체에 대한 밀집된(dense) 포인트 클라우드를 구성한다.</p></li>
-<li><p>이러한 포인트 클라우드는 일반적으로 고르게 분포되어 있지 않으므로, 가장 먼 점 샘플링을 사용하여 4K 점의 균일한 클라우드를 생성한다.</p></li>
-<li><p>렌더링에서 직접 포인트 클라우드를 구성함으로써, 3D 메쉬에서 직접 점을 샘플링할 때 발생할 수 있는 여러 가지 문제를 피할 수 있었다. (모델 내부에 포함된 점을 샘플링하는 문제, 이상한 파일 형식의 3D 모델로 인한 문제)</p></li>
-</ol>
-</li>
-<li><p>저품질 모델을 제거하기 위해 다양한 휴리스틱을 사용한다.</p>
-<ol class="arabic simple">
-<li><p>각 포인트 클라우드의 SVD를 계산하고, 가장 작은 특이값이 일정 임계값(threshold) 이상인 경우에만 유지함으로써 평평한 객체를 제거했다.</p></li>
-<li><p>다음으로, CLIP 특성에 따라 데이터셋을 클러스터링 했다. (일부 클러스터는 많은 저품질 모델 카테고리를 포함하는 반면, 다른 클러스터는 더 다양하거나 해석 가능한 것으로 나타났음)</p></li>
-<li><p>클러스터를 여러 가지 품질의 버킷으로 나누고, 최종 데이터셋으로서 결과 버킷의 가중치 혼합을 사용했다.</p></li>
-</ol>
-</li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="view-synthesis-glide-model">
-<h2>4.2 View Synthesis GLIDE Model<a class="headerlink" href="#view-synthesis-glide-model" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>‘텍스트 캡션’을 조건으로 받아 ‘합성 뷰’를 생성하는 모델</p></li>
-<li><p>4.3에서 설명할 포인트 클라우드 모델은 모두 동일한 렌더러와 동일한 조명 설정을 사용하여 생성된 데이터셋의 렌더링된 뷰를 조건으로 받는다.</p></li>
-<li><p>따라서 해당 파트에서는 데이터셋의 분포와 일치하는 3D 렌더를 명시적으로 생성하고자 하였다.</p></li>
-<li><p>이를 위해 GLIDE를 원래의 데이터셋과 저자들의 3D 렌더링 데이터셋을 혼합하여 파인튜닝 하였다.</p>
-<ul>
-<li><p>저자들의 3D 렌더링 데이터셋이 원래 GLIDE 학습셋에 비해 작기 때문에 3D 렌더링 데이터셋에서 이미지를 샘플링하는 비율을 5%로만 설정하고, 나머지 95%는 원래의 데이터셋을 사용했다.</p></li>
-<li><p>반복(iterations) 횟수는 100,000번으로 설정하였으며, 이는 모델이 3D 데이터셋을 여러 번 거치는 학습을 진행했음을 의미한다. (단, 동일한 렌더링된 시점을 두 번 사용하지 않았다.)</p></li>
-</ul>
-</li>
-<li><p>테스트 시간에는 항상 분포 내 렌더를 샘플링하기 위해, 모든 3D 렌더의 텍스트 프롬프트에 특별한 토큰을 추가하여 이 토큰을 사용하여 샘플링을 수행하였다.</p></li>
-</ul>
-</section>
-<section id="point-cloud-diffusion">
-<h2>4.3  <strong>Point Cloud Diffusion</strong><a class="headerlink" href="#point-cloud-diffusion" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>‘합성 뷰’를 조건으로 받아 ‘대략적인(coarse) 포인트 클라우드(1024개의 포인트)’를 생성하는 모델</p></li>
-<li><p>디퓨전을 이용해 포인트 클라우드를 사용하기 위해 <a class="reference external" href="https://arxiv.org/abs/2104.03670">3D Shape Generation and Completion through Point-Voxel Diffusion</a>에서 사용한 프레임워크를 확장하여 포인트 클라우드의 각 포인트에 RGB 색상을 포함시켰다.</p></li>
-<li><p>포인트 클라우드를 K x 6 형태의 텐서로 나타내며 (K: 포인트 수), 내부 차원은 (x,y,z) 좌표와 (R,G,B) 색상을 포함한다.</p></li>
-<li><p>모든 좌표와 색상은 [-1, 1] 범위로 정규화 된다.</p></li>
-<li><p>K x 6 형태의 랜덤한 노이즈에서 시작하여 이를 점진적으로 디노이징하여 텐서를 직접 생성한다.</p></li>
-<li><p>기존 3D 전용 구조를 활용하던 이전 방법들과 달리, 본 논문에서는 트랜스포머 기반 모델을 사용한다. 모델은
-이미지, 타임 스텝 t, 노이즈가 있는 포인트 클라우드 <span class="math notranslate nohighlight">\(x_t\)</span>를 조건으로 받아 <span class="math notranslate nohighlight">\(\epsilon\)</span>과 <span class="math notranslate nohighlight">\(\Sigma\)</span>을 예측한다.</p></li>
-<li><p>모델 구조</p>
-<figure class="align-default" id="id2">
-<img alt="Point_E_02" class="bg-primary mb-1" src="pics/Point_E/02.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 670 </span><span class="caption-text">Point-E 모델 구조</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>포인트 클라우드의 각 포인트를 출력 차원이 D인 선형 레이어(linear layer)에 넣어 K×D 입력 텐서를 얻고 모델에 입력 컨텍스트로 사용한다. 또한 작은 MLP에 타임스텝 t를 넣어 컨텍스트 앞에 추가할 다른 D차원 벡터를 얻는다.</p></li>
-<li><p>이미지를 조건으로 입력 받기 위해, 사전 학습된 ViT-L/14 CLIP 모델에 이미지를 입력하고 이 CLIP 모델의 마지막 레이어의 임베딩을 가져온다. (shape: 256xD’), 이를 선형 투사(linearly project)하여 256xD shape의 또 다른 텐서를 얻고 이를 트랜스포머 컨텍스트 앞에 추가한다. → 이 방법이 단일 CLIP 이미지 또는 텍스트 임베딩을 사용하는 것보다 우수했다.</p></li>
-<li><p>최종 입력 컨텍스트는 (K+257) x D의 shape가 된다. 길이 K의 최종 출력 시퀀스를 얻기 위해 최종 토큰 K개를 가져오고 이를 프로젝션하여 입력 포인트 K개에 대한 ε와 Σ 예측을 얻는다.</p></li>
-</ul>
-</li>
-</ul>
-<aside>
-💡 정리
-<ul>
-<li><p>입력 컨텍스트 구성:</p>
-<ul class="simple">
-<li><p>포인트 클라우드의 각 점: K×D</p></li>
-<li><p>CLIP 이미지 임베딩: 256×D</p></li>
-<li><p>타임스텝 임베딩: 1×D</p></li>
-</ul>
-<p>→ 최종 입력 컨텍스트: (K+257)×D</p>
-</li>
-<li><p>트랜스포머 모델의 출력: (K+257)개의 토큰 (각 토큰의 차원은 D)</p></li>
-<li><p>최종 출력 시퀀스 선택: 최종 K개의 토큰을 가져온다.</p></li>
-<li><p>ε와 Σ 예측: 최종 K개의 토큰을 ε와 Σ 예측을 위한 입력 포인트로 사용한다.</p></li>
-<li><p>예측된 ε와 Σ을 통해 노이즈를 제거하여 포인트 클라우드를 복원</p></li>
-</ul>
-</aside>
-<ul class="simple">
-<li><p>이 모델에서는 positional encoding을 사용하지 않는다. 따라서 모델 자체는 입력 포인트 클라우드에 대해 순열 불변(permutation-invariant)이다.</p></li>
-</ul>
-</section>
-<section id="point-cloud-upsampler">
-<h2>4.4 <strong>Point Cloud Upsampler</strong><a class="headerlink" href="#point-cloud-upsampler" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>이미지 디퓨전 모델에서의 계층 구조</p>
-<ul class="simple">
-<li><p>이미지 디퓨전 모델의 경우 가장 좋은 품질은 일반적으로 계층 구조를 사용하는 방식으로 달성된다.</p></li>
-<li><p>이 방식에서는 저해상도의 기본 모델이 출력을 생성한 후, 이를 다른 모델이 업샘플한다.</p></li>
-</ul>
-<p>→ 포인트 클라우드 생성에 이 접근 방식을 사용</p>
-</li>
-<li><p>포인트 클라우드 생성에서의 계층 구조</p>
-<ul class="simple">
-<li><p>큰 베이스 모델로 1K 포인트를 생성한 후, 작은 업샘플링 모델을 사용하여 4K 포인트로 업샘플링 한다.</p></li>
-<li><p>모델 크기가 같을 때, 4K 포인트를 생성하는 데에는 1K 포인트를 생성할 때보다 네 배 더 많은 연산을 필요로 한다.</p></li>
-</ul>
-</li>
-<li><p>업샘플러</p>
-<ul class="simple">
-<li><p>업샘플러는 베이스 모델과 동일한 아키텍처를 사용한다.</p></li>
-<li><p>모델은 저해상도 포인트 클라우드 모델과 동일한 아키텍처를 사용한다.</p></li>
-<li><p>저해상도 포인트 클라우드를 입력 받기 위한 추가 컨디셔닝 토큰이 있다.</p></li>
-<li><p>1K 포인트를 조건으로 입력 받아 추가로 3K 포인트를 생성하여 저해상도 포인트 클라우드에 추가한다.</p></li>
-<li><p><span class="math notranslate nohighlight">\(x_t\)</span>에 사용된 레이어가 아닌 별도의 선형 임베딩 레이어를 통해 저해상도 포인트를 전달하여, 모델이 positional encoding을 사용할 필요 없이 조건부 정보와 새로운 포인트를 구별할 수 있도록 한다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="producing-meshes">
-<h2>4.5 Producing Meshes<a class="headerlink" href="#producing-meshes" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>렌더링 기반 평가를 위해 생성된 포인트 클라우드를 직접 렌더링하지 않는다.</p></li>
-<li><p>대신, 포인트 클라우드를 텍스처가 입혀진 메쉬로 변환하고 Blender를 사용해 이러한 메쉬를 렌더링한다.</p></li>
-<li><p>포인트 클라우드에서 메쉬를 생성하는 것은 때때로 어렵고, 본 논문의 모델이 생성한 포인트 클라우드는 종종 균열, 이상치 또는 기타 유형의 노이즈를 가지고 있어 더욱 어렵다.</p></li>
-<li><p>포인트 클라우드에서 메쉬를 생성하기 위해 사전 학습된 SAP 모델을 사용해 봤으나 포인트 클라우드에 존재했던 큰 부분이나 중요한 세부 사항을 잃어버리는 경우가 있었다.</p></li>
-<li><p>따라서 본 논문에서는 회귀(regression) 기반 모델을 사용하여 signed distance field를 예측하고, 여기에 marching cubes 알고리즘을 적용하여 메쉬를 추출했다.</p></li>
-<li><p>그런 다음 원래 포인트 클라우드에서 가장 가까운 점의 색을 사용하여 메쉬의 각 버텍스에 색을 할당했다.</p></li>
-</ul>
-<aside>
-💡
-<ul class="simple">
-<li><p>렌더링 과정 요약</p>
-<ol class="arabic simple">
-<li><p>포인트 클라우드에서 SDF 예측: 회귀 기반 모델을 사용하여 포인트 클라우드로부터 객체의 SDF를 예측한다.</p></li>
-<li><p>메쉬 생성: 예측된 SDF를 기반으로 marching cubes 알고리즘을 적용하여 메쉬를 생성한다.</p></li>
-<li><p>색상 할당: 생성된 메쉬의 각 버텍스에 원래 포인트 클라우드의 색상을 할당하여 텍스처가 입혀진 메쉬를 만든다.</p></li>
-<li><p>Blender를 통한 렌더링: 최종적으로 텍스처가 입혀진 메쉬를 Blender를 사용하여 렌더링한다.</p></li>
-</ol>
-</li>
-</ul>
-</aside>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="results">
-<h1>5. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>평가 지표: CLIP R-Precision, P-IS, P-FID</p>
-<ul>
-<li><p>CLIP R-Precision</p>
-<ul>
-<li><p>특정 객체를 기준으로 하여 모델이 텍스트 설명과 얼마나 잘 일치하는지를 평가하는 지표</p></li>
-<li><p>계산하는 과정</p>
-<ul>
-<li><p>생성된 이미지와 텍스트 프롬프트를 기반으로 CLIP 모델을 사용하여 각각의 이미지 임베딩과 텍스트 임베딩을 계산한다.</p></li>
-<li><p>CLIP 모델에서 계산된 텍스트 임베딩과 이미지 임베딩 간의 유사도를 계산한다.</p></li>
-<li><p>유사도가 가장 높은 상위 R개의 이미지 중 실제로 맞는 이미지의 비율을 계산한다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>P-IS, P-FID</p>
-<ul>
-<li><p>포인트 클라우드의 Inception Score와 FID를 평가하기 위해 본 논문에서 도입한 지표</p></li>
-<li><p>수정된 PointNet++ 모델을 사용하여 포인트 클라우드에서 특징을 추출하고 클래스 확률을 예측</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<section id="model-scaling-and-ablations">
-<h2>5.1 <strong>Model Scaling and Ablations</strong><a class="headerlink" href="#model-scaling-and-ablations" title="Permalink to this heading">#</a></h2>
-<p>저자들은 다음과 같은 베이스 모델에 대하여 학습 중에 생성한 샘플들로 평가하였다.</p>
-<ul class="simple">
-<li><p>40M (uncond.): 어떠한 조건 정보도 없는 작은 모델</p></li>
-<li><p>40M (text vec.): 텍스트 캡션에만 의존하는 작은 모델 (이미지 사용 x), 파인튜닝된 GLIDE 모델 활용 x</p></li>
-<li><p>40M (image vec.): 렌더링된 이미지의 CLIP 이미지 임베딩에 의존하는 작은 모델, 단일 CLIP 임베딩 사용</p></li>
-<li><p>40M: CLIP 잠재 그리드(latent grid)를 통한 전체 이미지 조건을 사용하는 작은 모델</p></li>
-<li><p>300M: CLIP 잠재 그리드를 통한 전체 이미지 조건을 사용하는 중간 모델</p></li>
-<li><p>1B: CLIP 잠재 그리드를 통한 전체 이미지 조건을 사용하는 큰 모델</p></li>
-</ul>
-<p>평가 결과는 아래 그래프와 같다.</p>
-<figure class="align-default" id="id3">
-<img alt="Point_E_03" class="bg-primary mb-1" src="pics/Point_E/03.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 671 </span><span class="caption-text">평가 결과</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>결과</p>
-<ul>
-<li><p>텍스트 조건만 사용하고 텍스트에서 이미지로의 단계가 없는 경우 CLIP R-Precision이 매우 나쁘게 나오는 것을 발견</p></li>
-<li><p>이미지를 조건으로 사용할 때 단일 CLIP 임베딩보다 임베딩 그리드를 사용하는 것이 성능이 더 나은 것을 발견 →  조건 이미지에 대해 더 많은 (공간적인) 정보를 보는 것이 포인트 클라우드 모델에 이점이 있음을 시사</p></li>
-<li><p>모델의 스케일을 증가시키면 P-FID 수렴 속도가 향상되고 최종 CLIP R-Precision이 증가하는 것을 발견</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="qualitative-results">
-<h2>5.2 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>포인트 클라우드 생성 결과</p>
-<figure class="align-default" id="id4">
-<img alt="Point_E_04" class="bg-primary mb-1" src="pics/Point_E/04.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 672 </span><span class="caption-text">포인트클라우드 생성 결과</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Point·E 모델이 복잡한 프롬프트에 대해 종종 일관된 고품질의 3D 형상을 생성할 수 있다는 것을 발견했다.</p></li>
-<li><p>때때로 포인트 클라우드 디퓨전 모델은 조건화된 이미지를 이해하지 못하거나 예측할 수 없는 경우가 있다.  이는 주로 두 가지 문제 중 하나로 인해 발생한다.</p>
-<figure class="align-default" id="id5">
-<img alt="Point_E_05" class="bg-primary mb-1" src="pics/Point_E/05.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 673 </span><span class="caption-text">잘못 추론한 예시</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>모델이 이미지에 나타난 객체의 모양을 잘못 해석하는 경우</p></li>
-<li><p>모델이 이미지에서 가려진 형상의 일부를 잘못 추론하는 경우</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="comparison-to-other-methods">
-<h2><strong>5.3 Comparison to Other Methods</strong><a class="headerlink" href="#comparison-to-other-methods" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>CLIP-R-Precision 지표를 이용하여 Point·E를 다른 3D 생성 기술과 비교했다.</p>
-<figure class="align-default" id="id6">
-<img alt="Point_E_06" class="bg-primary mb-1" src="pics/Point_E/06.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 674 </span><span class="caption-text">CLIP-R-Precision 성능</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Point·E는 state-of-the-art 기술(DreamFusion)보다 성능이 좋지 않지만, 이 불일치의 일부를 설명할 수 있는 이 평가의 두 가지 미묘한 점에 주목해야 한다.</p>
-<ol class="arabic simple">
-<li><p>DreamFusion과 같은 멀티뷰 최적화 기반 방법과 달리 Point·E는 텍스트 프롬프트와 일치하도록 모든 뷰를 명시적으로 최적화하지 않는다. 특정 객체가 모든 각도에서 쉽게 식별되지 않을 수 있기 때문에 CLIP R-Precision이 낮아질 수 있다.</p></li>
-<li><p>본 논문의 방법은 렌더링 전에 포인트 클라우드를 전처리해야 하는데, 포인트 클라우드를 메쉬로 변환하는 것은 어려운 문제다. 본 논문이 사용하는 접근 방식은 때때로 포인트 클라우드 자체에 있는 정보를 잃을 수 있다.</p></li>
-</ol>
-</li>
-<li><p>Point·E는 최신 테크닉보다 이 평가에서 성능이 좋지 않지만 짧은 시간 내에 샘플을 생성한다.</p></li>
-<li><p>이를 통해 보다 실용적으로 응용 프로그램을 만들거나 많은 개체를 샘플링하고 최상의 개체를 휴리스틱을 따라 선택하여 고품질 3D 개체를 찾을 수 있다.</p></li>
-</ul>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="limitations-and-future-work">
-<h1><strong>6. Limitations and Future Work</strong><a class="headerlink" href="#limitations-and-future-work" title="Permalink to this heading">#</a></h1>
-<ul>
-<li><p>합성 렌더링을 필요로 한다. → 향후 실제 세계 이미지를 조건으로 하는 3D 생성기를 훈련시켜 해결할 수 있을 것</p></li>
-<li><p>색상이 있는 3D 형태를 생성하지만, 이 과정은 비교적 낮은 해상도의 3D 형식(포인트 클라우드)로 이루어진다. 형상이나 질감의 세부 사항을 캡처하지 못한다. → 메쉬나 NeRF와 같은 고해상도 3D 표현을 생성하도록 확장하면 해결할 수 있을 것</p></li>
-<li><p>최적화 기반 기술(optimization-based techniques)을 초기화하여 초기 수렴 속도를 높이는 데 사용할 수 있다.</p></li>
-<li><p>이 모델이 DALL·E 2 시스템과 많은 제한 사항을 공유할 것으로 예상한다. (데이터셋에서 야기된 많은 편향을 포함할 수 있다)</p></li>
-<li><p>모델이 생성한 3D 모델이 실제로 물리적으로 제작될 때, 그 제품이 위험할 수 있는 물체의 청사진을 생성할 수 있다.</p>
-<figure class="align-default" id="id7">
-<img alt="Point_E_07" class="bg-primary mb-1" src="pics/Point_E/07.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 675 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>Point·E는 합성된 뷰를 생성하고 이를 기반으로 조건화된 색상 포인트 클라우드를 생성하는 텍스트 조건 합성 시스템이다.</p></li>
-<li><p>Point·E가 텍스트 프롬프트에 의해 조건화된 다양하고 복잡한 3D 형상을 효율적으로 생성할 수 있는 능력을 갖추고 있다는 것을 발견했다.</p></li>
-<li><p>본 논문의 방식이 텍스트에서 3D로의 합성 분야에서의 추가적인 연구의 시작점으로 기여할 수 있기를 희망한다.</p></li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="3DGS.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">3D Gaussian Splatting for Real-Time Radiance Field Rendering</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="Shap-E.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Shap-E</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4-1. Dataset</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#view-synthesis-glide-model">4.2 View Synthesis GLIDE Model</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-diffusion">4.3  <strong>Point Cloud Diffusion</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-upsampler">4.4 <strong>Point Cloud Upsampler</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#producing-meshes">4.5 Producing Meshes</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">5. Results</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-scaling-and-ablations">5.1 <strong>Model Scaling and Ablations</strong></a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.2 Qualitative Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods"><strong>5.3 Comparison to Other Methods</strong></a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work"><strong>6. Limitations and Future Work</strong></a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022) &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Point_E';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Shap-E" href="Shap-E.html" />
+    <link rel="prev" title="3D Gaussian Splatting for Real-Time Radiance Field Rendering" href="3DGS.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Point_E.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Point_E.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4-1. Dataset</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#view-synthesis-glide-model">4.2 View Synthesis GLIDE Model</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-diffusion">4.3  <strong>Point Cloud Diffusion</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-upsampler">4.4 <strong>Point Cloud Upsampler</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#producing-meshes">4.5 Producing Meshes</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">5. Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-scaling-and-ablations">5.1 <strong>Model Scaling and Ablations</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.2 Qualitative Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods"><strong>5.3 Comparison to Other Methods</strong></a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work"><strong>6. Limitations and Future Work</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2212.08751">https://arxiv.org/abs/2212.08751</a></p></li>
+<li><p>Project: <a class="reference external" href="https://openai.com/index/point-e/">https://openai.com/index/point-e/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
+<li><p><strong>Last updated on Sep. 11, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="point-e-a-system-for-generating-3d-point-clouds-from-complex-prompts-arxiv-2022">
+<h1>Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)<a class="headerlink" href="#point-e-a-system-for-generating-3d-point-clouds-from-complex-prompts-arxiv-2022" title="Permalink to this heading">#</a></h1>
+<aside>
+💡 핵심 요약
+<ul class="simple">
+<li><p>관련 태스크: text-to-3D, point cloud generation</p></li>
+<li><p>본 논문의 접근 방식</p>
+<ul>
+<li><p>Text-to-image 모델과 image-to-3D 모델을 결합하여 두 방법의 장점을 합쳤다.</p>
+<ul>
+<li><p>Text-to-image: 대규모 데이터가 존재하여 복잡하고 다양한 텍스트 프롬프트에 대해 적용 가능하다.</p></li>
+<li><p>Image-to-3D: 소규모의 이미지 데이터와 3D 데이터 쌍에 대해 학습하여 3D 포인트 클라우드를 생성한다.</p></li>
+</ul>
+</li>
+<li><p>세 단계의 프로세스로 구성됨</p>
+<ul>
+<li><p>Text → 합성 뷰: GLIDE 모델</p></li>
+<li><p>합성 뷰 → 저해상도 포인트 클라우드: 트랜스포머 기반의 디퓨전 모델</p></li>
+<li><p>저해상도 포인트 클라우드 → 고해상도 포인트 클라우드: 트랜스포머 기반의 디퓨전 모델</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>결과</p>
+<ul>
+<li><p>샘플 품질 측면에서 SOTA 성능이 아니지만, 샘플링 속도가 10~100배(one to two orders of magnitude) 더 빠르다.</p></li>
+<li><p>텍스트 프롬프트에 의해 조건화된 다양하고 복잡한 3D 형상을 효율적으로 생성할 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</aside>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>제안 배경</p>
+<ul>
+<li><p>최근 텍스트 조건부 3D 객체 생성 기술(text-conditional 3D object generation)이 놀라운 발전을 보이고 있다.</p></li>
+<li><p>하지만, SOTA 모델들은 여전히 하나의 샘플을 만들기 위해 여러 GPU 시간을 요구하고 있다.</p></li>
+<li><p>본 논문에서는 단일 GPU에서 1~2분만에 3D 모델을 생성하는 3D 객체 생성을 위한 방법을 탐색한다.</p></li>
+</ul>
+</li>
+<li><p>접근법</p>
+<ul>
+<li><p>텍스트-이미지 디퓨전 모델을 사용하여 단일 합성 뷰를 생성한 다음 두 번째 디퓨전 모델을 사용하여 3D 포인트 클라우드를 생성한다.</p></li>
+</ul>
+</li>
+<li><p>결과</p>
+<ul>
+<li><p>샘플 품질 측면에서 SOTA 성능이 아니지만, 샘플링 속도가 10~100배(one to two orders of magnitude) 더 빠르다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<ul>
+<li><p>Text-to-image 생성 모델에서 text-to-video/3D로의 발전</p>
+<ul class="simple">
+<li><p>최근 text-to-image 생성 모델이 폭발적으로 증가함에 따라 몇 초만에 자연어에서 고품질 이미지를 생성하고 수정할 수 있게 되었다.</p></li>
+<li><p>이러한 결과에 영감을 받아 최근 연구에서는 비디오나 3D 객체와 같은 다른 도메인에서의 텍스트 조건부 생성을 탐색하고 있다.</p></li>
+<li><p>본 논문도 text-to-3D 생성 문제에 중점을 둔다.</p></li>
+</ul>
+</li>
+<li><p>최근 text-to-3D 합성의 분류</p>
+<ul class="simple">
+<li><p>최근 text-to-3D 합성은 일반적으로 다음의 두 카테고리 중 하나에 속한다.</p>
+<ol class="arabic simple">
+<li><p>쌍을 이룬(paired)(ex: text, 3D) 데이터 또는 레이블이 없는(unlabeled) 3D 데이터에서 생성 모델을 직접(directly) 학습 시키는 방법</p>
+<ol class="arabic simple">
+<li><p>장점: 기존 생성 모델링 접근 방식을 활용하여 샘플을 효율적으로 생성할 수 있다.</p></li>
+<li><p>단점: 대규모 3D 데이터셋이 없기 때문에 다양하고 복잡한 텍스트 프롬프트로 확장하기 어렵다. → 데이터셋의 한계, 확장성의 어려움</p></li>
+</ol>
+</li>
+<li><p>사전 학습된 text-to-image 모델을 활용하여 미분가능한(differentiable) 3D 표현법들(representations)을 최적화하는 방법</p>
+<ol class="arabic simple">
+<li><p>장점: 복잡하고 다양한 텍스트 프롬프트를 처리할 수 있다.</p></li>
+<li><p>단점:</p>
+<ol class="arabic simple">
+<li><p>각 샘플에 대해 최적화 과정을 거쳐야 하기 때문에 계산 비용이 많이 들고 시간이 오래 걸릴 수 있다.  샘플을 생성하는 데 비용이 많이 드는 최적화 프로세스가 필요하다.</p></li>
+<li><p>강력한 3D prior가 없기 때문에 의미 있거나 일관된 3D 개체에 해당하지 않는 local minima에 빠질 수 있다.</p></li>
+</ol>
+</li>
+</ol>
+</li>
+</ol>
+</li>
+</ul>
+</li>
+<li><p>본 논문의 접근법</p>
+<figure class="align-default" id="id1">
+<img alt="Point_E_01" class="bg-primary mb-1" src="../../_images/013.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 681 </span><span class="caption-text">Point-E 파이프라인 개요</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Text-to-image 모델과 image-to-3D 모델을 결합하여 두 카테고리의 장점을 합치는 것을 목표로 한다.</p>
+<ul>
+<li><p>본 논문의 text-to-image 모델</p>
+<ul>
+<li><p>대규모(텍스트, 이미지)쌍 데이터를 활용하여 다양하고 복잡한 프롬프트를 따를 수 있게 한다.</p></li>
+<li><p>3D 렌더링에 대해 파인튜닝된 GLIDE 버전을 사용한다.</p></li>
+</ul>
+</li>
+<li><p>본 논문의 image-to-3D 모델</p>
+<ul>
+<li><p>소규모의(이미지,3D)쌍 데이터로 학습된다.</p></li>
+<li><p>RGB 포인트 클라우드를 생성하는 디퓨전 모델의 스택을 사용한다. (새로운 transformer 기반 아키텍처 사용)</p></li>
+<li><p>생성된 포인트 클라우드에서 메쉬를 생성하기 위해 회귀 기반(regression-based) 접근 방식을 사용한다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>먼저 text-to-image 모델을 사용하여 이미지를 샘플링하고, 샘플링된 이미지를 조건으로 넣어 3D 객체를 샘플링한다.</p></li>
+<li><p>이 두 단계 모두 몇 초 내에 수행될 수 있으며, 비용이 많이 드는 최적화 과정을 필요로 하지 않는다.</p></li>
+</ul>
+</li>
+<li><p>본 논문의 결과</p>
+<ul class="simple">
+<li><p>간단한 텍스트 프롬프트뿐만 아니라 복잡한 텍스트 프롬프트와도 일치하는 컬러 3D 포인트 클라우드를 생성할 수 있었다.</p></li>
+</ul>
+<p>→ 포인트 클라우드를 효율적으로 생성한다는 의미에서 본 논문의 시스템을 Point·E라고 명명하였다.</p>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>디퓨전 모델 개요:</p>
+<ul>
+<li><p>점진적으로 Gaussian 노이즈를 추가하는 과정을 통해 데이터를 변형한다.</p></li>
+<li><p>본 논문에서는 Ho et al. (2020)의 Gaussian 확산 설정을 따른다.</p></li>
+</ul>
+</li>
+<li><p>노이즈 프로세스</p>
+<ul>
+<li><p>노이즈 프로세스는 시간 단계 t마다 신호에 Gaussian 노이즈를 추가한다.</p></li>
+<li><p>최종 단계에서는 샘플이 거의 정보를 포함하지 않게 된다.</p></li>
+</ul>
+</li>
+<li><p>역 노이즈 프로세스</p>
+<ul>
+<li><p>랜덤 가우시안 노이즈 <span class="math notranslate nohighlight">\(x_T\)</span>에서 시작하여 점진적으로 노이즈 프로세스를 역으로 진행하여 잡음이 없는 샘플 <span class="math notranslate nohighlight">\(x_0\)</span>에 도달할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>모델 학습</p>
+<ul>
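+<li><p><span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>를 신경망 <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>로 근사하여 학습한다.</p></li>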
+<li><p>Nichol &amp; Dhariwal (2021)은 평균뿐만 아니라 분산도 예측하여 더 나은 성능을 얻었다.</p></li>
+</ul>
+</li>
+<li><p>샘플링</p>
+<ul>
+<li><p>디퓨전 샘플링은 미분 방정식 관점에서 설명될 수 있으며, 이를 통해 다양한 SDE 및 ODE 해석기를 사용하여 이러한 모델에서 샘플링할 수 있다.</p></li>
+<li><p>본 논문에서는 Karras et al. (2022)의 2차 ODE 해석기를 사용한다.</p></li>
+</ul>
+</li>
+<li><p>가이드 전략</p>
+<ul>
+<li><p>Dhariwal &amp; Nichol (2021)은 분류기 가이던스(classifier guidance)를 도입하여 생성 충실도를 높였다.</p></li>
+<li><p>Ho &amp; Salimans (2021)은 분류기 없는 가이던스(classifier-free guidance)를 도입하여 조건부 정보를 무작위로 삭제한다.</p></li>
+<li><p>본 논문에서는 학습 시 드롭 확률 0.1을 사용하여 이 기술을 적용한다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>3. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>포인트 클라우드 생성 모델:</p>
+<ul>
+<li><p>다양한 연구들이 GAN, VAE, 플로우 모델, 디퓨전 모델 등을 사용하여 포인트 클라우드를 생성한다.</p></li>
+<li><p>본 논문의 연구와 가장 유사한 PVD 모델은 단일 디퓨전 모델을 사용하여 포인트 클라우드를 직접 생성한다.</p></li>
+<li><p>본 논문에서는 더 간단한 트랜스포머 기반 아키텍처를 사용하며, RGB 채널을 함께 생성하는 점에서 차별점을 가진다.</p></li>
+</ul>
+</li>
+<li><p>다른 3D 표현을 사용한 모델 생성:</p>
+<ul>
+<li><p>2D 이미지 데이터셋에서 3D 인식 GAN을 학습하거나, 직접 3D 메쉬를 생성하는 연구들이  있다.</p></li>
+<li><p>이러한 연구들은 주로 새로운 뷰 합성 문제에 초점을 맞추지만, 전체 360도 뷰를 재구성하려고 하지는 않는다.</p></li>
+</ul>
+</li>
+<li><p>텍스트 조건부 3D 생성:</p>
+<ul>
+<li><p>몇몇 연구들은 텍스트-이미지 매칭 목표에 따라 3D 표현을 최적화하는 접근 방식을 탐구한다.</p></li>
+<li><p>이러한 접근 방식들은 다양한 복잡한 객체나 장면을 생성할 수 있지만, 최적화 절차가 매우 시간이 많이 걸린다.</p></li>
+</ul>
+</li>
+<li><p>텍스트-3D 데이터 활용:</p>
+<ul>
+<li><p>텍스트-3D 쌍 데이터를 사용하여 텍스트 조건부 3D 모델을 생성하는 연구들도 있다.</p></li>
+<li><p>많은 연구들이 단순한 프롬프트나 좁은 객체 카테고리에 한정되지만, 본 논문에서는 사전 학습된 텍스트-이미지 모델을 활용하여 이러한 문제를 해결한다.</p></li>
+</ul>
+</li>
+<li><p>이미지 기반 3D 재구성:</p>
+<ul>
+<li><p>단일 또는 소수의 이미지에서 3D 모델을 재구성하는 회귀 기반 및 생성 접근 방식이 있다.</p></li>
+<li><p>이들 접근 방식은 불충분한 문제를 다루면서도 유망한 결과를 보여주고 있다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>4. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>텍스트를 조건으로 받아 단일 생성 모델로 포인트 클라우드를 직접 생성하는 대신 생성 프로세스를 세 단계로 나눈다.</p>
+<ol class="arabic simple">
+<li><p>‘텍스트 캡션’을 조건으로 받아 ‘합성 뷰’를 생성한다.  → <em>4.2 내용</em></p>
+<ol class="arabic simple">
+<li><p>GLIDE 모델 사용</p></li>
+<li><p>렌더링된 3D 모델로 파인튜닝</p></li>
+</ol>
+</li>
+<li><p>‘합성 뷰’를 조건으로 받아 ‘대략적인(coarse) 포인트 클라우드(1024개의 포인트)’를 생성한다 → <em>4.3 내용</em></p>
+<ol class="arabic simple">
+<li><p>조건부 순열 불변 디퓨전 모델(conditional, permutation invariant diffusion model) 사용</p></li>
+</ol>
+</li>
+<li><p>‘저해상도 포인트 클라우드와 합성 뷰’를 조건으로 받아 ‘고해상도 포인트 클라우드(4096 포인트)’를 생성한다. → <em>4.4 내용</em></p>
+<ol class="arabic simple">
+<li><p>2에서 사용된 모델과 유사하지만 저해상도 포인트 클라우드를 조건으로 하는 더 작은 디퓨전 모델을 사용</p></li>
+</ol>
+</li>
+</ol>
+</li>
+<li><p>수백만 개의 3D 모델과 관련 메타데이터로 구성된 데이터셋에서 모델을 훈련시킨다.</p></li>
+<li><p>데이터셋을 렌더링된 뷰, 텍스트 설명, 그리고 각 점에 대한 RGB 색상을 포함하는 3D 포인트 클라우드로 처리한다.</p></li>
+</ul>
+<section id="dataset">
+<h2>4-1. Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>수백만 개의 3D 모델은 데이터 형식과 품질이 데이터셋 전반에 걸쳐 매우 다양했고, 더 높은 데이터 품질을 보장하기 위해 다양한 후처리 단계가 필요했다.</p></li>
+<li><p>후처리 단계</p>
+<ol class="arabic simple">
+<li><p>Blender를 사용하여 모든 데이터를 하나의 일반적인 형식(RGBAD 이미지)으로 변환한다.</p>
+<ol class="arabic simple">
+<li><p>Blender: 다양한 3D 형식을 지원하며 최적화된 렌더링 엔진을 제공하는 프로그램</p></li>
+<li><p>RGBAD 이미지: RGB 이미지에 깊이(Depth)와 알파(Alpha) 채널이 추가된 형식의 이미지</p></li>
+<li><p>20개의 랜덤한 카메라 각도에서 각 3D 모델을 경계 상자(bounding cube)로 정규화하고 표준 조명 설정을 구성한 후, blender의 내장된 실시간 렌더링 엔진을 사용하여 RGBAD 이미지를 내보냈다.</p></li>
+</ol>
+</li>
+<li><p>각 객체를 렌더링을 사용해 색상이 있는 포인트 클라우드로 변환한다.</p>
+<ol class="arabic simple">
+<li><p>각 RGBAD 이미지의 각 픽셀에 대한 점을 계산하여 각 객체에 대한 밀집된(dense) 포인트 클라우드를 구성한다.</p></li>
+<li><p>이러한 포인트 클라우드는 일반적으로 고르게 분포되어 있지 않으므로, 가장 먼 점 샘플링을 사용하여 4K 점의 균일한 클라우드를 생성한다.</p></li>
+<li><p>렌더링에서 직접 포인트 클라우드를 구성함으로써, 3D 메쉬에서 직접 점을 샘플링할 때 발생할 수 있는 여러 가지 문제를 피할 수 있었다. (모델 내부에 포함된 점을 샘플링하는 문제, 이상한 파일 형식의 3D 모델로 인한 문제)</p></li>
+</ol>
+</li>
+<li><p>저품질 모델을 제거하기 위해 다양한 휴리스틱을 사용한다.</p>
+<ol class="arabic simple">
+<li><p>각 포인트 클라우드의 SVD를 계산하고, 가장 작은 특이값이 일정 임계값(threshold) 이상인 경우에만 유지함으로써 평평한 객체를 제거했다.</p></li>
+<li><p>다음으로, CLIP 특성에 따라 데이터셋을 클러스터링 했다. (일부 클러스터는 많은 저품질 모델 카테고리를 포함하는 반면, 다른 클러스터는 더 다양하거나 해석 가능한 것으로 나타났음)</p></li>
+<li><p>클러스터를 여러 가지 품질의 버킷으로 나누고, 최종 데이터셋으로서 결과 버킷의 가중치 혼합을 사용했다.</p></li>
+</ol>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="view-synthesis-glide-model">
+<h2>4.2 View Synthesis GLIDE Model<a class="headerlink" href="#view-synthesis-glide-model" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>‘텍스트 캡션’을 조건으로 받아 ‘합성 뷰’를 생성하는 모델</p></li>
+<li><p>4.3에서 설명할 포인트 클라우드 모델은 모두 동일한 렌더러와 동일한 조명 설정을 사용하여 생성된 데이터셋의 렌더링된 뷰를 조건으로 받는다.</p></li>
+<li><p>따라서 해당 파트에서는 데이터셋의 분포와 일치하는 3D 렌더를 명시적으로 생성하고자 하였다.</p></li>
+<li><p>이를 위해 GLIDE를 원래의 데이터셋과 저자들의 3D 렌더링 데이터셋을 혼합하여 파인튜닝 하였다.</p>
+<ul>
+<li><p>저자들의 3D 렌더링 데이터셋이 원래 GLIDE 학습셋에 비해 작기 때문에 3D 렌더링 데이터셋에서 이미지를 샘플링하는 비율을 5%로만 설정하고, 나머지 95%는 원래의 데이터셋을 사용했다.</p></li>
+<li><p>반복(iterations) 횟수는 100,000번으로 설정하였으며, 이는 모델이 3D 데이터셋을 여러 번 거치는 학습을 진행했음을 의미한다. (단, 동일한 렌더링된 시점을 두 번 사용하지 않았다.)</p></li>
+</ul>
+</li>
+<li><p>테스트 시간에는 항상 분포 내 렌더를 샘플링하기 위해, 모든 3D 렌더의 텍스트 프롬프트에 특별한 토큰을 추가하여 이 토큰을 사용하여 샘플링을 수행하였다.</p></li>
+</ul>
+</section>
+<section id="point-cloud-diffusion">
+<h2>4.3  <strong>Point Cloud Diffusion</strong><a class="headerlink" href="#point-cloud-diffusion" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>‘합성 뷰’를 조건으로 받아 ‘대략적인(coarse) 포인트 클라우드(1024개의 포인트)’를 생성하는 모델</p></li>
+<li><p>디퓨전을 이용해 포인트 클라우드를 생성하기 위해 <a class="reference external" href="https://arxiv.org/abs/2104.03670">3D Shape Generation and Completion through Point-Voxel Diffusion</a>에서 사용한 프레임워크를 확장하여 포인트 클라우드의 각 포인트에 RGB 색상을 포함시켰다.</p></li>
+<li><p>포인트 클라우드를 K x 6 형태의 텐서로 나타내며 (K: 포인트 수), 내부 차원은 (x,y,z) 좌표와 (R,G,B) 색상을 포함한다.</p></li>
+<li><p>모든 좌표와 색상은 [-1, 1] 범위로 정규화 된다.</p></li>
+<li><p>K x 6 형태의 랜덤한 노이즈에서 시작하여 이를 점진적으로 디노이징하여 텐서를 직접 생성한다.</p></li>
+<li><p>기존 3D 전용 구조를 활용하던 이전 방법들과 달리, 본 논문에서는 트랜스포머 기반 모델을 사용한다. 모델은
+이미지, 타임 스텝 t, 노이즈가 있는 포인트 클라우드 <span class="math notranslate nohighlight">\(x_t\)</span>를 조건으로 받아 <span class="math notranslate nohighlight">\(\epsilon\)</span>과 <span class="math notranslate nohighlight">\(\Sigma\)</span>를 예측한다.</p></li>
+<li><p>모델 구조</p>
+<figure class="align-default" id="id2">
+<img alt="Point_E_02" class="bg-primary mb-1" src="../../_images/022.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 682 </span><span class="caption-text">Point-E 모델 구조</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>포인트 클라우드의 각 포인트를 출력 차원이 D인 선형 레이어(linear layer)에 넣어 K×D 입력 텐서를 얻고 모델에 입력 컨텍스트로 사용한다. 또한 작은 MLP에 타임스텝 t를 넣어 컨텍스트 앞에 추가할 다른 D차원 벡터를 얻는다.</p></li>
+<li><p>이미지를 조건으로 입력 받기 위해, 사전 학습된 ViT-L/14 CLIP 모델에 이미지를 입력하고 이 CLIP 모델의 마지막 레이어의 임베딩을 가져온다. (shape: 256xD’), 이를 선형 투사(linearly project)하여 256xD shape의 또 다른 텐서를 얻고 이를 트랜스포머 컨텍스트 앞에 추가한다. → 이 방법이 단일 CLIP 이미지 또는 텍스트 임베딩을 사용하는 것보다 우수했다.</p></li>
+<li><p>최종 입력 컨텍스트는 (K+257) x D의 shape가 된다. 길이 K의 최종 출력 시퀀스를 얻기 위해 최종 토큰 K개를 가져오고 이를 프로젝션하여 입력 포인트 K개에 대한 ε와 Σ 예측을 얻는다.</p></li>
+</ul>
+</li>
+</ul>
+<aside>
+💡 정리
+<ul>
+<li><p>입력 컨텍스트 구성:</p>
+<ul class="simple">
+<li><p>포인트 클라우드의 각 점: K×D</p></li>
+<li><p>CLIP 이미지 임베딩: 256×D</p></li>
+<li><p>타임스텝 임베딩: 1×D</p></li>
+</ul>
+<p>→ 최종 입력 컨텍스트: (K+257)×D</p>
+</li>
+<li><p>트랜스포머 모델의 출력: (K+257)개의 토큰 (각 토큰의 차원은 D)</p></li>
+<li><p>최종 출력 시퀀스 선택: 최종 K개의 토큰을 가져온다.</p></li>
+<li><p>ε와 Σ 예측: 최종 K개의 토큰을 ε와 Σ 예측을 위한 입력 포인트로 사용한다.</p></li>
+<li><p>예측된 ε와 Σ을 통해 노이즈를 제거하여 포인트 클라우드를 복원</p></li>
+</ul>
+</aside>
+<ul class="simple">
+<li><p>이 모델에서는 positional encoding을 사용하지 않는다. 따라서 모델 자체는 입력 포인트 클라우드에 대해 순열 불변(permutation-invariant)하다.</p></li>
+</ul>
+</section>
+<section id="point-cloud-upsampler">
+<h2>4.4 <strong>Point Cloud Upsampler</strong><a class="headerlink" href="#point-cloud-upsampler" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>이미지 디퓨전 모델에서의 계층 구조</p>
+<ul class="simple">
+<li><p>이미지 디퓨전 모델의 경우 가장 좋은 품질은 일반적으로 계층 구조를 사용하는 방식으로 달성된다.</p></li>
+<li><p>이 방식에서는 저해상도의 기본 모델이 출력을 생성한 후, 이를 다른 모델이 업샘플한다.</p></li>
+</ul>
+<p>→ 포인트 클라우드 생성에 이 접근 방식을 사용</p>
+</li>
+<li><p>포인트 클라우드 생성에서의 계층 구조</p>
+<ul class="simple">
+<li><p>큰 베이스 모델로 1K 포인트를 생성한 후, 작은 업샘플링 모델을 사용하여 4K 포인트로 업샘플링 한다.</p></li>
+<li><p>모델 크기가 같을 때, 4K 포인트를 생성하는 데에는 1K 포인트를 생성할 때보다 네 배 더 많은 연산을 필요로 한다.</p></li>
+</ul>
+</li>
+<li><p>업샘플러</p>
+<ul class="simple">
+<li><p>업샘플러는 베이스 모델과 동일한 아키텍처를 사용한다.</p></li>
+<li><p>모델은 저해상도 포인트 클라우드 모델과 동일한 아키텍처를 사용한다.</p></li>
+<li><p>저해상도 포인트 클라우드를 입력 받기 위한 추가 컨디셔닝 토큰이 있다.</p></li>
+<li><p>1K 포인트를 조건으로 입력 받아 추가로 3K 포인트를 생성하여 저해상도 포인트 클라우드에 추가한다.</p></li>
+<li><p><span class="math notranslate nohighlight">\(x_t\)</span>에 사용된 레이어가 아닌 별도의 선형 임베딩 레이어를 통해 저해상도 포인트를 전달하여, 모델이 positional encoding을 사용할 필요 없이 조건부 정보와 새로운 포인트를 구별할 수 있도록 한다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="producing-meshes">
+<h2>4.5 Producing Meshes<a class="headerlink" href="#producing-meshes" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>렌더링 기반 평가를 위해 생성된 포인트 클라우드를 직접 렌더링하지 않는다.</p></li>
+<li><p>대신, 포인트 클라우드를 텍스처가 입혀진 메쉬로 변환하고 Blender를 사용해 이러한 메쉬를 렌더링한다.</p></li>
+<li><p>포인트 클라우드에서 메쉬를 생성하는 것은 때때로 어렵고, 본 논문의 모델이 생성한 포인트 클라우드는 종종 균열, 이상치 또는 기타 유형의 노이즈를 가지고 있어 더욱 어렵다.</p></li>
+<li><p>포인트 클라우드에서 메쉬를 생성하기 위해 사전 학습된 SAP 모델을 사용해 봤으나 포인트 클라우드에 존재했던 큰 부분이나 중요한 세부 사항을 잃어버리는 경우가 있었다.</p></li>
+<li><p>따라서 본 논문에서는 회귀(regression) 기반 모델을 사용하여 signed distance field를 예측하고, 여기에 marching cubes 알고리즘을 적용하여 메쉬를 추출했다.</p></li>
+<li><p>그런 다음 원래 포인트 클라우드에서 가장 가까운 점의 색을 사용하여 메쉬의 각 버텍스에 색을 할당했다.</p></li>
+</ul>
+<aside>
+💡
+<ul class="simple">
+<li><p>렌더링 과정 요약</p>
+<ol class="arabic simple">
+<li><p>포인트 클라우드에서 SDF 예측: 회귀 기반 모델을 사용하여 포인트 클라우드로부터 객체의 SDF를 예측한다.</p></li>
+<li><p>메쉬 생성: 예측된 SDF를 기반으로 marching cubes 알고리즘을 적용하여 메쉬를 생성한다.</p></li>
+<li><p>색상 할당: 생성된 메쉬의 각 버텍스에 원래 포인트 클라우드의 색상을 할당하여 텍스처가 입혀진 메쉬를 만든다.</p></li>
+<li><p>Blender를 통한 렌더링: 최종적으로 텍스처가 입혀진 메쉬를 Blender를 사용하여 렌더링한다.</p></li>
+</ol>
+</li>
+</ul>
+</aside>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="results">
+<h1>5. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>평가 지표: CLIP R-Precision, P-IS, P-FID</p>
+<ul>
+<li><p>CLIP R-Precision</p>
+<ul>
+<li><p>특정 객체를 기준으로 하여 모델이 텍스트 설명과 얼마나 잘 일치하는지를 평가하는 지표</p></li>
+<li><p>계산하는 과정</p>
+<ul>
+<li><p>생성된 이미지와 텍스트 프롬프트를 기반으로 CLIP 모델을 사용하여 각 이미지의 텍스트 임베딩을 계산한다.</p></li>
+<li><p>CLIP 모델에서 계산된 텍스트 임베딩과 이미지 임베딩 간의 유사도를 계산한다.</p></li>
+<li><p>유사도가 가장 높은 상위 R개의 이미지 중 실제로 맞는 이미지의 비율을 계산한다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>P-IS, P-FID</p>
+<ul>
+<li><p>포인트 클라우드의 Inception Score와 FID를 평가하기 위해 본 논문에서 도입한 지표</p></li>
+<li><p>수정된 PointNet++ 모델을 사용하여 포인트 클라우드에서 특징을 추출하고 클래스 확률을 예측</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<section id="model-scaling-and-ablations">
+<h2>5.1 <strong>Model Scaling and Ablations</strong><a class="headerlink" href="#model-scaling-and-ablations" title="Permalink to this heading">#</a></h2>
+<p>저자들은 다음과 같은 베이스 모델에 대하여 학습 중에 생성한 샘플들로 평가하였다.</p>
+<ul class="simple">
+<li><p>40M (uncond.): 어떠한 조건 정보도 없는 작은 모델</p></li>
+<li><p>40M (text vec.): 텍스트 캡션에만 의존하는 작은 모델 (이미지 사용 x), 파인튜닝된 GLIDE 모델 활용 x</p></li>
+<li><p>40M (image vec.): 렌더링된 이미지의 CLIP 이미지 임베딩에 의존하는 작은 모델, 단일 CLIP 임베딩 사용</p></li>
+<li><p>40M: CLIP 잠재 그리드(latent grid)를 통한 전체 이미지 조건을 사용하는 작은 모델</p></li>
+<li><p>300M: CLIP 잠재 그리드를 통한 전체 이미지 조건을 사용하는 중간 모델</p></li>
+<li><p>1B: CLIP 잠재 그리드를 통한 전체 이미지 조건을 사용하는 큰 모델</p></li>
+</ul>
+<p>평가 결과는 아래 그래프와 같다.</p>
+<figure class="align-default" id="id3">
+<img alt="Point_E_03" class="bg-primary mb-1" src="../../_images/032.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 683 </span><span class="caption-text">평가 결과</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>결과</p>
+<ul>
+<li><p>텍스트 조건만 사용하고 텍스트에서 이미지로의 단계가 없는 경우 CLIP R-Precision이 매우 나쁘게 나오는 것을 발견</p></li>
+<li><p>이미지를 조건으로 사용할 때 단일 CLIP 임베딩보다 임베딩 그리드를 사용하는 것이 성능이 더 나은 것을 발견 →  조건 이미지에 대해 더 많은 (공간적인) 정보를 보는 것이 포인트 클라우드 모델에 이점이 있음을 시사</p></li>
+<li><p>모델의 스케일을 증가시키면 P-FID 수렴 속도가 향상되고 최종 CLIP R-Precision이 증가하는 것을 발견</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="qualitative-results">
+<h2>5.2 Qualitative Results<a class="headerlink" href="#qualitative-results" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>포인트 클라우드 생성 결과</p>
+<figure class="align-default" id="id4">
+<img alt="Point_E_04" class="bg-primary mb-1" src="../../_images/042.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 684 </span><span class="caption-text">포인트클라우드 생성 결과</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Point·E 모델이 복잡한 프롬프트에 대해 종종 일관된 고품질의 3D 형상을 생성할 수 있다는 것을 발견했다.</p></li>
+<li><p>때때로 포인트 클라우드 디퓨전 모델은 조건화된 이미지를 이해하지 못하거나 예측할 수 없는 경우가 있다.  이는 주로 두 가지 문제 중 하나로 인해 발생한다.</p>
+<figure class="align-default" id="id5">
+<img alt="Point_E_05" class="bg-primary mb-1" src="../../_images/052.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 685 </span><span class="caption-text">잘못 추론한 예시</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>모델이 이미지에 나타난 객체의 모양을 잘못 해석하는 경우</p></li>
+<li><p>모델이 이미지에서 가려진 형상의 일부를 잘못 추론하는 경우</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="comparison-to-other-methods">
+<h2><strong>5.3 Comparison to Other Methods</strong><a class="headerlink" href="#comparison-to-other-methods" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>CLIP-R-Precision 지표를 이용하여 Point·E를 다른 3D 생성 기술과 비교했다.</p>
+<figure class="align-default" id="id6">
+<img alt="Point_E_06" class="bg-primary mb-1" src="../../_images/062.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 686 </span><span class="caption-text">CLIP-R-Precision 성능</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Point·E는 state-of-the-art 기술(DreamFusion)보다 성능이 좋지 않지만, 이 불일치의 일부를 설명할 수 있는 이 평가의 두 가지 미묘한 점에 주목해야 한다.</p>
+<ol class="arabic simple">
+<li><p>DreamFusion과 같은 멀티뷰 최적화 기반 방법과 달리 Point·E는 텍스트 프롬프트와 일치하도록 모든 뷰를 명시적으로 최적화하지 않는다. 특정 객체가 모든 각도에서 쉽게 식별되지 않을 수 있기 때문에 CLIP R-Precision이 낮아질 수 있다.</p></li>
+<li><p>본 논문의 방법은 렌더링 전에 포인트 클라우드를 전처리해야 하는데, 포인트 클라우드를 메쉬로 변환하는 것은 어려운 문제다. 본 논문이 사용하는 접근 방식은 때때로 포인트 클라우드 자체에 있는 정보를 잃을 수 있다.</p></li>
+</ol>
+</li>
+<li><p>Point·E는 최신 테크닉보다 이 평가에서 성능이 좋지 않지만 짧은 시간 내에 샘플을 생성한다.</p></li>
+<li><p>이를 통해 보다 실용적으로 응용 프로그램을 만들거나 많은 개체를 샘플링하고 최상의 개체를 휴리스틱을 따라 선택하여 고품질 3D 개체를 찾을 수 있다.</p></li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="limitations-and-future-work">
+<h1><strong>6. Limitations and Future Work</strong><a class="headerlink" href="#limitations-and-future-work" title="Permalink to this heading">#</a></h1>
+<ul>
+<li><p>합성 렌더링을 필요로 한다. → 향후 실제 세계 이미지를 조건으로 하는 3D 생성기를 훈련시켜 해결할 수 있을 것</p></li>
+<li><p>색상이 있는 3D 형태를 생성하지만, 이 과정은 비교적 낮은 해상도의 3D 형식(포인트 클라우드)로 이루어진다. 형상이나 질감의 세부 사항을 캡처하지 못한다. → 메쉬나 NeRF와 같은 고해상도 3D 표현을 생성하도록 확장하면 해결할 수 있을 것</p></li>
+<li><p>최적화 기반 기술(optimization-based techniques)을 초기화하여 초기 수렴 속도를 높이는 데 사용할 수 있다.</p></li>
+<li><p>이 모델이 DALL·E 2 시스템과 많은 제한 사항을 공유할 것으로 예상한다. (데이터셋에서 야기된 많은 편향을 포함할 수 있다)</p></li>
+<li><p>모델이 실제로 물리적으로 제작될 수 있는 위험한 물체의 청사진을 생성할 수 있다.</p>
+<figure class="align-default" id="id7">
+<img alt="Point_E_07" class="bg-primary mb-1" src="../../_images/072.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 687 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Point·E는 합성된 뷰를 생성하고 이를 기반으로 조건화된 색상 포인트 클라우드를 생성하는 텍스트 조건 합성 시스템이다.</p></li>
+<li><p>Point·E가 텍스트 프롬프트에 의해 조건화된 다양하고 복잡한 3D 형상을 효율적으로 생성할 수 있는 능력을 갖추고 있다는 것을 발견했다.</p></li>
+<li><p>본 논문의 방식이 텍스트에서 3D로의 합성 분야에서의 추가적인 연구의 시작점으로 기여할 수 있기를 희망한다.</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="3DGS.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">3D Gaussian Splatting for Real-Time Radiance Field Rendering</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Shap-E.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Shap-E</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4-1. Dataset</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#view-synthesis-glide-model">4.2 View Synthesis GLIDE Model</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-diffusion">4.3  <strong>Point Cloud Diffusion</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#point-cloud-upsampler">4.4 <strong>Point Cloud Upsampler</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#producing-meshes">4.5 Producing Meshes</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">5. Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#model-scaling-and-ablations">5.1 <strong>Model Scaling and Ablations</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-results">5.2 Qualitative Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods"><strong>5.3 Comparison to Other Methods</strong></a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work"><strong>6. Limitations and Future Work</strong></a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/ProlificDreamer.html b/docs/review/ProlificDreamer.html
new file mode 100755
index 00000000..dda07988
--- /dev/null
+++ b/docs/review/ProlificDreamer.html
@@ -0,0 +1,1105 @@
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/ProlificDreamer';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DreamGaussian" href="DreamGaussian.html" />
+    <link rel="prev" title="Zero123++" href="zero123plus.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/ProlificDreamer.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/ProlificDreamer.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1 Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-dms">Diffusion models (DMs)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-by-score-distillation-sampling"><strong>Text-to-3D by score distillation sampling</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representations">3D representations</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-score-distillation">3. Variational Score Distillation</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-3d-distribution-via-variational-inference">3.1 Sampling from 3D Distribution via Variational Inference</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#update-rule-for-variational-score-distillation">3.2 Update Rule for Variational Score Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-sds">3.3 Comparison with SDS</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prolific-dreamer">4. Prolific Dreamer</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#design-space-of-text-to-3d-generation">4.1 Design Space of Text-to-3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representation-and-training">4.2 3D Representation and Training</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-of-prolific-dreamer">5.1 Results of Prolific Dreamer</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.2 Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#future-work">Future Work</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation (NeurIPS 2023 Spotlight)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2305.16213">https://arxiv.org/abs/2305.16213</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/thu-ml/prolificdreamer?tab=readme-ov-file">thu-ml/prolificdreamer</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
+<li><p><strong>Last updated on Dec. 26, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="prolificdreamer-high-fidelity-and-diverse-text-to-3d-generation-with-variational-score-distillation">
+<h1>ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation<a class="headerlink" href="#prolificdreamer-high-fidelity-and-diverse-text-to-3d-generation-with-variational-score-distillation" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>view generation이 가능한 DM의 특성을 3D rendering 모델로 전달하여 pretrained 된 DM이 생성하는 이미지 분포와 3D representation의 분포를 맞춰가는 것으로 Dream Fusion(SDS)과 유사하지만 개선된 아이디어(VSD)를 제안하고자 한 논문이다.</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="mb-1 reference internal image-reference" href="../../_images/image23.png"><img alt="prolificdreamer_1" class="mb-1" src="../../_images/image23.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 754 </span><span class="caption-text">Prolific Dreamer Overview</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>📌  <strong>Prolific Dreamer 2-stage approach</strong></p>
+<ol class="arabic simple">
+<li><p>optimize a high-resolution NeRF by <strong>VSD</strong></p></li>
+<li><p>geometry optimization of mesh from NeRF with <strong>SDS</strong> (optional)</p></li>
+</ol>
+<p>appendix를 참고하면, triangle 크기가 비교적 클때 VSD와 SDS의 차이가 크지 않으므로 SDS를 사용했다고 하며, 더 섬세한 mesh의 경우 VSD가 SDS에 비해 표현력이 좋을것으로 믿는다고 함.</p>
+<figure class="align-default" id="id2">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_1.png"><img alt="prolificdreamer_2" class="mb-1" src="../../_images/image_1.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 755 </span><span class="caption-text">ProlificDreamer vs. DreamFusion 정성적 결과</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id3">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_2.png"><img alt="prolificdreamer_3" class="mb-1" src="../../_images/image_2.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 756 </span><span class="caption-text">ProlificDreamer vs. DreamFusion 정성적 결과</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1 Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>고품질의 3D content를 생산하는 것은 품이 많이 드는 일이다. 이러한 어려움을 해결하기 위해 text-to-3D 방식이 발전하고 있다. text 설명(description)을 기반으로 3D content 생성을 자동화하는 방식은 유망하며 다양한 분야(architecture, animation, gaming, AR/VR)에서 paradigm의 변화를 일으킬 수 있을 것이다.</p>
+<p>Diffusion model의 text-to-image 생성력을 바탕으로 3D content를 생성하려는 DreamFusion과 같은 시도가 있었다. Dream Fusion은  <strong>Score Distillation Sampling(SDS)</strong> 알고리즘을 이용해 단일 3D representation을 최적화 했다. 이는 어떤 방향에서 렌더링된 이미지라도 주어진 text에 대해 높은 likelihood를 갖도록 한것으로써 diffusion model에 의해 evaluate되었다. (diffusion model을 loss에 활용) 하지만 over-saturation, over-smoothing, low-diversity 문제가 발생했다. 또한 text-to-3D의 design space에서 orthogonal 한 요소(rendering resolution - distillation time schedule)들에 대한 파악이 아직 부족하다.</p>
+<p>본 논문에서는 섬세한 3D representation을 얻기 위해 이러한 모든 요소에 대해 systematic study를 진행한다. 먼저 <strong>Variational Score Distillation(VSD)</strong> 를 제안한다. 이는 주어진 textual prompt와 해당 3D scene을 하나의 random variable로 취급하며 하나의 점(single point)로 취급한 SDS와는 다르다. VSD는 3D scene의 분포를 최적화하며, 모든 시점에서 렌더링된 이미지의 분포가 pretrained 2D diffusion model이 정의하는 분포와 KL divergence 관점에서 최대한 가까워지도록 한다. 이러한 variational formulation에서 VSD는 multiple 3D scene을 하나의 prompt에 정렬할 수 있는 특성을 가질 수 있다.</p>
+<ul>
+<li><p>원문참고</p>
+<p>VSD optimizes a distribution of 3D scenes such that the distribution induced on images rendered from all views aligns as closely as possible, in terms of KL divergence, with the one defined by the pretrained 2D diffusion model (see Sec. 3.1). Under this variational formulation, VSD naturally characterizes the phenomenon that multiple 3D scenes can potentially align with one prompt.</p>
+</li>
+</ul>
+<p>해당 variational formulation을 효율적으로 풀기 위해 VSD는 <strong>particle-based variational inference</strong>를 도입하고, 3D representation을 표현하기 위해 하나의 3D parameters 집합을 particles로 표현하였다. <strong>Wasserstein gradient flow</strong>를 이용해 이러한 particles로 부터 새로운 gradient-based update rule을 이끌어 냈다. 이는 최적화 수렴 후, 해당 particles가 desired distribution으로 부터 sample된 것임을 보장한다. update 시에는 diffused rendered images의 분포의 score function이 필요한데 이는 <strong>pretrained diffusion model + low-rank adaptation(LoRA)</strong> 로 얻을 수 있었다. 최종적으로 particles과 score function을 업데이트 하는 형태가 된다.</p>
+<p>Sec 3.3에서 SDS는 variational distribution에 single-point Dirac distribution을 사용하는 VSD라고 볼 수 있음을 보인다. 이를 통해 SDS의 diversity와 fidelity가 낮은 이유를 알 수 있다. single particle만으로도 VSD는 parametric score model을 학습할 수 있고 잠재적으로 SDS보다 더 나은 생성 결과를 제공할 수 있다. 또한 동일한 렌더링 함수를 이용해 2D space에서 SDS와 VSD를 비교하여 3D 요소만 분리하여 비교한 결과를 담았다. diffusion model의 고전 샘플링 방식과 같이 VSD는 CFG의 가중치 조절을 통해 보다 사실적인 sample을 생성할 수 있다. 반면 SDS는 이전 text-to-3D 연구와 유사한 over-saturation, over-smoothing 문제를 보이는 부족한 결과를 보였다.</p>
+<figure class="align-default" id="id4">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_3.png"><img alt="prolificdreamer_3" class="mb-1" src="../../_images/image_3.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 757 </span><span class="caption-text">Prolific Dreamer</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+\delta(x)=\begin{cases}0, &amp;{x\neq 0} \\ \infty, &amp;{x=0} \end{cases}
+\end{split}\]</div>
+<div class="math notranslate nohighlight">
+\[
+\int_{-\infty} ^\infty \delta(x) dx=1
+\]</div>
+<p>Sec 4는 text-to-3D 알고리즘의 orthogonal 요소들에 대한 추가적인 <strong>systematic study</strong>와 clear <strong>design space</strong>를 담고 있다. 특히 훈련과정 중 고화질 렌더링과 시각적 품질 개선을 위한 <strong>annealed distilling time schedule</strong>을 제안한다. 또한 복잡한 scene을 표현하기 위한 <strong>scene initialization</strong>을 제안한다. 요소들에 대한 ablation study는 Sec 5에서 볼 수 있으며, 앞서 언급한 요소들은 VSD에 효과적임을 보인다. 결론적으로 high-fidelity, diverse 3D 결과를 얻을 수 있으며 이를 <strong>ProlificDreamer</strong>라고 한다.</p>
+<p>Sec 5에서 ProlificDreamer의 고화질(512x512) rendering 능력과 rich structure와 complex effects를 Neural Radiance Fields(NeRF)상에서 표현할 수 있음을 보인다. ProlificDreamer는 다중 물체가 포함된 복잡한 scene의 360도 전방향을 성공적으로 표현하는 것에 처음으로 성공했다. 게다가 NeRF로 초기화 한 후 ProlificDreamer로 세세하고 photorealistic한 3D texture mesh들을 생성할 수있다.</p>
+<figure class="align-default" id="id5">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_4.png"><img alt="prolificdreamer_5" class="mb-1" src="../../_images/image_4.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 758 </span><span class="caption-text">Prolific Dreamer  생성 결과</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<section id="diffusion-models-dms">
+<h2>Diffusion models (DMs)<a class="headerlink" href="#diffusion-models-dms" title="Permalink to this heading">#</a></h2>
+<p><strong>Score-Based Generative Modeling through Stochastic Differential Equations</strong></p>
+<ul>
+<li><p><strong>forward process</strong> <span class="math notranslate nohighlight">\(\{q_t\}_{t\in[0,1]}\)</span> - gradually add noise to a data point <span class="math notranslate nohighlight">\(x_0\sim p_0(x_0)\)</span></p>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+    q_t(x_t|x_0):=\mathcal N(\alpha_tx_0,\sigma_t^2 I)\\q_t(x_t):=\int q_t(x_t|x_0)q_0(x_0) dx_0
+    \end{split}\]</div>
+<p><span class="math notranslate nohighlight">\(\alpha_t, \sigma_t &gt;0\)</span> 는 hyperparameter로 <span class="math notranslate nohighlight">\(\alpha_0\approx 1,\sigma_0\approx 0,\alpha_1\approx 0, \sigma_1\approx 1\)</span> 이다.</p>
+</li>
+<li><p><strong>reverse process</strong> <span class="math notranslate nohighlight">\(p_t\)</span> - denoising from <span class="math notranslate nohighlight">\(p_1(x_1):=\mathcal N(0,I)\)</span> by predicting the noise added to a clean data <span class="math notranslate nohighlight">\(x_0\)</span></p>
+<p>noise prediction network <span class="math notranslate nohighlight">\(\epsilon_\phi(x_t,t)\)</span>을 학습하는 과정은 아래와 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal L_\text{Diff}(\phi):=\Bbb E_{x_0\sim q_0(x_0),t\sim\mathcal U(0,1),\epsilon \sim \mathcal N(0,I)}\Big[ \omega(t)\|\epsilon_\phi(\alpha_t x_0+\sigma_t\epsilon,t)-\epsilon\|_2^2\Big], \tag 1
+    \]</div>
+<p><span class="math notranslate nohighlight">\(\omega(t)\)</span>는 time dependent weighting function이다. 훈련이 끝나면 <span class="math notranslate nohighlight">\(p_t\approx q_t\)</span> 가 되며 따라서 <span class="math notranslate nohighlight">\(p_0\approx q_0\)</span> 으로 sample들을 그릴 수 있게 된다. 덧붙여 noise prediction network는 <span class="math notranslate nohighlight">\(p_t, q_t\)</span> 의 score function을 approximating하는 데에도 사용가능하다. <span class="math notranslate nohighlight">\(\triangledown_{x_t}\text{log}q_t(x_t)\approx\triangledown_{x_t}\text{log}p_t(x_t)\approx-\epsilon_\phi(x_t,t)/\sigma_t\)</span></p>
+</li>
+</ul>
+<p>diffusion model이 가장 활발히 활용되고 있는 분야 중 하나는 text-to-image generation으로 text prompt <span class="math notranslate nohighlight">\(y\)</span>를 조건으로 noise를 예측한다. 또한 Classifier-free Guidance를 통해 샘플 품질과 다양성을 조절한다. guidance scale이 커질 수록 품질이 올라가지만 다양성이 감소하는 경향을 보인다.</p>
+</section>
+<section id="text-to-3d-by-score-distillation-sampling">
+<h2><strong>Text-to-3D by score distillation sampling</strong><a class="headerlink" href="#text-to-3d-by-score-distillation-sampling" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>Score Distillation Sampling (SDS)</strong> from DreamFusion</p></li>
+<li><p><strong>Score Jacobian Chaining (SJC)</strong> 라고도 불리며 Zero-1-to-3, Magic3d, Fantasia3d, Latent NeRF 등 다양한 연구에 활용되고 있다.</p></li>
+</ul>
+<p>사전학습된 T2I diffusion model <span class="math notranslate nohighlight">\(p_t(x_t|y)\)</span>과 noise prediction network <span class="math notranslate nohighlight">\(\epsilon_\text{pretrained}(x_t,t,y)\)</span> 을 이용해 SDS는 single 3D representation의 parameter <span class="math notranslate nohighlight">\(\theta \in \Theta\)</span>를 최적화 한다. 이때 사용하는 metric은 <strong>Euclidean metric</strong>으로 <span class="math notranslate nohighlight">\(\Theta\)</span>는 Euclidean space이다.</p>
+<p>camera parameter <span class="math notranslate nohighlight">\(c\)</span>가 분포 <span class="math notranslate nohighlight">\(p(c)\)</span>를 따르고,
+differentiable rendering mapping <span class="math notranslate nohighlight">\(g(\cdot,c):\Theta \rightarrow \Bbb R^d\)</span> 이 주어진다고 하자.
+<span class="math notranslate nohighlight">\(y^c\)</span>를 view dependent prompt라고 하면, rendering image <span class="math notranslate nohighlight">\(g(\theta,c)\)</span>에서 시작하는 forward diffusion process는 <span class="math notranslate nohighlight">\(q_t^\theta(x_t|c)\)</span>로 표현할 수 있다.</p>
+<p>SDS는 parameter <span class="math notranslate nohighlight">\(\theta\)</span>를 아래와 같이 최적화한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [\frac{\sigma_t}{\alpha_t}\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+\triangledown_\theta\mathcal L_{\text{SDS}}(\theta)\approx\Bbb E_{t,\epsilon,c}\Big [\omega(t)\space \big(\epsilon_\text{pretrained}(x_t,t,y^c)-\epsilon\big)\frac{\partial g(\theta,c)}{\partial\theta}\Big] \tag{3}
+\]</div>
+</section>
+<section id="d-representations">
+<h2>3D representations<a class="headerlink" href="#d-representations" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p><strong>NeRF</strong> → MLP</p>
+<ul>
+<li><p>multilayer perceptron을 이용해 3D 객체를 표현한다. 3차원 공간상의 위치 정보를 입력하면 해당하는 색과 밀도를 얻을 수 있다. 이때 <span class="math notranslate nohighlight">\(\theta\)</span>는 MLP의 내부 파라미터를 의미한다.</p></li>
+<li><p>카메라 위치 <span class="math notranslate nohighlight">\(c\)</span>가 주어질때, rendering process <span class="math notranslate nohighlight">\(g(\theta,c)\)</span>는 casting rays로 정의되며 각 ray의 sampling points의 색을 가중합하여 각 픽셀의 값을 결정한다.</p></li>
+<li><p>NeRF는 최적화 측면에서 유연하고 복잡한 장면도 표현가능하다. (매우 상대적인 표현으로 사료됨.)</p></li>
+</ul>
+</li>
+<li><p><strong>Textured mesh</strong> → triangle mesh + texture</p>
+<ul>
+<li><p>triangle mesh와 해당 mesh 표면의 texture, color로 3D 객체를 표현한다. 여기서 3D parameter <span class="math notranslate nohighlight">\(\theta\)</span>는 triangle meshes의 좌표와 texture parameter를 의미한다.</p></li>
+<li><p>카메라 위치 <span class="math notranslate nohighlight">\(c\)</span>가 주어질때, rendering process <span class="math notranslate nohighlight">\(g(\theta,c)\)</span>는 casting rays로 정의되며 각 ray가 지나는 mesh의 intersection의 색을 계산함으로써 각 픽셀의 값을 결정한다.</p></li>
+<li><p>Textured mesh는 고화질 렌더링이 가능하고 differentiable rasterization을 이용하면 렌더링 속도가 빠르다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="variational-score-distillation">
+<h1>3. Variational Score Distillation<a class="headerlink" href="#variational-score-distillation" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id6">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_5.png"><img alt="prolificdreamer_5" class="mb-1" src="../../_images/image_5.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 759 </span><span class="caption-text">Prolific Dreamer</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="sampling-from-3d-distribution-via-variational-inference">
+<h2>3.1 Sampling from 3D Distribution via Variational Inference<a class="headerlink" href="#sampling-from-3d-distribution-via-variational-inference" title="Permalink to this heading">#</a></h2>
+<p>3D representation에 사용되는 parameter <span class="math notranslate nohighlight">\(\theta\)</span>들은 확률밀도 <span class="math notranslate nohighlight">\(\mu(\theta|y)\)</span>로 모델링 할 수 있다.
+<span class="math notranslate nohighlight">\(q_0^\mu(x_0|c,y)\)</span>는 rendered image <span class="math notranslate nohighlight">\(x_0:=g(\theta,c)\)</span>의 분포, <span class="math notranslate nohighlight">\(p_0(x_0|y^c)\)</span>는 <span class="math notranslate nohighlight">\(t=0\)</span> 일때 marginal distribution이다.</p>
+<p>고품질의 3D representation을 얻기 위해서 distribution <span class="math notranslate nohighlight">\(\mu\)</span>를 최적화 하는 방법을 제안한다. 사전학습된 DM을 이용해 모든 view에 대한 rendered image sample과 distribution <span class="math notranslate nohighlight">\(\mu\)</span>를 정렬(align)하는 것은 아래와 같이 두 분포의 거리를 좁히는 것이라고 할 수 있다.</p>
+<div class="math notranslate nohighlight">
+\[
+\text{min}_\mu D_\text{KL}\big(q_0^\mu(x_0|c,y)\|p_0(x_0|y^c)\big) \tag{4}
+\]</div>
+<ul>
+<li><p>SDS eq.2</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [({\sigma_t}/{\alpha_t})\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+    \]</div>
+</li>
+</ul>
+<p>위의 식은 일반적인 variational inference problem으로 variational distribution <span class="math notranslate nohighlight">\(q_0^\mu(x_0|c,y)\)</span>을 target distribution <span class="math notranslate nohighlight">\(p_0(x_0|y^c)\)</span>으로 근사(distill)하는 방식을 사용한다.</p>
+<p>위의 식 4의 prob을 직접 푸는것은 복잡하고 비효율적이기 때문에 diffusion model을 이용해 series of optimization problem을 통해 해결하고자 한다. <span class="math notranslate nohighlight">\(t\)</span>가 <span class="math notranslate nohighlight">\(T\)</span>를 향해 점점 커질때, 위의 최적화 문제는 diffused distribution이 gaussian distribution에 가까워 지며 점점 쉬워진다.</p>
+<p>(기존 SDS 최적화 식의 parameter <span class="math notranslate nohighlight">\(\theta\)</span>가 distribution <span class="math notranslate nohighlight">\(\mu\)</span>로 바뀐 형태)</p>
+<div class="math notranslate nohighlight">
+\[
+\mu^*:=\text{argmin}_\mu\Bbb E_{t,c}\Big[ (\sigma_t/\alpha_t)\omega(t)D_{KL}(q_t^\mu(x_t|c,y)\|p_t(x_t|y^c))\Big] \tag5
+\]</div>
+<ul>
+<li><p>SDS eq.2</p>
+<div class="math notranslate nohighlight">
+\[
+    \mathcal L_{\text{SDS}}(\theta):=\Bbb E_{t,c}\Big [({\sigma_t}/{\alpha_t})\space\omega(t)\space D_\text{KL}(q_t^\theta(x_t|c)\|p_t(x_t|y^c))\Big] \tag{2}
+    \]</div>
+</li>
+</ul>
+</section>
+<section id="update-rule-for-variational-score-distillation">
+<h2>3.2 Update Rule for Variational Score Distillation<a class="headerlink" href="#update-rule-for-variational-score-distillation" title="Permalink to this heading">#</a></h2>
+<p>식 5의 prob을 풀기위해 또 다른 생성모델을 훈련하여 풀 수 있는데 이는 resource가 많이 필요하고 최적화 과정이 복잡해진다. 앞선 particle-based variational inference 연구와 유사하게,  n개의 3D particles를 유지하고 해당 particles을 위한 새로운 update rule을 제안한다. 즉, <span class="math notranslate nohighlight">\(\{\theta^{(i)}\}^n_{i=1}\)</span>을 현재 distribution <span class="math notranslate nohighlight">\(\mu\)</span>를 표현하기 위해 사용하는 것이다. <span class="math notranslate nohighlight">\(\theta^{(i)}\)</span>는 최적화 과정이 수렴하고 나면 최적 분포 <span class="math notranslate nohighlight">\(\mu^*\)</span>에서 샘플링된 것이 된다.</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{d\theta_\tau}{d\tau}=-\Bbb E_{t,\epsilon,c}\Big[\omega(t)\big(-\sigma_t\triangledown_{x_t} \text{log}p_t(x_t|y^c)-(-\sigma_t\triangledown_{x_t}\text{log}q_t^{\mu_\tau}(x_t|c,y))\big)\frac{\partial g(\theta_\tau,c)}{\partial\theta_\tau}\Big] \tag 7
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+\text{min}_{\phi}\sum^n_{i=1}\Bbb E_{t\sim\mathcal U(0,1),\epsilon\sim\mathcal N(0,I),c\sim p(c)}\Big[\|\epsilon_\phi(\alpha_t g(\theta^{(i)},c)+\sigma_t\epsilon,t,c,y)-\epsilon\|^2_2\Big] \tag 8
+\]</div>
+<p>최종적으로는 아래와 같은 objective function을 얻는다.</p>
+<div class="math notranslate nohighlight">
+\[
+\triangledown_\theta\mathcal L_{VSD}(\theta)\triangleq\Bbb E_{t,\epsilon,c}\Big[\omega(t
+)(\epsilon_{\text{pretrain}}(x_t,t,y^c)-\epsilon_\phi(x_t,t,c,y))\frac{\partial g(\theta,c)}{\partial\theta}\Big]\tag {9}
+\]</div>
+<figure class="align-default" id="id7">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_6.png"><img alt="prolificdreamer_6" class="mb-1" src="../../_images/image_6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 760 </span><span class="caption-text">Prolific Dreamer</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="comparison-with-sds">
+<h2>3.3 Comparison with SDS<a class="headerlink" href="#comparison-with-sds" title="Permalink to this heading">#</a></h2>
+<p><strong>SDS as a special case of VSD</strong></p>
+<div class="math notranslate nohighlight">
+\[
+\triangledown_\theta\mathcal L_{\text{SDS}}(\theta)\approx\Bbb E_{t,\epsilon,c}\Big [\omega(t)\space \big(\epsilon_\text{pretrained}(x_t,t,y^c)-\epsilon\big)\frac{\partial g(\theta,c)}{\partial\theta}\Big] \tag{SDS}
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+\triangledown_\theta\mathcal L_{VSD}(\theta)\triangleq\Bbb E_{t,\epsilon,c}\Big[\omega(t
+)(\epsilon_{\text{pretrain}}(x_t,t,y^c)-\epsilon_\phi(x_t,t,c,y))\frac{\partial g(\theta,c)}{\partial\theta}\Big]\tag {VSD}
+\]</div>
+<p>SDS는 <span class="math notranslate nohighlight">\(\mu(\theta|y)\approx \delta(\theta-\theta^{(1)})\)</span> 인 VSD의 special case에 해당한다. VSD는 multiple particles를 사용할 수 있을 뿐 아니라 parametric score function <span class="math notranslate nohighlight">\(\epsilon_\phi\)</span>도 학습하기 때문에 SDS와 동일하게 single particle을 사용해도 성능이 좋다. 또한 LoRA를 사용해 text prompt 로 부터 추가적으로 뽑아낸 정보를 estimation <span class="math notranslate nohighlight">\(\epsilon_\phi(x_t,t,c,y)\)</span>에 반영할 수 있다.</p>
+<figure class="align-default" id="id8">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_7.png"><img alt="prolificdreamer_7" class="mb-1" src="../../_images/image_7.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 761 </span><span class="caption-text">particle이 뭘까? 느낌적인 느낌을 받아보자.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>VSD is friendly to CFG</strong></p>
+<p>VSD는 사전학습된 diffusion model을 이용해 optimal <span class="math notranslate nohighlight">\(\mu^*\)</span>에서 sample <span class="math notranslate nohighlight">\(\theta\)</span>를 추출하고자 한다. 때문에 3D sampling에서 CFG를 tuning한 효과가 기존 2D이미지 생성시 DPM-solver에서 CFG 값을 조절하는 것과 유사하다. 그래서 CFG 값을 조절하면서 더 다양한 실험결과를 얻을 수 있게 된다. SDS도 이점은 마찬가지 이나, CFG 값이 클때만 유효한 3D content를 만들어 낼 수 있었다.</p>
+<p><strong>VSD vs. SDS in 2D experiments that isolate 3D representations</strong></p>
+<p>동일한 rendering 모델을 이용해 VSD와 SDS의 3D 표현력만 비교한 결과이다.</p>
+<figure class="align-default" id="id9">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_81.png"><img alt="prolificdreamer_8" class="mb-1" src="../../_images/image_81.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 762 </span><span class="caption-text">동일한 rendering 모델을 이용한 2D 실험에서의 VSD와 SDS 비교 결과</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<details>
+<summary>Appendix의 실험결과</summary>
+<figure class="align-default" id="id10">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_91.png"><img alt="prolificdreamer_9" class="mb-1" src="../../_images/image_91.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 763 </span><span class="caption-text">particle 개수에 따른 생성 퀄리티 비교 (single particle을 사용할 때도 SDS보다 성능이 좋다고 함)</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_101.png"><img alt="prolificdreamer_10" class="mb-1" src="../../_images/image_101.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 764 </span><span class="caption-text">2D 이미지 생성으로 비교한 VSD와 SDS의 생성 품질  차이 / SDS는 VSD에 비해 부드럽고 세부표현이 부족하다.</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_111.png"><img alt="prolificdreamer_11" class="mb-1" src="../../_images/image_111.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 765 </span><span class="caption-text">gradient visualization에서도 SDS와 VSD의 차이점을 확인 할 수 있다.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</details>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="prolific-dreamer">
+<h1>4. Prolific Dreamer<a class="headerlink" href="#prolific-dreamer" title="Permalink to this heading">#</a></h1>
+<section id="design-space-of-text-to-3d-generation">
+<h2>4.1 Design Space of Text-to-3D Generation<a class="headerlink" href="#design-space-of-text-to-3d-generation" title="Permalink to this heading">#</a></h2>
+<p><strong>two-stage approach</strong>를 이용해 text-to-3D 생성의 design space를 개선하고자 했다.</p>
+<ol class="arabic simple">
+<li><p><strong>First Stage</strong> - optimize a high-resolution NeRF by VSD</p></li>
+<li><p><strong>Second Stage</strong> - DMTet to extract textured mesh from NeRF</p></li>
+</ol>
+<figure class="align-default" id="id13">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_121.png"><img alt="prolificdreamer_12" class="mb-1" src="../../_images/image_121.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 766 </span><span class="caption-text">Prolific Dreamer와 다른 모델의 특성 비교</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="d-representation-and-training">
+<h2>4.2 3D Representation and Training<a class="headerlink" href="#d-representation-and-training" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id14">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_131.png"><img alt="prolificdreamer_13" class="mb-1" src="../../_images/image_131.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 767 </span><span class="caption-text">Prolific Dreamer 수행 결과</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>High-resolution rendering for NeRF training</strong> (in 1st stage)</p>
+<p>ProlificDreamer에서는 고화질 렌더링을 위해 <strong>Instant NGP</strong>를 사용했으며 VSD를 이용해 512 resolution 까지 NeRF를 최적화 했다. VSD를 사용함으로써 high-fidelity 결과를 얻을 수 있었다.</p>
+<ul class="simple">
+<li><p>Instant NGP</p></li>
+</ul>
+<p><strong>Scene initialization for NeRF training</strong> (in 1st stage)</p>
+<p>NeRF의 초기 density는 <span class="math notranslate nohighlight">\(\sigma_\text{init}(\mu)=\lambda_\sigma(1-\frac{\|\mu\|_2}{r})\)</span>로 초기화 한다. <span class="math notranslate nohighlight">\(\lambda_\sigma\)</span> 는 density strength, <span class="math notranslate nohighlight">\(r\)</span> 는 density radius, <span class="math notranslate nohighlight">\(\mu\)</span>는 3d coordinate이다.</p>
+<p>object-centric scene에서는 Magic3D의 방식을 따랐으며(<span class="math notranslate nohighlight">\(\lambda_\sigma=10, r=0.5\)</span>),
+복잡한 scene의 경우  <span class="math notranslate nohighlight">\(\lambda_\sigma=-10\)</span> 로 하여 density가 거의 비어있도록 하고, <span class="math notranslate nohighlight">\(r\)</span>을 2.5로 하여 camera를 둘러 싸도록 했다.</p>
+<p><strong>Annealed time schedule for score distillation</strong> (in 1st stage)</p>
+<p>단순한 2단계 annealing을 score distillation objective에 적용했다. 이는 SDS나 VSD 모두에 적용가능하다. 초기 몇 스텝에서는 <span class="math notranslate nohighlight">\(t\sim \mathcal U(0.02,0.98)\)</span>로 하고 이후에는 <span class="math notranslate nohighlight">\(t\sim \mathcal U(0.02,0.50)\)</span>로 설정했다.</p>
+<p>여기서 핵심은 <span class="math notranslate nohighlight">\(q_0^\mu(x_0|c,y)\)</span>와 <span class="math notranslate nohighlight">\(p_0(x_0|y^c)\)</span>를 맞추는 것인데 t가 커지면 KL divergence가 학습초기에 더 적당한 최적화 방향으로 갈 수 있다. t가 작으면 더 세부적인 조정이 가능하므로 <span class="math notranslate nohighlight">\(p_t(x^*|y^c)\)</span>와  <span class="math notranslate nohighlight">\(p_0(x^*|y^c)\)</span>의 차를 더 줄일 수 있다.</p>
+<p><strong>Mesh representation and fine-tuning</strong> (in 2nd stage)</p>
+<p>coordinate-based hash grid encoder의 특성을 이용해 NeRF에서 mesh를 추출했다. Fantasia3D의 방법론을 따랐는데 여기서는 geometry와 texture를 분리하여 최적화했다. 첫번째로는 normal map을 이용해 geometry를 최적화하고 두번째로 texture를 최적화하는 식이다. 실험결과에서 이단계에서는 SDS와 VSD의 품질 차이가 크지않아 효율성을 위해 SDS를 사용했다. 하지만 Fantasia3D와 비교했을때 VSD 및 앞선 방법론을 이용해 최적화한 NeRF에서 뽑아낸 mesh는 SDS를 이용한 것보다 뛰어났다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="experiments">
+<h1>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h1>
+<section id="results-of-prolific-dreamer">
+<h2>5.1 Results of Prolific Dreamer<a class="headerlink" href="#results-of-prolific-dreamer" title="Permalink to this heading">#</a></h2>
+<details>
+<summary>Appendix의 실험결과</summary>
+<figure class="align-default" id="id15">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_91.png"><img alt="prolificdreamer_9" class="mb-1" src="../../_images/image_91.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 768 </span><span class="caption-text">particle 개수에 따른 생성 퀄리티 비교 (single particle을 사용할 때도 SDS보다 성능이 좋다고 함)</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id16">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_101.png"><img alt="prolificdreamer_10" class="mb-1" src="../../_images/image_101.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 769 </span><span class="caption-text">2D 이미지 생성으로 비교한 VSD와 SDS의 생성 품질  차이 / SDS는 VSD에 비해 부드럽고 세부표현이 부족하다.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id17">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_111.png"><img alt="prolificdreamer_11" class="mb-1" src="../../_images/image_111.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 770 </span><span class="caption-text">gradient visualization에서도 SDS와 VSD의 차이점을 확인 할 수 있다.</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</details>
+</section>
+<section id="ablation-study">
+<h2>5.2 Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<p><strong>Ablation on NeRF Training</strong></p>
+<p>64x64 rendering + SDS에서 시작하여 요소들을 추가하며 실험한 결과는 아래와 같다.</p>
+<figure class="align-default" id="id18">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_141.png"><img alt="prolificdreamer_14" class="mb-1" src="../../_images/image_141.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 771 </span><span class="caption-text">Prolific Dreamer 실험 결과</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Ablation on mesh fine-tuning</strong></p>
+<figure class="align-default" id="id19">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_151.png"><img alt="prolificdreamer_15" class="mb-1" src="../../_images/image_151.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 772 </span><span class="caption-text">Prolific Dreamer 실험 결과</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Ablation on CFG</strong></p>
+<p>CFG 값이 작으면 diversity 상승, CFG 값이 크면 비교적 diversity가 하락하는 실험결과를 얻음. VSD의 경우 CFG 값이 작을 때에도 좋은 품질의 3D content를 생성할 수 있기 때문에 충분한 diversity의 결과를 얻을 수 있지만 SDS의 경우 CFG 값이 커야만 괜찮은 3D content를 생성하기 때문에 diversity가 하락할 수밖에 없음.</p>
+<figure class="align-default" id="id20">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_161.png"><img alt="prolificdreamer_16" class="mb-1" src="../../_images/image_161.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 773 </span><span class="caption-text">Prolific Dreamer 실험 결과</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id21">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_171.png"><img alt="prolificdreamer_17" class="mb-1" src="../../_images/image_171.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 774 </span><span class="caption-text">Prolific Dreamer 실험 결과</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id22">
+<a class="mb-1 reference internal image-reference" href="../../_images/image_181.png"><img alt="prolificdreamer_18" class="mb-1" src="../../_images/image_181.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 775 </span><span class="caption-text">Prolific Dreamer 실험 결과</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>📌개인적 감상
+
+GAN : WGAN = DreamFusion : ProlificDreamer
+
+GAN : Diffusion = DreamFusion : ProlificDreamer
+</pre></div>
+</div>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="future-work">
+<h1>Future Work<a class="headerlink" href="#future-work" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>GECO</p>
+<ul>
+<li><p>고정된 수의 particle을 이용하는 prolific dreamer의 단점을 보완해 새로운 샘플을 생성할 수 있도록 함.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="zero123plus.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Zero123++</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DreamGaussian.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DreamGaussian</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1 Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models-dms">Diffusion models (DMs)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-3d-by-score-distillation-sampling"><strong>Text-to-3D by score distillation sampling</strong></a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representations">3D representations</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#variational-score-distillation">3. Variational Score Distillation</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-from-3d-distribution-via-variational-inference">3.1 Sampling from 3D Distribution via Variational Inference</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#update-rule-for-variational-score-distillation">3.2 Update Rule for Variational Score Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-sds">3.3 Comparison with SDS</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#prolific-dreamer">4. Prolific Dreamer</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#design-space-of-text-to-3d-generation">4.1 Design Space of Text-to-3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-representation-and-training">4.2 3D Representation and Training</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results-of-prolific-dreamer">5.1 Results of Prolific Dreamer</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">5.2 Ablation Study</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#future-work">Future Work</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
+</html>
\ No newline at end of file
diff --git a/docs/review/SDEdit.html b/docs/review/SDEdit.html
old mode 100644
new mode 100755
index 36b03540..1d120389
--- a/docs/review/SDEdit.html
+++ b/docs/review/SDEdit.html
@@ -1,878 +1,898 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>SDEdit &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/SDEdit';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="SDXL" href="SDXL.html" />
-    <link rel="prev" title="Imagen Editor" href="imagen_editor.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/SDEdit.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/SDEdit.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>SDEdit</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-model">2.1. Score Based Generated Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-diffusion-model-sde-smld">2.2. Score Based Generated Diffusion Model (SDE, SMLD)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2108.01073.pdf">https://arxiv.org/pdf/2108.01073.pdf</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on Oct. 03, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="sdedit">
-<h1>SDEdit<a class="headerlink" href="#sdedit" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>최근 이미지 생성 분야에서의 놀라운 진화 속도가 계속 되어오고있다. (GAN, Diffusion etc..)</p></li>
-<li><p>이 중 이미지에 random noise를 추가해 denoising 과정을 학습하는 Diffusion을 통해 high quality의 이미지를 생성할 수 있다.</p></li>
-<li><p>또, 생성되는 이미지를 사용자가 원하는 방향으로 이끌어내려는 연구 분야도 활발히 진행되고있다 (a.k.a Editing)</p></li>
-<li><p>하지만, GAN 또는 Diffusion을 포함한 방식으로의 Editing에는 몇가지 단점이 있고, SDEdit은 그런 문제점을 해결해나아갔다는 점을 논문의 핵심 Contribution으로 제시하였다.</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Abstract에서 말한 Editing이란, 유저가 생성하고자 하는 Guide를 제시하면 모델은 해당 Guide를 기반으로 이미지를 생성해내는 Vision Task를 의미한다.</p></li>
-<li><p>이때 두가지의 평가요소가 있는데</p>
-<ol class="arabic simple">
-<li><p>faithful : 유저의 Guide를 얼마나 잘 따르는지</p></li>
-<li><p>realistic : 생성된 이미지가 얼마나 real한지</p></li>
-</ol>
-</li>
-<li><p>기존의 연구방식은 크게 두가지로 나뉜다.</p>
-<ol class="arabic simple">
-<li><p>GAN(Generative Adversarial Network) 기반</p></li>
-<li><p>Diffusion 기반</p></li>
-</ol>
-</li>
-<li><p>이 중 기존에 SOTA를 이룬 GAN 방식을 살펴보면 다시 크게 두가지로 나뉜다.</p>
-<ol class="arabic simple">
-<li><p>conditional GAN</p>
-<ul>
-<li><p>특징 : 원본 이미지에서 Edit된 Pair 이미지를 직접 학습</p></li>
-<li><p>단점 : Pair Dataset이 반드시 필요하고, Condition마다 재학습을 요구</p></li>
-</ul>
-</li>
-<li><p>GAN Inversion</p>
-<ul>
-<li><p>특징 : 이미지를 Latent space로 Inversion한 후, Latent vector를 조작해(manipulate) Edited image를 생성</p></li>
-<li><p>단점 : 새로운 loss function이 정의되어야하고, condition마다 재학습을 요구</p></li>
-</ul>
-</li>
-</ol>
-</li>
-<li><p>그에 반해 SDEdit은</p>
-<ol class="arabic simple">
-<li><p>Pair Dataset이 필요하지 않다.</p></li>
-<li><p>추가적인 loss function과 재학습이 모두 필요하지 않다.</p></li>
-<li><p>단 한개의 pretrained weight로 모든 condition의 이미지를 생성할 수 있다.</p></li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="related-works">
-<h2>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h2>
-<section id="score-based-generated-model">
-<h3>2.1. Score Based Generated Model<a class="headerlink" href="#score-based-generated-model" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img04.png"><img alt="SDEdit_00" class="bg-primary mb-1" src="../../_images/img04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 229 </span><span class="caption-text">Image 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Key Idea</p>
-<ul>
-<li><p><em>“Real 이미지들은 실제 데이터 확률 분포에서 높은 값을 유지할 것이다. 따라서, 이미지를 분포가 높은곳으로 update 해나가면 좋은 퀄리티의 이미지를 생성하는 모델을 얻어낼 수 있다.”</em></p></li>
-</ul>
-</li>
-<li><p>이 때, score는 확률 밀도 함수의 순간 기울기(미분값)로 정의한다.</p></li>
-</ul>
-</section>
-<section id="score-based-generated-diffusion-model-sde-smld">
-<h3>2.2. Score Based Generated Diffusion Model (SDE, SMLD)<a class="headerlink" href="#score-based-generated-diffusion-model-sde-smld" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img110.png"><img alt="SDEdit_01" class="bg-primary mb-1" src="../../_images/img110.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 230 </span><span class="caption-text">Image 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>위에서 제시한 Score Based Generated Model에 Diffusion 방식을 적용한 모델</p></li>
-<li><p>Forward Process 과정에서 이미지에 noise를 주입하는데, 이 때 Stochastic Differential Equation 수식을 이용해 noise를 주입한다.</p></li>
-<li><p>또다른 Diffusion 모델인 (Probability based) DDPM과의 차이는 Forward, Reverse process에서 정의하는 equation의 차이 정도이다.</p></li>
-<li><p>paper : <a class="reference external" href="https://arxiv.org/abs/1907.05600">https://arxiv.org/abs/1907.05600</a></p></li>
-</ul>
-</section>
-</section>
-<section id="methods">
-<h2>3. Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
-<ol class="arabic">
-<li><p>Pre-Setup</p>
-<ul>
-<li><p>Guide image의 Level을 정의한다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img25.png"><img alt="SDEdit_02" class="bg-primary mb-1" src="../../_images/img25.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 231 </span><span class="caption-text">Image 3</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>low-level guide : real 이미지위에 image patch를 추가</p></li>
-<li><p>mid-level guide : real 이미지위에 stroke를 추가</p></li>
-<li><p>high-level guide : 단순히 coarse한 stroke의 이미지</p></li>
-</ol>
-</li>
-</ul>
-</li>
-<li><p>Procedure</p>
-<ul>
-<li><p>DDPM과 달리 SDE의 경우, 완전히 noise화된 이미지 즉, random noise로부터 denoising을 진행할 필요가 없다.</p></li>
-<li><p>즉, 적절한 <span class="math notranslate nohighlight">\(t_{0} \in [0,1]\)</span>를 지정한 후 denoising process가 가능하다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img35.png"><img alt="SDEdit_03" class="bg-primary mb-1" src="../../_images/img35.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 232 </span><span class="caption-text">Image 4</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>이 때, 적절한 <span class="math notranslate nohighlight">\(t_{0}\)</span>를 정의해야하는데,</p>
-<ol class="arabic simple">
-<li><p><span class="math notranslate nohighlight">\(t_{0}\)</span> = 1 (i.e. random noise)이면, realistic하지만, faithful 하지않은 이미지</p></li>
-<li><p><span class="math notranslate nohighlight">\(t_{0}\)</span> = 0 이면, faithful하지만, artistic한 이미지</p></li>
-</ol>
-<p>를 얻게된다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img44.png"><img alt="SDEdit_04" class="bg-primary mb-1" src="../../_images/img44.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 233 </span><span class="caption-text">Image 5</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>아래는 SDEdit의 적용 과정이다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img54.png"><img alt="SDEdit_05" class="bg-primary mb-1" src="../../_images/img54.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 234 </span><span class="caption-text">Image 6</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ol>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>Score</p>
-<ul>
-<li><p>Metric</p>
-<ul>
-<li><p>realistic : Kid score (lower is better)</p></li>
-<li><p>faithful : <span class="math notranslate nohighlight">\(L_{2}\)</span> score (lower is better)</p></li>
-<li><p>그 외 종합적인 평가 지표로 survey를 통한 수치를 제시하였다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img64.png"><img alt="SDEdit_06" class="bg-primary mb-1" src="../../_images/img64.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 235 </span><span class="caption-text">Image 7</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>기존의 GAN 방식들과 비교했을 때 Kid, <span class="math notranslate nohighlight">\(L_{2}\)</span> score 모두 더 좋은 수치를 보이는 것을 확인할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>Comparison with GAN (styleGAN-ADA + Inversion)</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img74.png"><img alt="SDEdit_07" class="bg-primary mb-1" src="../../_images/img74.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 236 </span><span class="caption-text">Image 8</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>SDEdit이 GAN Based model보다 더 자연스럽고(realistic), 유저의 guide를 잘 따르는(faithful)것을 확인할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>Comparison with original blending technique</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img85.png"><img alt="SDEdit_08" class="bg-primary mb-1" src="../../_images/img85.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 237 </span><span class="caption-text">Image 9</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img94.png"><img alt="SDEdit_09" class="bg-primary mb-1" src="../../_images/img94.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 238 </span><span class="caption-text">Image 10</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>기존의 전통적인 방식의 몇가지 blending 기법과 비교해도 더 좋은 성능과 수치를 보이는 것을 확인할 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="imagen_editor.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Imagen Editor</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="SDXL.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">SDXL</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-model">2.1. Score Based Generated Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-diffusion-model-sde-smld">2.2. Score Based Generated Diffusion Model (SDE, SMLD)</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>SDEdit &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/SDEdit';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="SDXL" href="SDXL.html" />
+    <link rel="prev" title="Imagen Editor" href="imagen_editor.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/SDEdit.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/SDEdit.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>SDEdit</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-model">2.1. Score Based Generated Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-diffusion-model-sde-smld">2.2. Score Based Generated Diffusion Model (SDE, SMLD)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2108.01073.pdf">https://arxiv.org/pdf/2108.01073.pdf</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on Oct. 03, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="sdedit">
+<h1>SDEdit<a class="headerlink" href="#sdedit" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>최근 이미지 생성 분야에서의 놀라운 진화 속도가 계속되어 오고 있다. (GAN, Diffusion etc.)</p></li>
+<li><p>이 중 이미지에 random noise를 추가해 denoising 과정을 학습하는 Diffusion을 통해 high quality의 이미지를 생성할 수 있다.</p></li>
+<li><p>또, 생성되는 이미지를 사용자가 원하는 방향으로 이끌어내려는 연구 분야도 활발히 진행되고있다 (a.k.a Editing)</p></li>
+<li><p>하지만, GAN 또는 Diffusion을 포함한 방식으로의 Editing에는 몇가지 단점이 있고, SDEdit은 그런 문제점을 해결해나아갔다는 점을 논문의 핵심 Contribution으로 제시하였다.</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Abstract에서 말한 Editing이란, 유저가 생성하고자 하는 Guide를 제시하면 모델은 해당 Guide를 기반으로 이미지를 생성해내는 Vision Task를 의미한다.</p></li>
+<li><p>이때 두가지의 평가요소가 있는데</p>
+<ol class="arabic simple">
+<li><p>faithful : 유저의 Guide를 얼마나 잘 따르는지</p></li>
+<li><p>realistic : 생성된 이미지가 얼마나 real한지</p></li>
+</ol>
+</li>
+<li><p>기존의 연구방식은 크게 두가지로 나뉜다.</p>
+<ol class="arabic simple">
+<li><p>GAN(Generative Adversarial Network) 기반</p></li>
+<li><p>Diffusion 기반</p></li>
+</ol>
+</li>
+<li><p>이 중 기존에 SOTA를 이룬 GAN 방식을 살펴보면 다시 크게 두가지로 나뉜다.</p>
+<ol class="arabic simple">
+<li><p>conditional GAN</p>
+<ul>
+<li><p>특징 : 원본 이미지에서 Edit된 Pair 이미지를 직접 학습</p></li>
+<li><p>단점 : Pair Dataset이 반드시 필요하고, Condition마다 재학습을 요구</p></li>
+</ul>
+</li>
+<li><p>GAN Inversion</p>
+<ul>
+<li><p>특징 : 이미지를 Latent space로 Inversion한 후, Latent vector를 조작해(manipulate) Edited image를 생성</p></li>
+<li><p>단점 : 새로운 loss function이 정의되어야하고, condition마다 재학습을 요구</p></li>
+</ul>
+</li>
+</ol>
+</li>
+<li><p>그에 반해 SDEdit은</p>
+<ol class="arabic simple">
+<li><p>Pair Dataset이 필요하지 않다.</p></li>
+<li><p>추가적인 loss function과 재학습이 모두 필요하지 않다.</p></li>
+<li><p>단 한개의 pretrained weight로 모든 condition의 이미지를 생성할 수 있다.</p></li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="related-works">
+<h2>2. Related Works<a class="headerlink" href="#related-works" title="Permalink to this heading">#</a></h2>
+<section id="score-based-generated-model">
+<h3>2.1. Score Based Generated Model<a class="headerlink" href="#score-based-generated-model" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img04.png"><img alt="SDEdit_00" class="bg-primary mb-1" src="../../_images/img04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 229 </span><span class="caption-text">Image 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Key Idea</p>
+<ul>
+<li><p><em>“Real 이미지들은 실제 데이터 확률 분포에서 높은 값을 유지할 것이다. 따라서, 이미지를 분포가 높은곳으로 update 해나가면 좋은 퀄리티의 이미지를 생성하는 모델을 얻어낼 수 있다.”</em></p></li>
+</ul>
+</li>
+<li><p>이 때, score는 확률 밀도 함수의 순간 기울기(미분값)로 정의한다.</p></li>
+</ul>
+</section>
+<section id="score-based-generated-diffusion-model-sde-smld">
+<h3>2.2. Score Based Generated Diffusion Model (SDE, SMLD)<a class="headerlink" href="#score-based-generated-diffusion-model-sde-smld" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img110.png"><img alt="SDEdit_01" class="bg-primary mb-1" src="../../_images/img110.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 230 </span><span class="caption-text">Image 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>위에서 제시한 Score Based Generated Model에 Diffusion 방식을 적용한 모델</p></li>
+<li><p>Forward Process 과정에서 이미지에 noise를 주입하는데, 이 때 Stochastic Differential Equation 수식을 이용해 noise를 주입한다.</p></li>
+<li><p>또다른 Diffusion 모델인 (Probability based) DDPM과의 차이는 Forward, Reverse process에서 정의하는 equation의 차이 정도이다.</p></li>
+<li><p>paper : <a class="reference external" href="https://arxiv.org/abs/1907.05600">https://arxiv.org/abs/1907.05600</a></p></li>
+</ul>
+</section>
+</section>
+<section id="methods">
+<h2>3. Methods<a class="headerlink" href="#methods" title="Permalink to this heading">#</a></h2>
+<ol class="arabic">
+<li><p>Pre-Setup</p>
+<ul>
+<li><p>Guide image의 Level을 정의한다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img25.png"><img alt="SDEdit_02" class="bg-primary mb-1" src="../../_images/img25.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 231 </span><span class="caption-text">Image 3</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>low-level guide : real 이미지위에 image patch를 추가</p></li>
+<li><p>mid-level guide : real 이미지위에 stroke를 추가</p></li>
+<li><p>high-level guide : 단순히 coarse한 stroke의 이미지</p></li>
+</ol>
+</li>
+</ul>
+</li>
+<li><p>Procedure</p>
+<ul>
+<li><p>DDPM과 달리 SDE의 경우, 완전히 noise화된 이미지 즉, random noise로부터 denoising을 진행할 필요가 없다.</p></li>
+<li><p>즉, 적절한 <span class="math notranslate nohighlight">\(t_{0} \in [0,1]\)</span>를 지정한 후 denoising process가 가능하다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img35.png"><img alt="SDEdit_03" class="bg-primary mb-1" src="../../_images/img35.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 232 </span><span class="caption-text">Image 4</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>이 때, 적절한 <span class="math notranslate nohighlight">\(t_{0}\)</span>를 정의해야하는데,</p>
+<ol class="arabic simple">
+<li><p><span class="math notranslate nohighlight">\(t_{0}\)</span> = 1 (i.e. random noise)이면, realistic하지만, faithful 하지않은 이미지</p></li>
+<li><p><span class="math notranslate nohighlight">\(t_{0}\)</span> = 0 이면, faithful하지만, artistic한 이미지</p></li>
+</ol>
+<p>를 얻게된다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img44.png"><img alt="SDEdit_04" class="bg-primary mb-1" src="../../_images/img44.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 233 </span><span class="caption-text">Image 5</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>아래는 SDEdit의 적용 과정이다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img54.png"><img alt="SDEdit_05" class="bg-primary mb-1" src="../../_images/img54.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 234 </span><span class="caption-text">Image 6</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ol>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>Score</p>
+<ul>
+<li><p>Metric</p>
+<ul>
+<li><p>realistic : KID score (lower is better)</p></li>
+<li><p>faithful : <span class="math notranslate nohighlight">\(L_{2}\)</span> score (lower is better)</p></li>
+<li><p>그 외 종합적인 평가 지표로 survey를 통한 수치를 제시하였다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img64.png"><img alt="SDEdit_06" class="bg-primary mb-1" src="../../_images/img64.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 235 </span><span class="caption-text">Image 7</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>기존의 GAN 방식들과 비교했을 때 KID, <span class="math notranslate nohighlight">\(L_{2}\)</span> score 모두 더 좋은 수치를 보이는 것을 확인할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>Comparison with GAN (styleGAN-ADA + Inversion)</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img74.png"><img alt="SDEdit_07" class="bg-primary mb-1" src="../../_images/img74.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 236 </span><span class="caption-text">Image 8</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>SDEdit이 GAN Based model보다 더 자연스럽고(realistic), 유저의 guide를 잘 따르는(faithful)것을 확인할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>Comparison with original blending technique</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img85.png"><img alt="SDEdit_08" class="bg-primary mb-1" src="../../_images/img85.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 237 </span><span class="caption-text">Image 9</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img94.png"><img alt="SDEdit_09" class="bg-primary mb-1" src="../../_images/img94.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 238 </span><span class="caption-text">Image 10</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기존의 전통적인 방식의 몇가지 blending 기법과 비교해도 더 좋은 성능과 수치를 보이는 것을 확인할 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="imagen_editor.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Imagen Editor</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="SDXL.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">SDXL</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-works">2. Related Works</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-model">2.1. Score Based Generated Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#score-based-generated-diffusion-model-sde-smld">2.2. Score Based Generated Diffusion Model (SDE, SMLD)</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methods">3. Methods</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/SDXL.html b/docs/review/SDXL.html
old mode 100644
new mode 100755
index 937f1b96..7188bcb9
--- a/docs/review/SDXL.html
+++ b/docs/review/SDXL.html
@@ -1,808 +1,828 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>SDXL &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/SDXL';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="T2I-Adapter" href="t2i_adapter.html" />
-    <link rel="prev" title="SDEdit" href="SDEdit.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/SDXL.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/SDXL.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>SDXL</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#micro-conditioning">2.2 Micro-Conditioning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-the-model-on-cropping-parameters">Conditioning the Model on Cropping Parameters</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-aspect-training">2.3 Multi-Aspect Training</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-autoencoder">2.4 Improved Autoencoder</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5 Putting Everything Together</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#refinement-stage">Refinement Stage</a></li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2307.01952">https://arxiv.org/abs/2307.01952</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/Stability-AI/generative-models">Stability-AI/generative-models</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
-<li><p><strong>Last updated on May. 31. 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="sdxl">
-<h1>SDXL<a class="headerlink" href="#sdxl" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<p>SDXL은 T2I latent diffusion 모델이다. Stable Diffusion과 비교하면, SDXL은 세 배 더 큰 규모의 UNet을 포함한다. 더 많은 attention 블록과 더 큰 cross attention context 가 SDXL에서 두 번째 text encoder로 사용되면서 모델 파라미터가 증가했다. 다수의 새로운 conditioning 방법과 다양한 비율에 맞도록 SDXL을 학습할 수 있도록 설계했다. 또한 후처리 방식의 image to image 기술을 사용해 SDXL의 생성 샘플의 시각적인 fidelity를 향상시킨 refinement model을 소개한다. SDXL은 대폭 향상된 성능을 보여준다.</p>
-<figure class="align-default" id="sdxl-result">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/sdxl_result.png"><img alt="sdxl_result" class="bg-primary mb-1" src="../../_images/sdxl_result.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 239 </span><span class="caption-text">SDXL result</span><a class="headerlink" href="#sdxl-result" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>세 가지 주요 기능이라 볼 수 있는데,</p>
-<ol class="arabic simple">
-<li><p>3배 더 큰 UNet backbone,</p></li>
-<li><p>어떤 형태의 추가 감독(supervision)없는 간단하면서도 효과적인 추가의 conditioning 기술</p></li>
-<li><p>noising-denoising 과정을 적용해 시각적 품질을 향상하는 latent를 생성할 수 있는 별개의 diffusion 기반 img-to-img refinement 모델을 포함한다.</p></li>
-</ol>
-<figure class="align-default" id="figure-1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_11.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_11.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 240 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#figure-1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그림 1에서 왼쪽 그림을 보면 추가의 refinement 단계를 추가해 성능을 높인 SDXL이 기존 SD보다 성능이 우수한 것을 확인할 수 있다. 오른쪽 그림은 아키텍처를 시각화했는데, 128x128 크기의 latent를 생성한다. 그 후 고해상도 refinement 모델을 활용하고 동일한 프롬프트를 활용해 첫 번째 단계에서 생성된 latent를 SDEdit을 적용한다. SDXL과 refinement 모델은 동일한 autoencoder를 사용한다.</p>
-<figure class="align-default" id="table-1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_1.png"><img alt="table_1" class="bg-primary mb-1" src="../../_images/table_1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 241 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#table-1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>SD와 다르게 UNet 내의 transformer 블록의 heterogeneous 분포를 사용했다는 점이다. 테이블 1을 참고하면 highest feature level에서 transformer 블럭을 사용했고, lower level에서는 2, 10 개의 블럭을 사용했고, UNet에서 lowest level(8x downsampling)을 제거했다. text conditioning을 위한 pretrained 된 text encoder를 사용했다. 특히, CLIP Vit-L과 함께 OpenCLIP ViT-bigG를 사용했고, 채널 축에 두 번째 text encoder의 output을 concat 했다. 게다가 text input으로 모델에 condition을 주기 위해 cross attention 레이어를 사용했으며, 또 OpenCLIP로부터 pooled text embedding을 모델에 condition으로 추가했다. 이러한 변화는 UNet의 파라미터 사이즈가 2.6B로 증가했다. text encoder는 817M 파라미터를 가지고 있다.</p>
-</section>
-<section id="micro-conditioning">
-<h2>2.2 Micro-Conditioning<a class="headerlink" href="#micro-conditioning" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="figure-2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_2.png"><img alt="fig_2" class="bg-primary mb-1" src="../../_images/fig_2.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 242 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#figure-2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>SD 1.4/1.5 같은 경우 512 픽셀 이하 크기의 이미지는 제외하고 학습을 시키거나, 너무 작은 이미지는 upscale하여 학습을 시켰다. 이는 학습할 때의 최소 크기가 정해지는 문제점이 발생한다. 따라서 성능을 저하시키거나, 일반화를 잘 못할 수 있다.</p>
-<p>그림 2를 보면 SDXL의 데이터 셋의 분포를 시각화해주는 그림이다. 제안된 size-conditiong 없이, 256x256 픽셀 크기 미만의 데이터가 39%나 달한다. upscale 하게 된다면 최종 결과물이 blur 한 결과를 가져와 좋지 않은 아티팩트가 생긴다.</p>
-<p>대신, 저자들은 원래의 이미지 해상도에서 UNet 모델에 condition을 주었다. 특히 어떠한 rescaling 전의 원래의 크기인  <span class="math notranslate nohighlight">\(c_\text{size}=(h_\text{original}, w_\text{original})\)</span>를 제공해 추가의 condition을 줄 수 있게 했다. UNet의 denoising 할 때의 condition으로 추가된다.</p>
-<p>Inference 때, 사용자가 size-conditioning을 통해 해상도를 정할 수 있다. 모델은 conditioning 크기를 해상도에 의존적인 이미지 feature과 연관시키도록 하는 방법을 학습했다.</p>
-<figure class="align-default" id="figure-3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_31.png"><img alt="fig_3" class="bg-primary mb-1" src="../../_images/fig_31.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 243 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#figure-3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또 ImageNet으로 평가를 진행해 size-conditiong에 대한 우수성을 입증했다.</p>
-<figure class="align-default" id="table-2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_2.png"><img alt="table_2" class="bg-primary mb-1" src="../../_images/table_2.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 244 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#table-2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><em>CIN-512-only</em> 는 512 미만의 이미지를 제외하고 학습을 시켰고(70k 장), <em>CIN-nocond</em> 는 모든 ImageNet 이미지를 사용했으며, <em>CIN-size-cond</em> 는 추가 size-condition을 사용했다. 표 2에서 보다시피 <em>CIN-size-cond</em> 모델이 FID, IS 모두 높은 성능을 보였다.</p>
-<section id="conditioning-the-model-on-cropping-parameters">
-<h3>Conditioning the Model on Cropping Parameters<a class="headerlink" href="#conditioning-the-model-on-cropping-parameters" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="figure-4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_41.png"><img alt="fig_4" class="bg-primary mb-1" src="../../_images/fig_41.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 245 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#figure-4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그림 4에서 SD 같은 경우 고양이 머리가 잘려진 결과를 얻었다. 이러한 이유는 학습할 때, random cropping으로 인해 생성되었기 때문이다.</p>
-<p>이러한 문제를 해결하기 위해, 간단한 효과적인 방법을 제안한다. 데이터를 loading 할 때, 균등하게  <span class="math notranslate nohighlight">\(c_\text{top}\)</span>과  <span class="math notranslate nohighlight">\(c_\text{left}\)</span> (높이 및 너비 축을 따라 왼쪽 상단 모서리에서 잘린 픽셀의 양을 지정하는 정수)를 샘플링한다. 그 후 Fourier feature 임베딩을 통해 conditioning 파라미터로써 모델에 입력한다. 위에서 언급한 size conditioning과 비슷하다. concat 된 임베딩  <span class="math notranslate nohighlight">\(c_\text{crop}\)</span>은 추가의 conditioning 파라미터로 사용된다.</p>
-<p>저자들은 LDM 뿐만 아니라 어떠한 DM에서도 사용될 수 있다고 강조한다. crop 및 size-conditioning은 쉽게 결합될 수 있다. 이러한 경우, crop 및 size-conditioning을 feature 임베딩을 채널 축에 concat 하고 UNet의 타임스텝 임베딩에 추가한다.</p>
-</section>
-</section>
-<section id="multi-aspect-training">
-<h2>2.3 Multi-Aspect Training<a class="headerlink" href="#multi-aspect-training" title="Permalink to this heading">#</a></h2>
-<p>일반적인 T2I 모델에서 결과물의 크기는 512x512, 1024x1024 로 얻을 수 있는데, 이는 현실 세계에서 부자연스럽다. 이유는 현실 세계에서는 다양한 크기, 비율을 가진 이미지가 많고, 풍경 같은 경우 16:9 비율의 크기를 지니고 있다.</p>
-<p>따라서, 다양한 비율을 동시에 다룰수 있도록 모델을 파인튜닝했다. 픽셀수를 1024x1024 만큼 수를 최대한 유지하면서 다양한 비율의 데이터를 사용했고, 64의 배수를 지니도록 했다.</p>
-<figure class="align-default" id="multi-aspect-ratio">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multi_aspect_ratio.png"><img alt="multi_aspect_ratio" class="bg-primary mb-1" src="../../_images/multi_aspect_ratio.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 246 </span><span class="caption-text">Multi aspect ratio</span><a class="headerlink" href="#multi-aspect-ratio" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>최적화 동안, 학습 배치는 동일한 버킷(같은 비율의 이미지들?)의 이미지로 구성되며, 각 훈련 스텝마다 버킷 크기를 번갈아 가며 사용했다. 추가적으로, 모델은 버킷 크기(혹은 타겟 크기)를 conditioning으로 주었으며, 위에서 언급한 size, crop conditioning과 유사하게 Fourier 공간에 임베딩되는  <span class="math notranslate nohighlight">\(c_\text{ar}=(h_\text{tgt}, w_\text{tgt})\)</span>  형태로 표현된다.</p>
-<p>실제로, 모델이 고정된 비율및 해상도의 데이터로 pretraining이 마친 후 파인튜닝 단계에서는 다양한 비율의 데이터로 학습했고, 채널 축으로 concat 하는 2.2절에서 소개한 conditioning 기술과 함께 결합했다. 이를 아래의 그림 16에서 코드로 확인할 수 있다.</p>
-</section>
-<section id="improved-autoencoder">
-<h2>2.4 Improved Autoencoder<a class="headerlink" href="#improved-autoencoder" title="Permalink to this heading">#</a></h2>
-<p>SD는 LDM 중 하나이고, autoencoder의 latent space를 학습한다. semantic composition은 LDM으로부터 표현되지만 저자들은 local, high frequency 디테일한 부분을 향상하고자 autoencoder를 향상했다. 끝으로, 원래의 SD를 사용한 autoencoder 아키텍처에서 더 큰 배치사이즈(256 vs 9)로 학습했고 추가로 exponential moving average를 사용한 가중치를 사용했다. 결과 autoencoder의 성능이 reconstruction 메트릭에 좋은 결과를 가져왔다.</p>
-<figure class="align-default" id="table-3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_31.png"><img alt="table_3" class="bg-primary mb-1" src="../../_images/table_31.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 247 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#table-3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="putting-everything-together">
-<h2>2.5 Putting Everything Together<a class="headerlink" href="#putting-everything-together" title="Permalink to this heading">#</a></h2>
-<p>학습 파라미터를 정리해주는 절입니다. diffusion time step은 1000 step을 사용했다. 우선, base model를 내부 데이터 셋으로 그림 2에 나와있는 높이-너비 분포에 맞게 학습을 시켰다. 600,000 step을 사용했으며, 256x256 사이즈로, 배치는 2048로, size &amp; crop conditioning을 사용했다. 그 후 512x512 이미지를 추가로 200,000 최적화 step으로 학습시켰고, 마침내 offset 노이즈 [11, 25] 0.05 수준과 함께 다중 비율 학습을 활용하여 ~ 1024x1024 영역의 다양한 비율로 모델을 학습했다.</p>
-<section id="refinement-stage">
-<h3>Refinement Stage<a class="headerlink" href="#refinement-stage" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="figure-6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_61.png"><img alt="fig_6" class="bg-primary mb-1" src="../../_images/fig_61.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 248 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#figure-6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>경험적으로, 그림 6처럼 특정 부분 퀄리티가 낮은 샘플의 결과를 찾았다. 왼쪽 그림이 refinement stage 적용 전, 오른쪽 그림이 refinement stage를 적용한 그림이다.</p>
-<p>이를 해결하기 위해, 고품질, 고해상도 데이터에 특화된 latent space 내에서 별도의 LDM을 학습했다. 기본 모델의 샘플에 대해 SDEdit에서 도입한 노이즈 제거 과정을 사용했다. eDiff-I 방법을 따랐으며, 이를 첫 200 노이즈 스케일에 refinement 모델을 사용했다. inference에서, base SDXL에서 latent를 추출하고 바로 diffuse와 denoise를 refinement 모델에 넣었다. 이 스텝은 선택이지만 배경 및 사람 얼굴과 같은 디테일에서 향상된 결과(그림 6, 13)를 얻을 수 있었다.</p>
-<figure class="align-default" id="figure-13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_131.png"><img alt="fig_13" class="bg-primary mb-1" src="../../_images/fig_131.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 249 </span><span class="caption-text">Figure 13</span><a class="headerlink" href="#figure-13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="SDEdit.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">SDEdit</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="t2i_adapter.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">T2I-Adapter</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#micro-conditioning">2.2 Micro-Conditioning</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-the-model-on-cropping-parameters">Conditioning the Model on Cropping Parameters</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-aspect-training">2.3 Multi-Aspect Training</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-autoencoder">2.4 Improved Autoencoder</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5 Putting Everything Together</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#refinement-stage">Refinement Stage</a></li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>SDXL &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/SDXL';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="T2I-Adapter" href="t2i_adapter.html" />
+    <link rel="prev" title="SDEdit" href="SDEdit.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/SDXL.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/SDXL.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>SDXL</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#micro-conditioning">2.2 Micro-Conditioning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-the-model-on-cropping-parameters">Conditioning the Model on Cropping Parameters</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-aspect-training">2.3 Multi-Aspect Training</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-autoencoder">2.4 Improved Autoencoder</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5 Putting Everything Together</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#refinement-stage">Refinement Stage</a></li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2307.01952">https://arxiv.org/abs/2307.01952</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/Stability-AI/generative-models">Stability-AI/generative-models</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
+<li><p><strong>Last updated on May. 31. 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="sdxl">
+<h1>SDXL<a class="headerlink" href="#sdxl" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<p>SDXL은 T2I latent diffusion 모델이다. Stable Diffusion과 비교하면, SDXL은 세 배 더 큰 규모의 UNet을 포함한다. 모델 파라미터의 증가는 주로 더 많은 attention 블록과, 두 번째 text encoder를 사용하면서 더 커진 cross attention context에서 비롯된다. 다수의 새로운 conditioning 방법을 설계했고, 다양한 비율(aspect ratio)의 이미지로 SDXL을 학습했다. 또한 후처리 방식의 image to image 기술을 사용해 SDXL의 생성 샘플의 시각적인 fidelity를 향상시킨 refinement model을 소개한다. SDXL은 대폭 향상된 성능을 보여준다.</p>
+<figure class="align-default" id="sdxl-result">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/sdxl_result.png"><img alt="sdxl_result" class="bg-primary mb-1" src="../../_images/sdxl_result.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 239 </span><span class="caption-text">SDXL result</span><a class="headerlink" href="#sdxl-result" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>세 가지 주요 기능이라 볼 수 있는데,</p>
+<ol class="arabic simple">
+<li><p>3배 더 큰 UNet backbone,</p></li>
+<li><p>어떤 형태의 추가 감독(supervision)없는 간단하면서도 효과적인 추가의 conditioning 기술</p></li>
+<li><p>noising-denoising 과정을 적용해 시각적 품질을 향상하는 latent를 생성할 수 있는 별개의 diffusion 기반 img-to-img refinement 모델을 포함한다.</p></li>
+</ol>
+<figure class="align-default" id="figure-1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_11.png"><img alt="fig_1" class="bg-primary mb-1" src="../../_images/fig_11.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 240 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#figure-1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그림 1에서 왼쪽 그림을 보면 추가의 refinement 단계를 추가해 성능을 높인 SDXL이 기존 SD보다 성능이 우수한 것을 확인할 수 있다. 오른쪽 그림은 아키텍처를 시각화했는데, 128x128 크기의 latent를 생성한다. 그 후 고해상도 refinement 모델을 활용하고 동일한 프롬프트를 활용해 첫 번째 단계에서 생성된 latent를 SDEdit을 적용한다. SDXL과 refinement 모델은 동일한 autoencoder를 사용한다.</p>
+<figure class="align-default" id="table-1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_1.png"><img alt="table_1" class="bg-primary mb-1" src="../../_images/table_1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 241 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#table-1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>SD와 다른 점은 UNet 내에서 transformer 블록을 heterogeneous 하게 분포시켰다는 점이다. 테이블 1을 참고하면 highest feature level에서는 transformer 블럭을 생략했고, lower level에서는 2, 10 개의 블럭을 사용했고, UNet에서 lowest level(8x downsampling)을 제거했다. text conditioning을 위한 pretrained 된 text encoder를 사용했다. 특히, CLIP ViT-L과 함께 OpenCLIP ViT-bigG를 사용했고, 두 text encoder의 끝에서 두 번째(penultimate) output을 채널 축으로 concat 했다. 게다가 text input으로 모델에 condition을 주기 위해 cross attention 레이어를 사용했으며, 또 OpenCLIP로부터 pooled text embedding을 모델에 condition으로 추가했다. 이러한 변화로 UNet의 파라미터 사이즈가 2.6B로 증가했다. text encoder는 817M 파라미터를 가지고 있다.</p>
+</section>
+<section id="micro-conditioning">
+<h2>2.2 Micro-Conditioning<a class="headerlink" href="#micro-conditioning" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="figure-2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_2.png"><img alt="fig_2" class="bg-primary mb-1" src="../../_images/fig_2.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 242 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#figure-2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>SD 1.4/1.5 같은 경우 512 픽셀 이하 크기의 이미지는 제외하고 학습을 시키거나, 너무 작은 이미지는 upscale하여 학습을 시켰다. 이는 학습할 때의 최소 크기가 정해지는 문제점이 발생한다. 따라서 성능을 저하시키거나, 일반화를 잘 못할 수 있다.</p>
+<p>그림 2는 SDXL 데이터 셋의 분포를 시각화한 그림이다. 제안된 size-conditioning 없이는, 256x256 픽셀 크기 미만의 데이터가 39%에 달한다. upscale 하게 된다면 최종 결과물이 blur 한 결과를 가져와 좋지 않은 아티팩트가 생긴다.</p>
+<p>대신, 저자들은 원래의 이미지 해상도에서 UNet 모델에 condition을 주었다. 특히 어떠한 rescaling 전의 원래의 크기인  <span class="math notranslate nohighlight">\(c_\text{size}=(h_\text{original}, w_\text{original})\)</span>를 제공해 추가의 condition을 줄 수 있게 했다. UNet의 denoising 할 때의 condition으로 추가된다.</p>
+<p>Inference 때, 사용자가 size-conditioning을 통해 해상도를 정할 수 있다. 모델은 conditioning 크기를 해상도에 의존적인 이미지 feature과 연관시키도록 하는 방법을 학습했다.</p>
+<figure class="align-default" id="figure-3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_31.png"><img alt="fig_3" class="bg-primary mb-1" src="../../_images/fig_31.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 243 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#figure-3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또 ImageNet으로 평가를 진행해 size-conditioning에 대한 우수성을 입증했다.</p>
+<figure class="align-default" id="table-2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_2.png"><img alt="table_2" class="bg-primary mb-1" src="../../_images/table_2.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 244 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#table-2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><em>CIN-512-only</em> 는 512 미만의 이미지를 제외하고 학습을 시켰고(70k 장), <em>CIN-nocond</em> 는 모든 ImageNet 이미지를 사용했으며, <em>CIN-size-cond</em> 는 추가 size-condition을 사용했다. 표 2에서 보다시피 <em>CIN-size-cond</em> 모델이 FID, IS 모두 높은 성능을 보였다.</p>
+<section id="conditioning-the-model-on-cropping-parameters">
+<h3>Conditioning the Model on Cropping Parameters<a class="headerlink" href="#conditioning-the-model-on-cropping-parameters" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="figure-4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_41.png"><img alt="fig_4" class="bg-primary mb-1" src="../../_images/fig_41.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 245 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#figure-4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그림 4에서 SD 같은 경우 고양이 머리가 잘려진 결과를 얻었다. 이러한 이유는 학습할 때, random cropping으로 인해 생성되었기 때문이다.</p>
+<p>이러한 문제를 해결하기 위해, 간단한 효과적인 방법을 제안한다. 데이터를 loading 할 때, 균등하게  <span class="math notranslate nohighlight">\(c_\text{top}\)</span>과  <span class="math notranslate nohighlight">\(c_\text{left}\)</span> (높이 및 너비 축을 따라 왼쪽 상단 모서리에서 잘린 픽셀의 양을 지정하는 정수)를 샘플링한다. 그 후 Fourier feature 임베딩을 통해 conditioning 파라미터로써 모델에 입력한다. 위에서 언급한 size conditioning과 비슷하다. concat 된 임베딩  <span class="math notranslate nohighlight">\(c_\text{crop}\)</span>은 추가의 conditioning 파라미터로 사용된다.</p>
+<p>저자들은 LDM 뿐만 아니라 어떠한 DM에서도 사용될 수 있다고 강조한다. crop 및 size-conditioning은 쉽게 결합될 수 있다. 이러한 경우, crop 및 size-conditioning을 feature 임베딩을 채널 축에 concat 하고 UNet의 타임스텝 임베딩에 추가한다.</p>
+</section>
+</section>
+<section id="multi-aspect-training">
+<h2>2.3 Multi-Aspect Training<a class="headerlink" href="#multi-aspect-training" title="Permalink to this heading">#</a></h2>
+<p>일반적인 T2I 모델에서 결과물의 크기는 512x512, 1024x1024 로 얻을 수 있는데, 이는 현실 세계에서 부자연스럽다. 이유는 현실 세계에서는 다양한 크기, 비율을 가진 이미지가 많고, 풍경 같은 경우 16:9 비율의 크기를 지니고 있다.</p>
+<p>따라서, 다양한 비율을 동시에 다룰 수 있도록 모델을 파인튜닝했다. 픽셀 수를 1024x1024에 최대한 가깝게 유지하면서 다양한 비율의 데이터를 사용했고, 높이와 너비가 64의 배수가 되도록 했다.</p>
+<figure class="align-default" id="multi-aspect-ratio">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/multi_aspect_ratio.png"><img alt="multi_aspect_ratio" class="bg-primary mb-1" src="../../_images/multi_aspect_ratio.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 246 </span><span class="caption-text">Multi aspect ratio</span><a class="headerlink" href="#multi-aspect-ratio" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>최적화 동안, 학습 배치는 동일한 버킷(같은 비율의 이미지들?)의 이미지로 구성되며, 각 훈련 스텝마다 버킷 크기를 번갈아 가며 사용했다. 추가적으로, 모델은 버킷 크기(혹은 타겟 크기)를 conditioning으로 주었으며, 위에서 언급한 size, crop conditioning과 유사하게 Fourier 공간에 임베딩되는  <span class="math notranslate nohighlight">\(c_\text{ar}=(h_\text{tgt}, w_\text{tgt})\)</span>  형태로 표현된다.</p>
+<p>실제로, 모델이 고정된 비율 및 해상도의 데이터로 pretraining을 마친 후 파인튜닝 단계에서는 다양한 비율의 데이터로 학습했고, 채널 축으로 concat 하는 2.2절에서 소개한 conditioning 기술과 함께 결합했다. 이를 아래의 그림 16에서 코드로 확인할 수 있다.</p>
+</section>
+<section id="improved-autoencoder">
+<h2>2.4 Improved Autoencoder<a class="headerlink" href="#improved-autoencoder" title="Permalink to this heading">#</a></h2>
+<p>SD는 LDM 중 하나이고, autoencoder의 latent space를 학습한다. semantic composition은 LDM으로부터 표현되지만 저자들은 local, high frequency 디테일한 부분을 향상하고자 autoencoder를 향상했다. 이를 위해, 원래의 SD에서 사용한 autoencoder 아키텍처를 더 큰 배치사이즈(256 vs 9)로 학습했고 추가로 exponential moving average를 사용한 가중치를 사용했다. 결과 autoencoder의 성능이 reconstruction 메트릭에 좋은 결과를 가져왔다.</p>
+<figure class="align-default" id="table-3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table_31.png"><img alt="table_3" class="bg-primary mb-1" src="../../_images/table_31.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 247 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#table-3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="putting-everything-together">
+<h2>2.5 Putting Everything Together<a class="headerlink" href="#putting-everything-together" title="Permalink to this heading">#</a></h2>
+<p>학습 파라미터를 정리해주는 절이다. diffusion time step은 1000 step을 사용했다. 우선, base model을 내부 데이터 셋으로 그림 2에 나와있는 높이-너비 분포에 맞게 학습을 시켰다. 600,000 step을 사용했으며, 256x256 사이즈로, 배치는 2048로, size &amp; crop conditioning을 사용했다. 그 후 512x512 이미지를 추가로 200,000 최적화 step으로 학습시켰고, 마침내 offset 노이즈 [11, 25] 0.05 수준과 함께 다중 비율 학습을 활용하여 ~ 1024x1024 영역의 다양한 비율로 모델을 학습했다.</p>
+<section id="refinement-stage">
+<h3>Refinement Stage<a class="headerlink" href="#refinement-stage" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="figure-6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_61.png"><img alt="fig_6" class="bg-primary mb-1" src="../../_images/fig_61.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 248 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#figure-6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>경험적으로, 그림 6처럼 특정 부분 퀄리티가 낮은 샘플의 결과를 찾았다. 왼쪽 그림이 refinement stage 적용 전, 오른쪽 그림이 refinement stage를 적용한 그림이다.</p>
+<p>이를 해결하기 위해, 고품질, 고해상도 데이터에 특화된 latent space 내에서 별도의 LDM을 학습했다. 기본 모델의 샘플에 대해 SDEdit에서 도입한 노이즈 제거 과정을 사용했다. eDiff-I 방법을 따랐으며, 이를 첫 200 노이즈 스케일에 refinement 모델을 사용했다. inference에서, base SDXL에서 latent를 추출하고 바로 diffuse와 denoise를 refinement 모델에 넣었다. 이 스텝은 선택이지만 배경 및 사람 얼굴과 같은 디테일에서 향상된 결과(그림 6, 13)를 얻을 수 있었다.</p>
+<figure class="align-default" id="figure-13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig_131.png"><img alt="fig_13" class="bg-primary mb-1" src="../../_images/fig_131.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 249 </span><span class="caption-text">Figure 13</span><a class="headerlink" href="#figure-13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="SDEdit.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">SDEdit</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="t2i_adapter.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">T2I-Adapter</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#micro-conditioning">2.2 Micro-Conditioning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#conditioning-the-model-on-cropping-parameters">Conditioning the Model on Cropping Parameters</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-aspect-training">2.3 Multi-Aspect Training</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-autoencoder">2.4 Improved Autoencoder</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5 Putting Everything Together</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#refinement-stage">Refinement Stage</a></li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Shap-E.html b/docs/review/Shap-E.html
old mode 100644
new mode 100755
index d68bc044..54120d48
--- a/docs/review/Shap-E.html
+++ b/docs/review/Shap-E.html
@@ -1,1076 +1,1096 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Shap-E &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Shap-E';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DreamFusion" href="DreamFusion.html" />
-    <link rel="prev" title="Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)" href="Point_E.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Shap-E.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Shap-E.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Shap-E</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Shap-E</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-fields-nerf">2.1 Neural Radiance Fields (NeRF)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signed-distance-functions-and-texture-field-stf">2.2 Signed Distance Functions and Texture Field (STF)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.3 Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion">2.4 Latent Diffusion</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-encoder">4.1 3D Encoder</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-nerf-rendering">4.1.1 Decoding with NeRF Rendering</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-stf-rendering">4.1.2 Decoding with STF Rendering</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.2 Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4.3 Dataset</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#encoder-evaluation">5.1 Encoder Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-point-e">5.2 Comparison to Point-E</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods">5.3 Comparison to Other Methods</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work">6. Limitations and Future Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#acknowledgements">8. Acknowledgements</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Shap-E: Generating Conditional 3D Implicit Function</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2305.02463">https://arxiv.org/abs/2305.02463</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/openai/shap-e">https://github.com/openai/shap-e</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
-<li><p><strong>Last updated on July. 18. 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="shap-e">
-<h1>Shap-E<a class="headerlink" href="#shap-e" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id2">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure1.png"><img alt="figure1" class="bg-light mb-1" src="../../_images/figure1.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 676 </span><span class="caption-text">Shap-E를 통해 생성한 3D assets</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="abstract">
-<h1>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
-<blockquote>
-<div><p>📌 <strong>논문요약</strong><br />
-2023년 OpenAI의 <a class="reference external" href="https://arxiv.org/search/cs?searchtype=author&amp;query=Jun,+H">Heewoo Jun</a>, <a class="reference external" href="https://arxiv.org/search/cs?searchtype=author&amp;query=Nichol,+A">Alex Nichol</a> 가 발표한 논문입니다. official code는 <a class="reference external" href="https://github.com/openai/shap-e/tree/main">github</a>에서, diffusers를 활용한 코드는 <a class="reference external" href="https://huggingface.co/docs/diffusers/en/api/pipelines/shap_e">huggingface</a>에서 확인할 수 있습니다. <br />
-<strong>목적 -</strong> 조건부 3D assets 생성 <br />
-<strong>생성방식 -</strong> encoder를 통해 implicit function의 parameter 형태로 표현한 후, 이를 diffusion model의 조건으로 사용함으로써 conditional 3D assets을 생성할 수 있도록 했다.<br />
-<strong>차별점 -</strong> texture mesh 나 NeRF 모두 생성 가능한 implicit function의 parameters를 직접적으로 생성할 수 있다. (다른 3D 생성 모델의 경우 단일 표현만 가능한 경우가 많다고 합니다.)</p>
-</div></blockquote>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p>implicit neural representations (INRs)는 3D assets을 인코딩하는 방식으로 많이 사용된다. 3D asset을 표현하기 위해 INRs는 주로 3D coordinate를 location specific info(density, color)로 맵핑한다. 일반적으로 INRs는 화질에 영향을 받지 않는데 이는 고정된 grid나 sequence가 아닌 arbitrary input points를 처리할 수 있기 때문이다. 덕분에 end-to-end 미분이 가능하다. INRs은 이후 다양한 downstream applications도 가능하게 한다. 본 논문에서는 2가지 타입의 INRs을 다룬다.</p>
-<ul class="simple">
-<li><p><strong>Neural Radiance Field (NeRF)</strong> - 3D scene을 function mapping으로 표현.</p>
-<ul>
-<li><p>coordinate, viewing direction <span class="math notranslate nohighlight">\(\rightarrow\)</span> density, colors along camera rays</p></li>
-</ul>
-</li>
-<li><p><strong>textured 3D mesh</strong> (DMTet, GET3D)</p>
-<ul>
-<li><p>coordinate <span class="math notranslate nohighlight">\(\rightarrow\)</span> colors, signed distances, vertex offsets</p></li>
-<li><p>INRs는 삼각메쉬를 생성할 때 사용될 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-<p>이미지, 비디오, 오디오, 3D assets 생성에 관한 다양한 연구가 있지만 downstream application에서 사용하기 편한 형태로 3D assets을 표현하는 방법에 대한 연구는 부족하다. 본 논문은 단일 representation으로 부터 두가지 형태로 rendering 가능하게 했다는 특징이 있다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="background">
-<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
-<section id="neural-radiance-fields-nerf">
-<h2>2.1 Neural Radiance Fields (NeRF)<a class="headerlink" href="#neural-radiance-fields-nerf" title="Permalink to this heading">#</a></h2>
-<p>Mildenhall et al. 는 아래와 같이 NeRF(3D scene을 implicit function으로 표현하는 방법)를 제안했다.</p>
-<div class="math notranslate nohighlight">
-\[
-F_{\Theta} : (\mathbf{x},\mathbf d)↦(\mathbf c,\sigma) \tag{1}
-\]</div>
-<p><span class="math notranslate nohighlight">\(x\)</span> 는 3D 공간 좌표, <span class="math notranslate nohighlight">\(d\)</span> 는 3D 시야 각도, <span class="math notranslate nohighlight">\(c\)</span> 는 RGB, <span class="math notranslate nohighlight">\(\sigma\)</span> 는 density(<span class="math notranslate nohighlight">\(\ge 0\)</span>) 이다. <span class="math notranslate nohighlight">\(F_\Theta\)</span> 는 편의를 위해 <span class="math notranslate nohighlight">\(\sigma(x)\)</span> 와 <span class="math notranslate nohighlight">\(c(x,d)\)</span> 두개의 식으로 나누어 표현했다.</p>
-<p>새로운 시야에서 바라본 scene 을 렌더링하기 위해서, 아래와 같이 각 ray에 맞는 color값을 계산한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat C(\mathbf r)=\int^\infty_0 T(t)\sigma(\mathbf r(t))\mathbf c(\mathbf r(t),\mathbf d)dt, \space \text{where} \space T(t)=\text{exp}\Big(-\int^t_0 \sigma(\mathbf r(s))ds\Big) \tag{2}
-\]</div>
-<ul>
-<li><p>수식(2) 설명</p>
-<figure class="align-default" id="id3">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4859.png"><img alt="figure1" class="bg-light mb-1" src="../../_images/IMG_4859.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 677 </span><span class="caption-text">수식 (2) 보충설명</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p>위의 적분식을 아래와 같이 discrete sum으로 간략화 할 수 있다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat C(\mathbf r)=\sum^N_{i=1} T_i(1-\text{exp}(-\sigma(\mathbf r(t_i))\delta_i))\mathbf c (\mathbf r(t_i),\mathbf d), \space \text{where} \space T_i=\text{exp}\Big(-\sum^{i-1}_{j=1} \sigma(\mathbf r(t_j))\delta_j\Big) \tag{3}
-\]</div>
-<p>구간을 나누는 방식은 중요한 부분으로 coarse와 fine 두단계로 나누어 더 세부적으로 sequence를 나눈다. 2개의 NeRF 모델을 이용하여 2번의 sampling을 한다.</p>
-<div class="math notranslate nohighlight">
-\[
-w_i \sim T_i(1-\text{exp}(-\sigma(\mathbf r(t_i))\delta_i))\tag{4}
-\]</div>
-<p>본 논문에서는 ray의 transmittance를 아래와 같이 추가적으로 정의하였다. 이는 직관적으로 ray의 alpha값이나 opacity의 총합에 해당한다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat T(\mathbf r)=1-\text{exp}\Big(-\sum^N_{i=1}\sigma(\mathbf r(t_i))\delta_i\Big)\tag{5}
-\]</div>
-<ul>
-<li><p>수식(5) 설명</p>
-<figure class="align-default" id="id4">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4860.png"><img alt="figure2" class="bg-light mb-1" src="../../_images/IMG_4860.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 678 </span><span class="caption-text">수식 (5) 보충 설명</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="signed-distance-functions-and-texture-field-stf">
-<h2>2.2 Signed Distance Functions and Texture Field (STF)<a class="headerlink" href="#signed-distance-functions-and-texture-field-stf" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서 STF는 signed distances와 texture colors 두가지 모두를 생성하는 implicit function을 의미한다. 이번 섹션에서는 이러한 implicit function이 meshes를 구성하고 rendering을 만드는 방식을 설명한다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4872.png"><img alt="figure3" class="bg-light mb-1" src="../../_images/IMG_4872.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 679 </span><span class="caption-text">point cloud, voxel, polygon mesh의 비교 <br />
-source - 3D Vision with Transformers: A Survey</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Signed Distance Functions (SDFs)</strong>는 3D shape을 scalar field로 표현하는 전통적인 방법 중 하나다. 특히 SDF <span class="math notranslate nohighlight">\(f\)</span>는 coordinate <span class="math notranslate nohighlight">\(x\)</span>를 scalar로 mapping한다. (<span class="math notranslate nohighlight">\(f(\mathbf x)=d\)</span>) 여기서 <span class="math notranslate nohighlight">\(d\)</span>는 특정 위치 <span class="math notranslate nohighlight">\(x\)</span>에서 가장 가까운 물체의 표면까지의 거리를 말한다. <span class="math notranslate nohighlight">\(d\)</span>가 0보다 작으면 해당 물체 외부임을 의미한다. 이러한 정의에 따라 <span class="math notranslate nohighlight">\(f(\mathbf x)=0\)</span> 일 때는 물체의 표면을 의미한다. <span class="math notranslate nohighlight">\(\text{sign}(d)\)</span>는 표면에 따른 normal orientation을 의미한다.</p>
-<ul class="simple">
-<li><p>DMTet : SDFs를 활용하여 3D shape을 생성하는 모델. coarse voxel을 입력으로 받아 synthesized shape(SDF, tetrahedral)을 만들어 낸다.  DMTet의 출력은 dense spatial grid에서의 각 vertex <span class="math notranslate nohighlight">\(v_i\)</span>별 SDF 값 <span class="math notranslate nohighlight">\(s_i\)</span>와 displacement <span class="math notranslate nohighlight">\(\vartriangle v_i\)</span> 이다. 이후 설명 생략</p></li>
-<li><p>GET3D : DMTet에 추가적인 texture 정보까지도 생성하는 모델이다. 물체의 표면의 지점 <span class="math notranslate nohighlight">\(p\)</span> 마다 RGB color를 예측하는 모델을 따로 학습시켜 texture를 만들었다. 이후 설명 생략</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4874.png"><img alt="figure4" class="bg-light mb-1" src="../../_images/IMG_4874.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 680 </span><span class="caption-text">texture, bump, displacement의 비교 <br />
-source - <a class="reference external" href="https://grabcad.com/tutorials/adding-textures-to-3d-models-texture-bump-and-displacement-mapping-how-to-make-photo-realistic-models">tutorials in grabcad</a></span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>bump는 lighting 을 고려하여 texture가 더 자연스러워 졌지만 구의 표면을 보면 물체의 형태가 실제로 바뀐것은 아님을 알 수 있다. displacement를 보면 texture를 따라 물체의 표면이 변화된것을 볼 수 있다.</p>
-</section>
-<section id="diffusion-models">
-<h2>2.3 Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
-<p>본 논문에서 활용한 diffusion model은 DDPM으로 diffusion process(noising process)를 data sample <span class="math notranslate nohighlight">\(x_0\)</span> 에 gaussian noise를 서서히 추가하여 완전한 노이즈가 되어가는 과정 <span class="math notranslate nohighlight">\((x_1,x_2,…x_T)\)</span> 으로 표현했다. 일반적으로 <span class="math notranslate nohighlight">\(x_T\)</span>는 gaussian noise와 구분불가능한 상태로 상정한다. 해당 과정은 sequential하게 진행되지만 활용시에는 아래의 식과 같이 특정 단계로 바로 “jump”하는 방식을 이용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-x_t=\sqrt{\bar\alpha_t}x_0+\sqrt{1-\bar\alpha_t}\epsilon\tag{6}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\epsilon\)</span> 은 랜덤한 노이즈를 의미하고, <span class="math notranslate nohighlight">\(\bar\alpha_t\)</span>는 단조감소하는 노이즈 스케줄을 의미한다. (<span class="math notranslate nohighlight">\(t=0\)</span> 일때는 sample data가 되어야 하므로 <span class="math notranslate nohighlight">\(\bar\alpha_0=1\)</span>)</p>
-<p>모델 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>를 학습할때는 아래의 손실함수를 사용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{\text{simple}}=E_{x_0\sim q(x_0),\epsilon\sim\mathcal N(0,\mathbf I),t\sim U[1,T]}\|\epsilon -\epsilon_\theta (x_t,t)\|^2_2\tag{7}
-\]</div>
-<p>아래와 같이 표현할 수도 있는데 Shap-E 논문에서는 아래의 식을 활용하였다. 위는 (모델이 예측하는 노이즈, diffusion process에서 더해진 노이즈)의 차이를 줄이는 방향으로 학습한다는 의미이고, 아래는 (data sample <span class="math notranslate nohighlight">\(x_0\)</span>, 모델이 예측한 노이즈를 제거하여 만든 이미지)의 차이를 줄이는 방향으로 학습한다는 의미이다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{x_0}=E_{x_0\sim q(x_0),\epsilon\sim\mathcal N(0,\mathbf I),t\sim U[1,T]}\|x_\theta (x_t,t)-x_0\|^2_2\tag{8}
-\]</div>
-<p>denoising시에는 높은 퀄리티와 적당한 latency를 위해 Heun sampler와 classifier-free guidance를 사용했다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat x_\theta(x_t,t|y)=x_\theta(x_t,t)+s\space\cdot\space (x_\theta(x_t,t|y)-x_\theta(x_t,t)) \tag{9}
-\]</div>
-<p><span class="math notranslate nohighlight">\(s\)</span> 는 guidance scale이고 <span class="math notranslate nohighlight">\(s=0, s=1\)</span> 일때는 regular unconditional, conditional sampling을 뜻한다. <span class="math notranslate nohighlight">\(s\)</span> 를 더 키우면 일관성(coherence)은 커지지만 다양성(diversity)이 떨어질 수 있다. 실험적으로 나은 결과물을 얻기 위해서는 guidance가 필요하다는 것을 알아냈다. (section 5의 figure 4 참고)</p>
-</section>
-<section id="latent-diffusion">
-<h2>2.4 Latent Diffusion<a class="headerlink" href="#latent-diffusion" title="Permalink to this heading">#</a></h2>
-<p>continuous latent space에서도 diffusion을 활용하여 샘플들을 생성할 수 있다. 이는 Stable Diffusion(LDM)에서 제안된 것으로, pixel space와 latent space간의 변환을 담당하는 encoder와 decoder를 추가하여 two-stage방식으로 모델을 학습시키면 된다. 앞서 봤던 노이즈를 예측을 담당하는 모델 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>는 latent space에서 추가된 노이즈(latent noise)를 예측하게 되는 것이다. original LDM에서는 latent noise를 원본 이미지 보다 낮은 복잡도(lower-dimensional distribution)를 가지도록 KL penalty나 vector quantization layer를 사용했다.</p>
-<p>본 논문에서도 위와 유사한 방식을 사용했으나 GAN-based objective와 perceptual loss를 사용하지 않고 단순히 <span class="math notranslate nohighlight">\(L_1\)</span>, <span class="math notranslate nohighlight">\(L_2\)</span> reconstruction loss를 사용했다. 또한 KL regularization과 vector quantization은 bottleneck이 되므로 고정된 numerical range를 가지도록 하고 diffusion style의 noise를 추가 했다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>3. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>Point-E</p></li>
-<li><p>3D auto-encoder + implicit decoder</p>
-<ul>
-<li><p>Fu et al. [16] - SDF sample grid를 encode, implicit decoder의 condition으로 사용.</p></li>
-<li><p>Sanghi et al. [54] - voxel grid를 encode, implicit occupancy network의 condition으로 사용.</p></li>
-<li><p>Liu et al. [34] - voxel-based encoder와 implicit occupancy, color decoder를 학습.</p></li>
-<li><p>Kosiorek et al. [30] - rendered view을 encode, encoding된 latent vector를 NeRF의 condition으로 사용.</p></li>
-<li><p>Chen and Wang [6] - transformer기반 모델을 사용하여 rendered view에서 MLP parameter를 곧바로 생성.</p></li>
-</ul>
-</li>
-<li><p>학습된 encoder 없이 implicit 3D representation을 생성하는것을 목표로 하는 모델들</p>
-<ul>
-<li><p>Park et al. [43] - auto decoder를 학습. 데이터셋 내의 각 샘플의 embedding vector table을 학습.</p></li>
-<li><p>Bautista et al. [4] - NeRF decoder를 조건으로 scene 별 latent code를 학습.</p></li>
-<li><p>Dupont et al. [12] - implicit function을 학습하기 위해 meta learning 활용.</p></li>
-<li><p>Erkoç et al. [14] - implicit MLP weight를 곧바로 생성하기 위해 diffusion을 활용.</p></li>
-<li><p>akin to [12] - NeRF parameter fitting을 필요로 함.</p></li>
-<li><p>Wang et al. [66] - 데이터셋 내의 각 샘플의 개별 NeRF를 joint 학습.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="method">
-<h1>4. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
-<blockquote>
-<div><p>📌 훈련 방법 <br />
-two stage 방식으로 Shap-E를 학습시킨다.<br />
-<strong>Stage 1. train an encoder</strong> <br />
-<strong>Stage 2. train a conditional diffusion model on outputs of the encoder</strong></p>
-</div></blockquote>
-<section id="d-encoder">
-<h2>4.1 3D Encoder<a class="headerlink" href="#d-encoder" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id7">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4861.png"><img alt="figure5" class="bg-light mb-1" src="../../_images/IMG_4861.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 681 </span><span class="caption-text">3D Encoder의 구조</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>encoder의 input :</strong>  (point clouds, rendered views) <br />
-<strong>encoder의 output :</strong> MLP의 parameter</p>
-<blockquote>
-<div><p>입력 representation의 세부 특성 <br />
-Point-E와 비교하였을때, post-processing 방식을 변경하여 3D Asset별 사용하는 RGB point cloud의 point 개수를 늘이고, 더 많은 view를 256x256 크기로 렌더링 하여 사용했다. 구체적으로는 다음과 같다.</p>
-<ul class="simple">
-<li><p>Point Clouds: 기존 4K -&gt; 16K</p></li>
-<li><p>Multiview point clouds: 기존 20 views -&gt; 60 views (20개의 view를 사용한 경우 생성된 pointcloud에 crack이 발생했다고 함)\ view 렌더링시 조명과 물체표면의 특성을 간략화했다.</p></li>
-</ul>
-</div></blockquote>
-<p>encoder에서 얻은 parameter는 implicit function에서 asset의 representation을 의미한다. (+의미상 다양한 형태로 입력받은 3D asset의 특성을 융합하여 하나로 표현한 것, 논문의 장점으로 NeRF와 point cloud 모두를 얻을수 있다고 했으므로 상당히 의도가 느껴지는 입력으로 보인다. )</p>
-<figure class="align-default" id="id8">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4869.png"><img alt="figure7" class="bg-light mb-1" src="../../_images/IMG_4869.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 682 </span><span class="caption-text">pseudocode</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>encoder에 입력된 point clouds와 views는 cross-attention과 transformer backbone에 의해 처리되어 sequence of vectors가 된다. 이후 latent bottleneck과 projection layer를 통과하여 MLP weight matrices를 만든다.</p>
-<p>encoder는 NeRF rendering objective를 사용(Section 4.1.1 참고)하여 사전 학습한다. mesh-based objective를 이용한 사전학습시 보다 더 안정적인 결과물을 얻을 수 있었다고 한다. 이후에는 SDF와 texture color prediction을 위해 추가적인 output head를 넣어 Section 4.1.2와 같이 two-stage 방식으로 head들을 학습시킨다.</p>
-<section id="decoding-with-nerf-rendering">
-<h3>4.1.1 Decoding with NeRF Rendering<a class="headerlink" href="#decoding-with-nerf-rendering" title="Permalink to this heading">#</a></h3>
-<p>original NeRF의 식과 유사하지만 coarse net과 fine net이 parameter들을 공유할 수 있도록 하지는 않았다. 랜덤한 4096개의 ray를 각 학습 데이터에서 샘플링하였으며, <span class="math notranslate nohighlight">\(L_1\)</span> loss가 최소가 되도록 했다. (original NeRF에서는 <span class="math notranslate nohighlight">\(L_2\)</span> loss를 사용)</p>
-<div class="math notranslate nohighlight">
-\[
-L_{\mathbf{RGB}}=E_{\mathbf r\in R}[\|\hat C_c(\mathbf r)-C(\mathbf r)\|_1+\|\hat C_f(\mathbf r)-C(\mathbf r)\|_1] \tag{10}
-\]</div>
-<p>여기에 추가적으로 각 ray의 transmittance에 대한 손실함수를 추가했다. 특히, 한 ray의 density 적분값(integrated density)을 통해 얻은transmittance로 coarse rendering과 fine rendering시 <span class="math notranslate nohighlight">\(\hat T_c(r)\)</span> 와 <span class="math notranslate nohighlight">\(\hat T_f(r)\)</span>를 예측하였다. ground truth로는 gt rendering결과의 alpha channel을 사용하였다. 이 손실함수는 아래와 같이 표현할 수 있다. (+NeRF의 경우 novel view를 만드는 것이 목적이었으나 본 논문은 mesh도 생성해야 하므로 노이즈 제거가 더욱 중요하였을 것으로 생각된다.)</p>
-<div class="math notranslate nohighlight">
-\[
-L_T=E_{\mathbf r\in R}[\|\hat T_c(\mathbf r)-T(\mathbf r)\|_1 +\|\hat T_f(\mathbf r)-T(\mathbf r)\|_1]\tag{11}
-\]</div>
-<p>최종적으로는 두 손실함수를 합하여 최적화를 진행하였다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_\text{NeRF}=L_\text{RGB}+L_T \tag{12}
-\]</div>
-</section>
-<section id="decoding-with-stf-rendering">
-<h3>4.1.2 Decoding with STF Rendering<a class="headerlink" href="#decoding-with-stf-rendering" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id9">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4874.png"><img alt="figure8" class="bg-light mb-1" src="../../_images/IMG_4874.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 683 </span><span class="caption-text">texture, bump, displacement의 비교 <br />
-source - https://grabcad.com/tutorials/adding-textures-to-3d-models-texture-bump-and-displacement-mapping-how-to-make-photo-realistic-models</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>NeRF 방식을 통해 사전학습한 후, MLPs에 STF output heads를 추가한다. 이러한 MLPs는 SDF와 texture color를 예측한다. triangle mesh를 생성하기 위해서는 각 vertex의 SDF를 regular <span class="math notranslate nohighlight">\(128^3\)</span> grid로 옮겨 미분가능한 형태의 Marching Cube를 진행해야 한다. 이후 texture color는 최종 mesh의 각 vertex texture color head를 통해 얻는다. Pytorch 3D를 활용하면 미분가능한 rendering을 통해 textured mesh를 얻을 수 있다고 한다. 렌더링 시에는 데이터셋 구축시 preprocessing에 사용한 것과 동일한 lighting 조건을 사용했다.</p>
-<p>사전 실험시 랜덤 초기화된 STF output heads를 사용했을 때는 결과가 불안정 했으며, rendering based objective를 사용하여 학습하는 것이 어려웠다. 해당 문제를 완화하기 위해 SDF와 texture color를 해당 output heads를 직접 학습시키기 전에 distill 접근법을 사용했다. Point-E의 regression model을 활용하여 입력 좌표를 랜덤하게 샘플링하고, SDF distillation target을 구했다. 그리고 RGB target로는 asset RGB point cloud에서 특정위치 <span class="math notranslate nohighlight">\(x\)</span>와 가장 가까운(nearest neighbor) point의 색을 사용했다. distillation training 시 distillation loss와 NeRF loss를 더하여 사용했다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_\text{distill}=L_\text{NeRF}+E_{\mathbf x\sim U[-1,1]^3}[\|\text{SDF}_\theta(\mathbf x)-\text{SDF}_\text{regression}(\mathbf x)\|_1+\|\text{RGB}_\theta(\mathbf x)-\text{RGB}_\text{NN}(\mathbf x)\|_1]
-\tag{13}
-\]</div>
-<p>STF output heads가 distillation을 통해 적절한 초기값을 갖게된 후, NeRF encoder와 STF rendering 전체를 end-to-end로 fine-tune한다. 실험적으로 STF rendering에는 <span class="math notranslate nohighlight">\(L_1\)</span>을 사용하는 것은 불안정했으므로 <span class="math notranslate nohighlight">\(L_2\)</span> 손실함수만 사용하는 것이 이러한 rendering 방식에 적절함을 알 수 있었다. STF rendering에 사용한 loss는 아래와 같다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_\text{STF}=\frac{1}{N\space \cdot\space s^2}\sum^N_{i=1}\|\text{Render}(\text{Mesh}_i)-\text{Image}_i\|^2_2\tag{14}
-\]</div>
-<ul class="simple">
-<li><p>mesh를 렌더링한 이미지와 target 이미지의 L2 reconstruction loss의 평균</p></li>
-</ul>
-<p>N은 이미지 개수, s는 이미지의 화질, <span class="math notranslate nohighlight">\(\text{Mesh}_i\)</span>는 <span class="math notranslate nohighlight">\(\text{sample}_i\)</span>의 constructed mesh를 말한다. <span class="math notranslate nohighlight">\(\text{Image}_i\)</span>는 RGBA rendering된 결과물로 alpha채널을 포함하고 있기 때문에 transmittance에 대한 loss를 따로 추가하지 않았다.</p>
-<p>최종 fine-tuning 단계에서는 아래와 같이 더한 objective function을 사용한다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_\text{FT}=L_\text{NeRF}+L_\text{STF}\tag{15}
-\]</div>
-</section>
-</section>
-<section id="id1">
-<h2>4.2 Latent Diffusion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>Point-E의 transformer 기반 diffusion 구조를 채택했다. 하지만 point cloud를 latent vector의 sequence로 바꾸었다. latent sequences의 크기는 <span class="math notranslate nohighlight">\(1024\times1024\)</span> 로 이를 길이가 1024인 1024개의 token처럼 transformer의 입력으로 사용했다. 각 token은 MLP weight matrices의 각 row와 일치한다. Shap-E의 모델은 Point-E base 모델과 유사한 부분이 많다.(context length와 width가 동일) 하지만 더 고차원의 샘플(samples in higher-dimensional)을 생성하는데 이는 입출력 채널의 복잡도(dimension)가 증가하였기 때문이다.</p>
-<p>Point-E의 conditioning 방식을 동일하게 사용하였다. 이미지 조건부 3d 생성시 256-token CLIP embedding sequence를 transformer context로 사용했으며, 텍스트 조건부 3d 생성시 single token을 사용했다.</p>
-<p>Point-E와의 차이점으로는 diffusion model의 출력을 <span class="math notranslate nohighlight">\(\epsilon\)</span> prediction으로 parameterize하지 않았다는 것이다. 대신 본 논문에서는 곧바로 sample을 예측하는 방식을 사용했다. 대수적으로는 동일한 의미이나 초기 실험에서 더 일관된 결과물을 생성하여 해당 방식을 사용하였다고 함.</p>
-</section>
-<section id="dataset">
-<h2>4.3 Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
-<p>공정한 비교를 위해 대부분의 실험에서 Point-E와 동일한 3D assets을 사용했다. 하지만 post-processing부분에서는 차이가 있다.</p>
-<ul>
-<li><p>point cloud 계산시, 20개가 아닌 60개의 view를 rendering했다. 20개만 사용했을때 주어진 view에서 확인할 수 없는 영역때문에 crack 발생 (+NeRF 때문으로 추정)</p></li>
-<li><p>point cloud를 4K 가아닌 16K의 point로 만들었다.</p></li>
-<li><p>encoder학습을 위한 view를 렌더링 할때 단순한 소재와 라이팅을 사용하였다. 특히 모든 모델은 동일한 고정된 라이팅 조건내에서 렌더링 되었다. ambient와 diffuse shading만 사용 (+반사광이 고려되지 않아 표면이 매끈한 물체는 생성하기 어려울 것으로 추정됨)</p>
-<figure class="align-default" id="id10">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/untitled.png"><img alt="figure7" class="bg-light mb-1" src="../../_images/untitled.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 684 </span><span class="caption-text">Phong model <br />
-기본적인 shading방식으로 본 논문에서는 specular를 사용하지 않았다 <br />
-source - <a class="reference external" href="https://www.researchgate.net/publication/265514880_Realistic_Visualisation_of_Endoscopic_Surgery_in_a_Virtual_Training_Environment">Realistic_Visualisation_of_Endoscopic_Surgery_in_a_Virtual_Training_Environment</a></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p>text 조건부 모델과 해당 Point-E baseline을 위해 데이터 셋을 더욱 확장했다. 이 데이터 셋을 위해 대략 100만개의 3D assets과 12만개의 (human labeled)caption을 추가로 수집했다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="result">
-<h1>5. Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h1>
-<section id="encoder-evaluation">
-<h2>5.1 Encoder Evaluation<a class="headerlink" href="#encoder-evaluation" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id11">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/table12.png"><img alt="figure8" class="bg-light mb-1" src="../../_images/table12.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 685 </span><span class="caption-text">각 스테이지 별 훈련 이후 encoder 성능평가</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>distillation에서 rendering 이미지의 퀄리티가 떨어지는 것처럼 보이나 finetuning시 퀄리티가 더욱 좋아진다. 또한 STF의 퀄리티 또한 크게 상승한다.</p>
-</section>
-<section id="comparison-to-point-e">
-<h2>5.2 Comparison to Point-E<a class="headerlink" href="#comparison-to-point-e" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id12">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure4.png"><img alt="figure9" class="bg-light mb-1" src="../../_images/figure4.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 686 </span><span class="caption-text">Shap-E와 Point-E비교<br />
-세모 마크가 Point-E, 원형 마크가 Shap-E이다.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>point-E보다 Shap-E의 CLIP score가 더 높다. 더 많은수의 parameter를 가진 point-E를 사용하여도 Shap-E의 성능이 우수함.</p>
-<p>두 평가 지표 모두 OpenAI의 CLIP (Contrastive Language-Image Pretraining) 모델을 활용한 평가 지표로 CLIP score의 경우 주어진 텍스트와 생성결과의 일관성을 평가하기 위한 것이고, CLIP R precision의 경우 생성결과와 참조 이미지가 얼마나 비슷한지 평가하기 위한 것이다.</p>
-<figure class="align-default" id="id13">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure5.png"><img alt="figure10" class="bg-light mb-1" src="../../_images/figure5.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 687 </span><span class="caption-text">Shap-E와 Point-E비교</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>동일한 base model의 크기 동일한 데이터셋으로 학습시킨 결과. 텍스트 조건부 생성시에는 퀄리티 차이가 크지 않음.</p>
-<figure class="align-default" id="id14">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure6.png"><img alt="figure11" class="bg-light mb-1" src="../../_images/figure6.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 688 </span><span class="caption-text">Shap-E와 Point-E비교</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이미지 조건부 생성시에는 비교적 차이가 크다.
-벤치 결과를 보면 point-E에서 나무사이 빈공간을 무시해버린것을 볼수 있다.
-위의 강아지와 컵 이미지 기반 생성 결과를 보면 point-E와 shap-E가 유사한 케이스에서 실패하는 모습을 보였다.</p>
-</section>
-<section id="comparison-to-other-methods">
-<h2>5.3 Comparison to Other Methods<a class="headerlink" href="#comparison-to-other-methods" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id15">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/table21.png"><img alt="figure12" class="bg-light mb-1" src="../../_images/table21.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 689 </span><span class="caption-text">COCO 데이터셋을 이용한 비교결과</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>reference latency에서 point-E와 Shap-E의 차이가 있는데, 이는 Shap-E는 추가적인 upsampling diffusion model을 사용하지 않기 때문이다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="limitations-and-future-work">
-<h1>6. Limitations and Future Work<a class="headerlink" href="#limitations-and-future-work" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id16">
-<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure7.png"><img alt="figure13" class="bg-light mb-1" src="../../_images/figure7.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 690 </span><span class="caption-text">텍스트 조건부 생성 결과</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>왼쪽 그림과 같이 여러가지 특성을 가진물체를 생성하는데에 어려움을 겪는 모습을 보인다. 이는 학습에 사용한 paired data가 제한적이기 때문으로 더 많은 3D dataset을 수집하면 나아질 수 있다. 또한 texture의 세부 특성을 encoder가 무시하는 경우도 있는데, 더 나은 encoder를 사용함으로써 개선될수 있다.</p>
-<p>Shap-E는 다양한 3D 생성 기술들을 융합하는데에 도움을 줄 수 있다. 예를 들어 Shap-E로 생성한 NeRF와 mesh를 다른 최적화 기반  모델을 초기화 하는데 사용하는 것이다. 이를 통해 더 빠른 수렴도 가능할 것으로 생각된다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<p>Shap-E는 latent diffusion model을 3D implicit function공간에서 전개하여 NeRF와 textured mesh 모두를 생성 할 수 있었다. 동일한 데이터셋을 활용하여 다른 생성모델들과 비교하였을때 더 나은 성능을 보임을 확인했다. 또한 text 조건부 생성시 이미지 없이도 다양한 흥미로운 물체를 생성할 수 있음을 확인했다. 이는 implicit representation을 생성함에 큰 가능성을 보여준다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="acknowledgements">
-<h1>8. Acknowledgements<a class="headerlink" href="#acknowledgements" title="Permalink to this heading">#</a></h1>
-<p>특정 인물들에 대한 언급 외에도 ChatGPT로부터 valuable writing feedback을 받았다고 표현한 부분이 있었다.</p>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Point_E.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DreamFusion.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title"><strong>DreamFusion</strong></p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Shap-E</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-fields-nerf">2.1 Neural Radiance Fields (NeRF)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signed-distance-functions-and-texture-field-stf">2.2 Signed Distance Functions and Texture Field (STF)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.3 Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion">2.4 Latent Diffusion</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-encoder">4.1 3D Encoder</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-nerf-rendering">4.1.1 Decoding with NeRF Rendering</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-stf-rendering">4.1.2 Decoding with STF Rendering</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.2 Latent Diffusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4.3 Dataset</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#encoder-evaluation">5.1 Encoder Evaluation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-point-e">5.2 Comparison to Point-E</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods">5.3 Comparison to Other Methods</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work">6. Limitations and Future Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#acknowledgements">8. Acknowledgements</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Shap-E &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Shap-E';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DreamFusion" href="DreamFusion.html" />
+    <link rel="prev" title="Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)" href="Point_E.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Shap-E.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Shap-E.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Shap-E</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Shap-E</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-fields-nerf">2.1 Neural Radiance Fields (NeRF)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signed-distance-functions-and-texture-field-stf">2.2 Signed Distance Functions and Texture Field (STF)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.3 Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion">2.4 Latent Diffusion</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-encoder">4.1 3D Encoder</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-nerf-rendering">4.1.1 Decoding with NeRF Rendering</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-stf-rendering">4.1.2 Decoding with STF Rendering</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.2 Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4.3 Dataset</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#encoder-evaluation">5.1 Encoder Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-point-e">5.2 Comparison to Point-E</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods">5.3 Comparison to Other Methods</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work">6. Limitations and Future Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#acknowledgements">8. Acknowledgements</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Shap-E: Generating Conditional 3D Implicit Functions</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2305.02463">https://arxiv.org/abs/2305.02463</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/openai/shap-e">https://github.com/openai/shap-e</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kyeongmin Yu</p></li>
+<li><p><strong>Last updated on July. 18. 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="shap-e">
+<h1>Shap-E<a class="headerlink" href="#shap-e" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id2">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure1.png"><img alt="figure1" class="bg-light mb-1" src="../../_images/figure1.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 688 </span><span class="caption-text">Shap-E를 통해 생성한 3D assets</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>0. Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<blockquote>
+<div><p>📌 <strong>논문요약</strong><br />
+2023년 openai의 <a class="reference external" href="https://arxiv.org/search/cs?searchtype=author&amp;query=Jun,+H">Heewoo Jun</a>, <a class="reference external" href="https://arxiv.org/search/cs?searchtype=author&amp;query=Nichol,+A">Alex Nichol</a> 가 발표한 논문입니다. official code는 <a class="reference external" href="https://github.com/openai/shap-e/tree/main">github</a>에서, diffusers를 활용한 코드는 <a class="reference external" href="https://huggingface.co/docs/diffusers/en/api/pipelines/shap_e">huggingface</a>에서 확인할 수 있습니다. <br />
+<strong>목적 -</strong> 조건부 3D assets 생성 <br />
+<strong>생성방식 -</strong> encoder를 통해 implicit function의 parameter 형태로 표현한 후, 이를 diffusion model의 조건으로 사용함으로써 conditional 3D assets을 생성할 수 있도록 했다.<br />
+<strong>차별점 -</strong> texture mesh 나 NeRF 모두 생성 가능한 implicit function의 parameters를 직접적으로 생성할 수 있다. (다른 3D 생성 모델의 경우 단일 표현만 가능한 경우가 많다고 합니다.)</p>
+</div></blockquote>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>implicit neural representations (INRs)는 3D assets을 인코딩하는 방식으로 많이 사용된다. 3D asset을 표현하기 위해 INRs는 주로 3D coordinate를 location specific info(density, color)로 맵핑한다. 일반적으로 INRs는 화질에 영향을 받지 않는데 이는 고정된 grid나 sequence가 아닌 arbitrary input points를 처리할 수 있기 때문이다. 덕분에 end-to-end 미분이 가능하다. INRs은 이후 다양한 downstream applications도 가능하게 한다. 본 논문에서는 2가지 타입의 INRs을 다룬다.</p>
+<ul class="simple">
+<li><p><strong>Neural Radiance Field (NeRF)</strong> - 3D scene을 function mapping으로 표현.</p>
+<ul>
+<li><p>coordinate, viewing direction <span class="math notranslate nohighlight">\(\rightarrow\)</span> density, colors along camera rays</p></li>
+</ul>
+</li>
+<li><p><strong>textured 3D mesh</strong> (DMTet, GET3D)</p>
+<ul>
+<li><p>coordinate <span class="math notranslate nohighlight">\(\rightarrow\)</span> colors, signed distances, vertex offsets</p></li>
+<li><p>INRs는 삼각메쉬를 생성할 때 사용될 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+<p>이미지, 비디오, 오디오, 3D assets 생성에 관한 다양한 연구가 있지만 downstream application에서 사용하기 편한 형태로 3D assets을 표현하는 방법에 대한 연구는 부족하다. 본 논문은 단일 representation으로 부터 두가지 형태로 rendering 가능하게 했다는 특징이 있다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<section id="neural-radiance-fields-nerf">
+<h2>2.1 Neural Radiance Fields (NeRF)<a class="headerlink" href="#neural-radiance-fields-nerf" title="Permalink to this heading">#</a></h2>
+<p>Mildenhall et al. 는 아래와 같이 NeRF(3D scene을 implicit function으로 표현하는 방법)를 제안했다.</p>
+<div class="math notranslate nohighlight">
+\[
+F_{\Theta} : (\mathbf{x},\mathbf d)↦(\mathbf c,\sigma) \tag{1}
+\]</div>
+<p><span class="math notranslate nohighlight">\(x\)</span> 는 3D 공간 좌표, <span class="math notranslate nohighlight">\(d\)</span> 는 3D 시야 각도, <span class="math notranslate nohighlight">\(c\)</span> 는 RGB, <span class="math notranslate nohighlight">\(\sigma\)</span> 는 density(<span class="math notranslate nohighlight">\(\ge 0\)</span>) 이다. <span class="math notranslate nohighlight">\(F_\Theta\)</span> 는 편의를 위해 <span class="math notranslate nohighlight">\(\sigma(x)\)</span> 와 <span class="math notranslate nohighlight">\(c(x,d)\)</span> 두개의 식으로 나누어 표현했다.</p>
+<p>새로운 시야에서 바라본 scene 을 렌더링하기 위해서, 아래와 같이 각 ray에 맞는 color값을 계산한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat C(\mathbf r)=\int^\infty_0 T(t)\sigma(\mathbf r(t))\mathbf c(\mathbf r(t),\mathbf d)dt, \space \text{where} \space T(t)=\text{exp}\Big(-\int^t_0 \sigma(\mathbf r(s))ds\Big) \tag{2}
+\]</div>
+<ul>
+<li><p>수식(2) 설명</p>
+<figure class="align-default" id="id3">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4859.png"><img alt="figure1" class="bg-light mb-1" src="../../_images/IMG_4859.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 689 </span><span class="caption-text">수식 (2) 보충설명</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>위의 적분식을 아래와 같이 discrete sum으로 간략화 할 수 있다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat C(\mathbf r)=\sum^N_{i=1} T_i(1-\text{exp}(-\sigma(\mathbf r(t_i))\delta_i))\mathbf c (\mathbf r(t_i),\mathbf d), \space \text{where} \space T_i=\text{exp}\Big(-\sum^{i-1}_{j=1} \sigma(\mathbf r(t_j))\delta_j\Big) \tag{3}
+\]</div>
+<p>구간을 나누는 방식은 중요한 부분으로 coarse와 fine 두단계로 나누어 더 세부적으로 sequence를 나눈다. 2개의 NeRF 모델을 이용하여 2번의 sampling을 한다.</p>
+<div class="math notranslate nohighlight">
+\[
+w_i \sim T_i(1-\text{exp}(-\sigma(\mathbf r(t_i))\delta_i))\tag{4}
+\]</div>
+<p>본 논문에서는 ray의 transmittance를 아래와 같이 추가적으로 정의하였다. 이는 직관적으로 ray의 alpha값이나 opacity의 총합에 해당한다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat T(\mathbf r)=1-\text{exp}\Big(-\sum^N_{i=1}\sigma(\mathbf r(t_i))\delta_i\Big)\tag{5}
+\]</div>
+<ul>
+<li><p>수식(5) 설명</p>
+<figure class="align-default" id="id4">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4860.png"><img alt="figure2" class="bg-light mb-1" src="../../_images/IMG_4860.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 690 </span><span class="caption-text">수식 (5) 보충 설명</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="signed-distance-functions-and-texture-field-stf">
+<h2>2.2 Signed Distance Functions and Texture Field (STF)<a class="headerlink" href="#signed-distance-functions-and-texture-field-stf" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서 STF는 signed distances와 texture colors 두가지 모두를 생성하는 implicit function을 의미한다. 이번 섹션에서는 이러한 implicit function이 meshes를 구성하고 rendering을 만드는 방식을 설명한다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4872.png"><img alt="figure3" class="bg-light mb-1" src="../../_images/IMG_4872.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 691 </span><span class="caption-text">point cloud, voxel, polygon mesh의 비교 <br />
+source - 3D Vision with Transformers: A Survey</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Signed Distance Functions (SDFs)</strong>는 3D shape을 scalar field에서 표현하는 전통적인 방법중 하나다. 특히 SDF <span class="math notranslate nohighlight">\(f\)</span>는 coordinate <span class="math notranslate nohighlight">\(x\)</span>를 scalar 로 mapping한다. (<span class="math notranslate nohighlight">\(f(\mathbf x)=d\)</span>) 여기서 <span class="math notranslate nohighlight">\(d\)</span>는 특정 위치 <span class="math notranslate nohighlight">\(x\)</span>에서 가장 가까운 물체의 표면까지의 거리를 말한다. <span class="math notranslate nohighlight">\(d\)</span>가 0보다 작으면 해당 물체 외부임을 의미한다. 이러한 정의에 따라 <span class="math notranslate nohighlight">\(f(\mathbf x)=0\)</span> 일때는 물체의 표면을 의미한다. <span class="math notranslate nohighlight">\(\text{sign}(d)\)</span>는 표면에 따른 normal orientation을 의미한다.</p>
+<ul class="simple">
+<li><p>DMTet : SDFs를 활용하여 3D shape을 생성하는 모델. coarse voxel을 입력으로 받아 synthesized shape(SDF, tetrahedral)을 만들어 낸다.  DMTet의 출력은 dense spatial grid에서의 각 vertex <span class="math notranslate nohighlight">\(v_i\)</span>별 SDF 값 <span class="math notranslate nohighlight">\(s_i\)</span>와 displacement <span class="math notranslate nohighlight">\(\vartriangle v_i\)</span> 이다. 이후 설명 생략</p></li>
+<li><p>GET3D : DMTet에 추가적인 texture 정보까지도 생성하는 모델이다. 물체의 표면의 지점 <span class="math notranslate nohighlight">\(p\)</span> 마다 RGB color를 예측하는 모델을 따로 학습시켜 texture를 만들었다. 이후 설명 생략</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4874.png"><img alt="figure4" class="bg-light mb-1" src="../../_images/IMG_4874.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 692 </span><span class="caption-text">texture, bump, displacement의 비교 <br />
+source - <a class="reference external" href="https://grabcad.com/tutorials/adding-textures-to-3d-models-texture-bump-and-displacement-mapping-how-to-make-photo-realistic-models">tutorials in grabcad</a></span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>bump는 lighting 을 고려하여 texture가 더 자연스러워 졌지만 구의 표면을 보면 물체의 형태가 실제로 바뀐것은 아님을 알수 있다. displacement를 보면 texture를 따라 물체의 표면이 변화된것을 볼 수 있다.</p>
+</section>
+<section id="diffusion-models">
+<h2>2.3 Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
+<p>본 논문에서 활용한 diffusion model은 DDPM으로 diffusion process(noising process)를 data sample <span class="math notranslate nohighlight">\(x_0\)</span> 에 gaussian noise를 서서히 추가하여 완전한 노이즈가 되어가는 과정 <span class="math notranslate nohighlight">\((x_1,x_2,…x_T)\)</span> 으로 표현했다. 일반적으로 <span class="math notranslate nohighlight">\(x_T\)</span>는 gaussian noise와 구분불가능한 상태로 상정한다. 해당 과정은 sequential하게 진행되지만 활용시에는 아래의 식과 같이 특정 단계로 바로 “jump”하는 방식을 이용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+x_t=\sqrt{\bar\alpha_t}x_0+\sqrt{1-\bar\alpha_t}\epsilon\tag{6}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\epsilon\)</span> 은 랜덤한 노이즈를 의미하고, <span class="math notranslate nohighlight">\(\bar\alpha_t\)</span>는 단조감소하는 노이즈 스케줄을 의미한다. (<span class="math notranslate nohighlight">\(t=0\)</span> 일때는 sample data가 되어야 하므로 <span class="math notranslate nohighlight">\(\bar\alpha_0=1\)</span>)</p>
+<p>모델 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>를 학습할때는 아래의 손실함수를 사용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{\text{simple}}=E_{x_0\sim q(x_0),\epsilon\sim\mathcal N(0,\mathbf I),t\sim U[1,T]}\|\epsilon -\epsilon_\theta (x_t,t)\|^2_2\tag{7}
+\]</div>
+<p>아래와 같이 표현할 수도 있는데 Shap-E 논문에서는 아래의 식을 활용하였다. 위는 (모델이 예측하는 노이즈, diffusion process에서 더해진 노이즈)의 차이를 줄이는 방향으로 학습한다는 의미이고, 아래는 (data sample <span class="math notranslate nohighlight">\(x_0\)</span>, 모델이 예측한 노이즈를 제거하여 만든 이미지)의 차이를 줄이는 방향으로 학습한다는 의미이다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{x_0}=E_{x_0\sim q(x_0),\epsilon\sim\mathcal N(0,\mathbf I),t\sim U[1,T]}\|x_\theta (x_t,t)-x_0\|^2_2\tag{8}
+\]</div>
+<p>denoising시에는 높은 퀄리티와 적당한 latency를 위해 Heun sampler와 classifier-free guidance를 사용했다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat x_\theta(x_t,t|y)=x_\theta(x_t,t)+s\space\cdot\space (x_\theta(x_t,t|y)-x_\theta(x_t,t)) \tag{9}
+\]</div>
+<p><span class="math notranslate nohighlight">\(s\)</span> 는 guidance scale이고 <span class="math notranslate nohighlight">\(s=0, s=1\)</span> 일때는 regular unconditional, conditional sampling을 뜻한다. <span class="math notranslate nohighlight">\(s\)</span> 를 더 키우면 일관성(coherence)은 커지지만 다양성(diversity)이 떨어질 수 있다. 실험적으로 나은 결과물을 얻기 위해서는 guidance가 필요하다는 것을 알아냈다. (section 5의 figure 4 참고)</p>
+</section>
+<section id="latent-diffusion">
+<h2>2.4 Latent Diffusion<a class="headerlink" href="#latent-diffusion" title="Permalink to this heading">#</a></h2>
+<p>continuous latent space에서도 diffusion을 활용하여 샘플들을 생성할 수 있다. 이는 Stable Diffusion(LDM)에서 제안된 것으로, pixel space와 latent space간의 변환을 담당하는 encoder와 decoder를 추가하여 two-stage방식으로 모델을 학습시키면 된다. 앞서 봤던 노이즈를 예측을 담당하는 모델 <span class="math notranslate nohighlight">\(\epsilon_\theta\)</span>는 latent space에서 추가된 노이즈(latent noise)를 예측하게 되는 것이다. original LDM에서는 latent noise를 원본 이미지 보다 낮은 복잡도(lower-dimensional distribution)를 가지도록 KL penalty나 vector quantization layer를 사용했다.</p>
+<p>본 논문에서도 위와 유사한 방식을 사용했으나 GAN-based objective와 perceptual loss를 사용하지 않고 단순히 <span class="math notranslate nohighlight">\(L_1\)</span>, <span class="math notranslate nohighlight">\(L_2\)</span> reconstruction loss를 사용했다. 또한 KL regularization과 vector quantization은 bottleneck이 되므로 고정된 numerical range를 가지도록 하고 diffusion style의 noise를 추가 했다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>3. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Point-E</p></li>
+<li><p>3D auto-encoder + implicit decoder</p>
+<ul>
+<li><p>Fu et al. [16] - SDF sample grid를 encode, implicit decoder의 condition으로 사용.</p></li>
+<li><p>Sanghi et al. [54] - voxel grid를 encode, implicit occupancy network의 condition으로 사용.</p></li>
+<li><p>Liu et al. [34] - voxel-based encoder와 implicit occupancy, color decoder를 학습.</p></li>
+<li><p>Kosiorek et al. [30] - rendered view을 encode, encoding된 latent vector를 NeRF의 condition으로 사용.</p></li>
+<li><p>Chen and Wang [6] - transformer기반 모델을 사용하여 rendered view에서 MLP parameter를 곧바로 생성.</p></li>
+</ul>
+</li>
+<li><p>학습된 encoder 없이 implicit 3D representation을 생성하는것을 목표로 하는 모델들</p>
+<ul>
+<li><p>Park et al. [43] - auto decoder를 학습. 데이터셋 내의 각 샘플의 embedding vector table을 학습.</p></li>
+<li><p>Bautista et al. [4] - NeRF decoder를 조건으로 scene 별 latent code를 학습.</p></li>
+<li><p>Dupont et al. [12] - implicit function을 학습하기 위해 meta learning 활용.</p></li>
+<li><p>Erkoç et al. [14] - implicit MLP weight를 곧바로 생성하기 위해 diffusion을 활용.</p></li>
+<li><p>akin to [12] - NeRF parameter fitting을 필요로 함.</p></li>
+<li><p>Wang et al. [66] - 데이터셋 내의 각 샘플의 개별 NeRF를 joint 학습.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>4. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<blockquote>
+<div><p>📌 훈련 방법 <br />
+two stage 방식으로 Shap-E를 학습시킨다.<br />
+<strong>Stage 1. train an encoder</strong> <br />
+<strong>Stage 2. train a conditional diffusion model on outputs of the encoder</strong></p>
+</div></blockquote>
+<section id="d-encoder">
+<h2>4.1 3D Encoder<a class="headerlink" href="#d-encoder" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id7">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4861.png"><img alt="figure5" class="bg-light mb-1" src="../../_images/IMG_4861.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 693 </span><span class="caption-text">3D Encoder의 구조</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>encoder의 input :</strong>  (point clouds, rendered views) <br />
+<strong>encoder의 output :</strong> MLP의 parameter</p>
+<blockquote>
+<div><p>입력 representation의 세부 특성 <br />
+Point-E와 비교하였을때, post-processing 방식을 변경하여 3D Asset별 사용하는 RGB point cloud의 point 개수를 늘이고, 더 많은 view를 256x256 크기로 렌더링 하여 사용했다. 구체적으로는 다음과 같다.</p>
+<ul class="simple">
+<li><p>Point Clouds: 기존 4K -&gt; 16K</p></li>
+<li><p>Multiview point clouds: 기존 20 views -&gt; 60 views (20개의 view를 사용한 경우 생성된 point cloud에 crack이 발생했다고 함)<br />
+view 렌더링시 조명과 물체표면의 특성을 간략화했다.</p></li>
+</ul>
+</div></blockquote>
+<p>encoder에서 얻은 parameter는 implicit function에서 asset의 representation을 의미한다. (+의미상 다양한 형태로 입력받은 3D asset의 특성을 융합하여 하나로 표현한 것, 논문의 장점으로 NeRF와 point cloud 모두를 얻을수 있다고 했으므로 상당히 의도가 느껴지는 입력으로 보인다. )</p>
+<figure class="align-default" id="id8">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4869.png"><img alt="figure7" class="bg-light mb-1" src="../../_images/IMG_4869.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 694 </span><span class="caption-text">pseudocode</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>encoder에 입력된 point clouds와 views는 cross-attention과 transformer backbone에 의해 처리되어 sequence of vectors가 된다. 이후 latent bottleneck과 projection layer를 통과하여 MLP weight matrices를 만든다.</p>
+<p>encoder는 NeRF rendering objective를 사용(Section 4.1.1 참고)하여 사전 학습한다. mesh-based objective를 이용한 사전학습시 보다 더 안정적인 결과물을 얻을 수 있었다고 한다. 이후에는 SDF와 texture color prediction을 위해 추가적인 output head를 넣어 Section 4.1.2와 같이 two-stage 방식으로 head들을 학습시킨다.</p>
+<section id="decoding-with-nerf-rendering">
+<h3>4.1.1 Decoding with NeRF Rendering<a class="headerlink" href="#decoding-with-nerf-rendering" title="Permalink to this heading">#</a></h3>
+<p>original NeRF의 식을 대부분 따르지만, original NeRF와 달리 coarse net과 fine net이 parameter들을 공유하도록 했다. 랜덤한 4096개의 ray를 각 학습 데이터에서 샘플링하였으며, <span class="math notranslate nohighlight">\(L_1\)</span> loss가 최소가 되도록 했다. (original NeRF에서는 <span class="math notranslate nohighlight">\(L_2\)</span> loss를 사용)</p>
+<div class="math notranslate nohighlight">
+\[
+L_{\mathbf{RGB}}=E_{\mathbf r\in R}[\|\hat C_c(\mathbf r)-C(\mathbf r)\|_1+\|\hat C_f(\mathbf r)-C(\mathbf r)\|_1] \tag{10}
+\]</div>
+<p>여기에 추가적으로 각 ray의 transmittance에 대한 손실함수를 추가했다. 특히, 한 ray의 density 적분값(integrated density)을 통해 얻은 transmittance로 coarse rendering과 fine rendering시 <span class="math notranslate nohighlight">\(\hat T_c(r)\)</span> 와 <span class="math notranslate nohighlight">\(\hat T_f(r)\)</span>를 예측하였다. ground truth로는 gt rendering결과의 alpha channel을 사용하였다. 이 손실함수는 아래와 같이 표현할 수 있다. (+NeRF의 경우 novel view를 만드는 것이 목적이었으나 본 논문은 mesh도 생성해야 하므로 노이즈 제거가 더욱 중요하였을 것으로 생각된다.)</p>
+<div class="math notranslate nohighlight">
+\[
+L_T=E_{\mathbf r\in R}[\|\hat T_c(\mathbf r)-T(\mathbf r)\|_1 +\|\hat T_f(\mathbf r)-T(\mathbf r)\|_1]\tag{11}
+\]</div>
+<p>최종적으로는 두 손실함수를 합하여 최적화를 진행하였다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_\text{NeRF}=L_\text{RGB}+L_T \tag{12}
+\]</div>
+</section>
+<section id="decoding-with-stf-rendering">
+<h3>4.1.2 Decoding with STF Rendering<a class="headerlink" href="#decoding-with-stf-rendering" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id9">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/IMG_4874.png"><img alt="figure8" class="bg-light mb-1" src="../../_images/IMG_4874.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 695 </span><span class="caption-text">texture, bump, displacement의 비교 <br />
+source - https://grabcad.com/tutorials/adding-textures-to-3d-models-texture-bump-and-displacement-mapping-how-to-make-photo-realistic-models</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>NeRF 방식을 통해 사전학습한 후, MLPs에 STF output heads를 추가한다. 이러한 MLPs는 SDF와 texture color를 예측한다. triangle mesh를 생성하기 위해서는 각 vertex의 SDF를 regular <span class="math notranslate nohighlight">\(128^3\)</span> grid로 옮겨 미분가능한 형태의 Marching Cube를 진행해야 한다. 이후 texture color는 최종 mesh의 각 vertex texture color head를 통해 얻는다. Pytorch 3D를 활용하면 미분가능한 rendering을 통해 textured mesh를 얻을 수 있다고 한다. 렌더링 시에는 데이터셋 구축시 preprocessing에 사용한 것과 동일한 lighting 조건을 사용했다.</p>
+<p>사전 실험시 랜덤 초기화된 STF output heads를 사용했을 때는 결과가 불안정 했으며, rendering based objective를 사용하여 학습하는 것이 어려웠다. 해당 문제를 완화하기 위해 SDF와 texture color를 해당 output heads를 직접 학습시키기 전에 distill 접근법을 사용했다. Point-E의 regression model을 활용하여 입력 좌표를 랜덤하게 샘플링하고, SDF distillation target을 구했다. 그리고 RGB target로는 asset RGB point cloud에서 특정위치 <span class="math notranslate nohighlight">\(x\)</span>와 가장 가까운(nearest neighbor) point의 색을 사용했다. distillation training 시 distillation loss와 NeRF loss를 더하여 사용했다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_\text{distill}=L_\text{NeRF}+E_{\mathbf x\sim U[-1,1]^3}[\|\text{SDF}_\theta(\mathbf x)-\text{SDF}_\text{regression}(\mathbf x)\|_1+\|\text{RGB}_\theta(\mathbf x)-\text{RGB}_\text{NN}(\mathbf x)\|_1]
+\tag{13}
+\]</div>
+<p>STF output heads가 distillation을 통해 적절한 초기값을 갖게된 후, NeRF encoder와 STF rendering 전체를 end-to-end로 fine-tune한다. 실험적으로 STF rendering에는 <span class="math notranslate nohighlight">\(L_1\)</span>을 사용하는 것은 불안정했으므로 <span class="math notranslate nohighlight">\(L_2\)</span> 손실함수만 사용하는 것이 이러한 rendering 방식에 적절함을 알 수 있었다. STF rendering에 사용한 loss는 아래와 같다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_\text{STF}=\frac{1}{N\space \cdot\space s^2}\sum^N_{i=1}\|\text{Render}(\text{Mesh}_i)-\text{Image}_i\|^2_2\tag{14}
+\]</div>
+<ul class="simple">
+<li><p>mesh를 렌더링한 이미지와 target 이미지의 L2 reconstruction loss의 평균</p></li>
+</ul>
+<p>N은 이미지 개수, s는 이미지의 해상도(resolution), <span class="math notranslate nohighlight">\(\text{Mesh}_i\)</span>는 <span class="math notranslate nohighlight">\(\text{sample}_i\)</span>의 constructed mesh를 말한다. <span class="math notranslate nohighlight">\(\text{Image}_i\)</span>는 RGBA rendering된 결과물로 alpha채널을 포함하고 있기 때문에 transmittance에 대한 loss를 따로 추가하지 않았다.</p>
+<p>최종 fine-tuning 단계에서는 아래와 같이 더한 objective function을 사용한다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_\text{FT}=L_\text{NeRF}+L_\text{STF}\tag{15}
+\]</div>
+</section>
+</section>
+<section id="id1">
+<h2>4.2 Latent Diffusion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>Point-E의 transformer 기반 diffusion 구조를 채택했다. 하지만 point cloud를 latent vector의 sequence로 바꾸었다. latent sequences의 크기는 <span class="math notranslate nohighlight">\(1024\times1024\)</span> 로 이를 길이가 1024인 1024개의 token처럼 transformer의 입력으로 사용했다. 각 token은 MLP weight matrices의 각 row와 일치한다. Shap-E의 모델은 Point-E base 모델과 유사한 부분이 많다.(context length와 width가 동일) 하지만 더 고차원의 샘플(samples in higher-dimensional)을 생성하는데 이는 입출력 채널의 복잡도(dimension)가 증가하였기 때문이다.</p>
+<p>Point-E의 conditioning 방식을 동일하게 사용하였다. 이미지 조건부 3d 생성시 256-token CLIP embedding sequence를 transformer context로 사용했으며, 텍스트 조건부 3d 생성시 single token을 사용했다.</p>
+<p>Point-E와의 차이점으로는 diffusion model의 출력을 <span class="math notranslate nohighlight">\(\epsilon\)</span> prediction으로 parameterize하지 않았다는 것이다. 대신 본 논문에서는 곧바로 sample을 예측하는 방식을 사용했다. 대수적으로는 동일한 의미이나 초기 실험에서 더 일관된 결과물을 생성하여 해당 방식을 사용하였다고 함.</p>
+</section>
+<section id="dataset">
+<h2>4.3 Dataset<a class="headerlink" href="#dataset" title="Permalink to this heading">#</a></h2>
+<p>공정한 비교를 위해 대부분의 실험에서 Point-E와 동일한 3D assets을 사용했다. 하지만 post-processing부분에서는 차이가 있다.</p>
+<ul>
+<li><p>point cloud 계산시, 20개가 아닌 60개의 view를 rendering했다. 20개만 사용했을때 주어진 view에서 확인할 수 없는 영역때문에 crack 발생 (+NeRF 때문으로 추정)</p></li>
+<li><p>point cloud를 4K가 아닌 16K의 point로 만들었다.</p></li>
+<li><p>encoder학습을 위한 view를 렌더링 할때 단순한 소재와 라이팅을 사용하였다. 특히 모든 모델은 동일한 고정된 라이팅 조건내에서 렌더링 되었다. ambient와 diffuse shading만 사용 (+반사광이 고려되지 않아 표면이 매끈한 물체는 생성하기 어려울 것으로 추정됨)</p>
+<figure class="align-default" id="id10">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/untitled.png"><img alt="figure7" class="bg-light mb-1" src="../../_images/untitled.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 696 </span><span class="caption-text">Phong model <br />
+기본적인 shading방식으로 본 논문에서는 specular를 사용하지 않았다 <br />
+source - <a class="reference external" href="https://www.researchgate.net/publication/265514880_Realistic_Visualisation_of_Endoscopic_Surgery_in_a_Virtual_Training_Environment">Realistic_Visualisation_of_Endoscopic_Surgery_in_a_Virtual_Training_Environment</a></span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>text 조건부 모델과 해당 Point-E baseline을 위해 데이터 셋을 더욱 확장했다. 이 데이터 셋을 위해 대략 100만개의 3D assets과 12만개의 (human labeled)caption을 추가로 수집했다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="result">
+<h1>5. Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h1>
+<section id="encoder-evaluation">
+<h2>5.1 Encoder Evaluation<a class="headerlink" href="#encoder-evaluation" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id11">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/table12.png"><img alt="figure8" class="bg-light mb-1" src="../../_images/table12.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 697 </span><span class="caption-text">각 스테이지 별 훈련 이후 encoder 성능평가</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>distillation에서 rendering 이미지의 퀄리티가 떨어지는 것처럼 보이나 finetuning시 퀄리티가 더욱 좋아진다. 또한 STF의 퀄리티 또한 크게 상승한다.</p>
+</section>
+<section id="comparison-to-point-e">
+<h2>5.2 Comparison to Point-E<a class="headerlink" href="#comparison-to-point-e" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id12">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure4.png"><img alt="figure9" class="bg-light mb-1" src="../../_images/figure4.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 698 </span><span class="caption-text">Shap-E와 Point-E비교<br />
+세모 마크가 Point-E, 원형 마크가 Shap-E이다.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>point-E보다 Shap-E의 CLIP score가 더 높다. 더 많은수의 parameter를 가진 point-E를 사용하여도 Shap-E의 성능이 우수함.</p>
+<p>두 평가 지표 모두 OpenAI의 CLIP (Contrastive Language-Image Pretraining) 모델을 활용한 평가 지표로 CLIP score의 경우 주어진 텍스트와 생성결과의 일관성을 평가하기 위한 것이고, CLIP R precision의 경우 생성결과와 참조 이미지가 얼마나 비슷한지 평가하기 위한 것이다.</p>
+<figure class="align-default" id="id13">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure5.png"><img alt="figure10" class="bg-light mb-1" src="../../_images/figure5.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 699 </span><span class="caption-text">Shap-E와 Point-E비교</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>동일한 base model의 크기 동일한 데이터셋으로 학습시킨 결과. 텍스트 조건부 생성시에는 퀄리티 차이가 크지 않음.</p>
+<figure class="align-default" id="id14">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure6.png"><img alt="figure11" class="bg-light mb-1" src="../../_images/figure6.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 700 </span><span class="caption-text">Shap-E와 Point-E비교</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이미지 조건부 생성시에는 비교적 차이가 크다.
+벤치 결과를 보면 point-E에서 나무사이 빈공간을 무시해버린것을 볼수 있다.
+위의 강아지와 컵 이미지 기반 생성 결과를 보면 point-E와 shap-E가 유사한 케이스에서 실패하는 모습을 보였다.</p>
+</section>
+<section id="comparison-to-other-methods">
+<h2>5.3 Comparison to Other Methods<a class="headerlink" href="#comparison-to-other-methods" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id15">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/table21.png"><img alt="figure12" class="bg-light mb-1" src="../../_images/table21.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 701 </span><span class="caption-text">COCO 데이터셋을 이용한 비교결과</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>reference latency에서 point-E와 Shap-E의 차이가 있는데, 이는 Shap-E는 추가적인 upsampling diffusion model을 사용하지 않기 때문이다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="limitations-and-future-work">
+<h1>6. Limitations and Future Work<a class="headerlink" href="#limitations-and-future-work" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id16">
+<a class="bg-light mb-1 reference internal image-reference" href="../../_images/figure7.png"><img alt="figure13" class="bg-light mb-1" src="../../_images/figure7.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 702 </span><span class="caption-text">텍스트 조건부 생성 결과</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>왼쪽 그림과 같이 여러가지 특성을 가진물체를 생성하는데에 어려움을 겪는 모습을 보인다. 이는 학습에 사용한 paired data가 제한적이기 때문으로 더 많은 3D dataset을 수집하면 나아질 수 있다. 또한 texture의 세부 특성을 encoder가 무시하는 경우도 있는데, 더 나은 encoder를 사용함으로써 개선될수 있다.</p>
+<p>Shap-E는 다양한 3D 생성 기술들을 융합하는데에 도움을 줄 수 있다. 예를 들어 Shap-E로 생성한 NeRF와 mesh를 다른 최적화 기반  모델을 초기화 하는데 사용하는 것이다. 이를 통해 더 빠른 수렴도 가능할 것으로 생각된다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<p>Shap-E는 latent diffusion model을 3D implicit function공간에서 전개하여 NeRF와 textured mesh 모두를 생성 할 수 있었다. 동일한 데이터셋을 활용하여 다른 생성모델들과 비교하였을때 더 나은 성능을 보임을 확인했다. 또한 text 조건부 생성시 이미지 없이도 다양한 흥미로운 물체를 생성할 수 있음을 확인했다. 이는 implicit representation을 생성함에 큰 가능성을 보여준다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="acknowledgements">
+<h1>8. Acknowledgements<a class="headerlink" href="#acknowledgements" title="Permalink to this heading">#</a></h1>
+<p>특정 인물들에 대한 언급 외에도 ChatGPT로 부터 valuable writing feedback을 받았다고 표현한 부분이 있었다.</p>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Point_E.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DreamFusion.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title"><strong>DreamFusion</strong></p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Shap-E</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">0. Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#neural-radiance-fields-nerf">2.1 Neural Radiance Fields (NeRF)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signed-distance-functions-and-texture-field-stf">2.2 Signed Distance Functions and Texture Field (STF)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2.3 Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-diffusion">2.4 Latent Diffusion</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">3. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">4. Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#d-encoder">4.1 3D Encoder</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-nerf-rendering">4.1.1 Decoding with NeRF Rendering</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#decoding-with-stf-rendering">4.1.2 Decoding with STF Rendering</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.2 Latent Diffusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">4.3 Dataset</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#encoder-evaluation">5.1 Encoder Evaluation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-point-e">5.2 Comparison to Point-E</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-other-methods">5.3 Comparison to Other Methods</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-future-work">6. Limitations and Future Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#acknowledgements">8. Acknowledgements</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/StyO.html b/docs/review/StyO.html
old mode 100644
new mode 100755
index 18f08db9..4153f939
--- a/docs/review/StyO.html
+++ b/docs/review/StyO.html
@@ -1,929 +1,949 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>StyO &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/StyO';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Imagen" href="imagen.html" />
-    <link rel="prev" title="I-DDPM" href="I-DDPM.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/StyO.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/StyO.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>StyO</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model">2.1. Diffusion Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#face-stylization">2.2. Face Stylization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#framework-of-styo">3.2. Framework of StyO</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> StyO: Stylize Your Face in Only One-Shot</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2303.03231">https://arxiv.org/abs/2303.03231</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
-<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="styo">
-<h1>StyO<a class="headerlink" href="#styo" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>“<strong>Sty</strong>lize the face in only <strong>O</strong>ne-shot.”</p></li>
-<li><p>한장의 이미지만으로 다른 이미지로 스타일을 Transfer!</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>현재 다양한 분야에서 이미지에 특정 스타일을 입히고자하는 연구들이 활발히 진행중이다.</p></li>
-<li><p>이전까지의 연구들은 대부분 각각의 source 이미지, target 이미지 한장씩을 사용해 GAN based model을 활용하려는 식이 주를 이루었다.</p></li>
-<li><p>단 이러한 방식에는 한계가 있는데,</p>
-<ol class="arabic simple">
-<li><p>Real Face를 학습한 pre-trained GAN 모델의 의존도가 너무 커서 Style을 입히기 힘들다.</p></li>
-<li><p>latent space안에서 Content 정보와 Style 정보가 Entangle 되어있다.</p></li>
-</ol>
-</li>
-<li><p><strong>StyO는?</strong></p>
-<ul>
-<li><p>GAN 대신 Data의 Distribution을 더 잘 포용하는 Latent Diffusion Model을 Base모델로 채용한다.</p></li>
-<li><p>총 2 Stage로 구성되는데</p>
-<ol class="arabic simple">
-<li><p>Identifier Disentanglement Learner(IDL)</p>
-<ul>
-<li><p>이미지의 content 정보와 Style 정보를 분리</p></li>
-</ul>
-</li>
-<li><p>Fine-grained Content Controller(FCC)</p>
-<ul>
-<li><p>IDL로부터 분리된 Content와 Style을 원하는대로 재조합</p></li>
-</ul>
-</li>
-</ol>
-</li>
-<li><p>추가로 src 이미지의 detail한 정보(head-pose, hair color 등)를 유지하기위해 Generate 과정에서 src 이미지의 attention map을 재사용하는 trick을 제안했다.</p></li>
-</ul>
-</li>
-<li><p>이러한 StyO는 GAN based 모델에 비해 더 좋은 퀄리티의 이미지를 생성해내고, one-shot face stylization 분야에서 SOTA를 기록했다.</p></li>
-</ul>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<section id="diffusion-model">
-<h3>2.1. Diffusion Model<a class="headerlink" href="#diffusion-model" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>GAN이 생성 분야를 장악하던 중 최근 DDPM의 등장으로 Diffusion 모델이 주목을 받기 시작했다.</p></li>
-<li><p>text prompt를 기반으로 manipulated image 생성이 가능해졌지만, detail한 부분까지 control하기에는 한계가 있었다.</p></li>
-<li><p>이 때, StyO는 이미지의 fine한 style 정보까지 transfer 가능한 diffusion model이다.</p></li>
-</ul>
-</section>
-<section id="face-stylization">
-<h3>2.2. Face Stylization<a class="headerlink" href="#face-stylization" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>최근 GAN Based 생성 모델이 좋은 성능을 보이면서 styleGAN을 베이스로 하는 face image style transfer 모델이 좋은 성능을 보여주었다.</p></li>
-<li><p>하지만 real face dataset을 학습한 pretrained checkpoint를 사용하고 이에 대한 의존성이 너무 커 artistic style 정보를 입히는데 한계를 보여준다.</p></li>
-<li><p>StyO는 이러한 한계를 개선한 결과를 보여준다.</p></li>
-</ul>
-</section>
-</section>
-<section id="method">
-<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<section id="framework-of-styo">
-<h3>3.2. Framework of StyO<a class="headerlink" href="#framework-of-styo" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img.png"><img alt="StyO_00" class="bg-primary mb-1" src="../../_images/img.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 203 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>image 간의 style transfer를 위해 <strong>identifier disentanglement learner</strong>과 <strong>fine-grained content controller</strong>를 제안한다.</p></li>
-</ul>
-<p><strong>IDL</strong></p>
-<ul class="simple">
-<li><p>image의 content 정보와 style 정보를 분리하는 방향으로 학습이 진행</p></li>
-<li><p>src 이미지는 <code class="docutils literal notranslate"><span class="pre">&quot;a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">$C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$</span> <span class="pre">portrait&quot;</span></code> prompt로 학습 (tgt 이미지는 반대)</p></li>
-</ul>
-<p>⇒ 이미지 간의 Style 정보와 Content 정보가 Disentangle 되고, <span class="math notranslate nohighlight">\(S_{src}\)</span> 안에 src 이미지의 Style 정보가, <span class="math notranslate nohighlight">\(C_{src}\)</span> 안에 src 이미지의 content 정보가 embedding 되도록 학습</p>
-<ul>
-<li><p>이 때 <span class="math notranslate nohighlight">\(S_{src}\)</span>, <span class="math notranslate nohighlight">\(C_{src}\)</span>에 target 이미지의 context 정보를 배제함과 동시에 <span class="math notranslate nohighlight">\(S_{tgt}\)</span>, <span class="math notranslate nohighlight">\(C_{tgt}\)</span>에 포함하기위해 앞에 negator(=부정의 의미를 가진 단어)를 사용</p>
-<ul class="simple">
-<li><p><em>e.g</em>. <em>not, without, except …</em></p></li>
-</ul>
-</li>
-<li><p>src, tgt 이미지에 추가로 auxiliary 이미지 셋을 구성해 <code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">portrait”</span></code> prompt로 학습</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(X_{aux}\)</span> : FFHQ dataset에서 임의로 200장의 데이터를 sampling</p></li>
-</ul>
-</li>
-<li><p>효과</p>
-<ol class="arabic simple">
-<li><p>auxiliary 이미지를 학습함으로써 key prompt간 disentanglement를 향상</p></li>
-<li><p>auxiliary 이미지에는 없는 src 이미지만의 정보를 <span class="math notranslate nohighlight">\(C_{src}\)</span> 에 주입</p></li>
-<li><p>src 이미지의 style과 tgt 이미지의 style을 구별하는데 도움을 줌</p></li>
-</ol>
-</li>
-<li><p>Full Loss</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img114.png"><img alt="StyO_01" class="bg-primary mb-1" src="../../_images/img114.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 204 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>이러한 IDL의 학습만으로 src 이미지와 tgt 이미지의 style transfer가 가능하다.</p>
-<ul>
-<li><p><code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{tgt}$</span> <span class="pre">not</span> <span class="pre">$S_{src}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">$C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$</span> <span class="pre">portrait”</span></code></p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img26.png"><img alt="StyO_02" class="bg-primary mb-1" src="../../_images/img26.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 205 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>하지만 위 이미지처럼 src 이미지의 content 정보(head-pose, facial feature)를 잃어버리는 경향이 있다.</p></li>
-<li><p>이러한 문제점을 개선하기위해 <strong>FCC</strong>를 추가로 도입하였다.</p></li>
-</ul>
-<p><strong>FCC</strong></p>
-<ul class="simple">
-<li><p>IDL로 분리된 content 정보와 style 정보를 원하는 방식으로 조합(Recombination)할 때 A의 Content 정보를 유지하도록 하는 Trick</p></li>
-</ul>
-<ol class="arabic">
-<li><p>Cross Attention Control</p>
-<ul>
-<li><p>LDM은 기본적으로 Text 정보를 생성 이미지에 주입하기위해 cross attention mechanism을 사용</p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(Attn(z, r) = M(z, r)V\)</span>, <em>z : image latent, r : text embedding</em></p></li>
-</ul>
-</li>
-<li><p>이 때 “prompt-to-prompt” paper에서 <strong>attention map M의 값이 생성 이미지의 Layout에 강한 영향을 미친다</strong>는 점을 확인</p></li>
-<li><p>따라서 src 이미지의 attention mask를 generate 과정에 주입함으로써 content 정보를 좀 더 잘 유지하도록 유도</p></li>
-<li><p>단, attention map의 모든 값을 replace하지않고, content에 관한 Index만 선택적으로 replace</p>
-<ul>
-<li><p>content index : <span class="math notranslate nohighlight">\(C_{src}\)</span>, <code class="docutils literal notranslate"><span class="pre">not</span></code>, <span class="math notranslate nohighlight">\(C_{tgt}\)</span>, <code class="docutils literal notranslate"><span class="pre">portrait</span></code></p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img36.png"><img alt="StyO_03" class="bg-primary mb-1" src="../../_images/img36.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 206 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Augmented Text Prompt</p>
-<ul class="simple">
-<li><p>training time에서 key prompt를 n번 사용함으로서 생성되는 이미지에 context 정보를 강하게 주입</p>
-<ul>
-<li><p>src 이미지는 <code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">($S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$)</span> <span class="pre">*</span> <span class="pre">$n_{s}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">($C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$)</span> <span class="pre">*</span> <span class="pre">$n_{c}$</span> <span class="pre">portrait”</span></code> (tgt 이미지는 반대)</p></li>
-</ul>
-</li>
-<li><p>실험상 hyperparameter <span class="math notranslate nohighlight">\(n_{s}\)</span>와 <span class="math notranslate nohighlight">\(n_{c}\)</span>는 3 이하의 값을 추천</p></li>
-</ul>
-</li>
-</ol>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p><strong>Implementation Details</strong></p>
-<ul class="simple">
-<li><p>base model : Pretrained LDM model checkpoint (trained by LAION-5B)</p></li>
-<li><p>hyper parameter</p>
-<ul>
-<li><p>key prompt : “ak47”, “aug”, “sks”, “m4a1”</p></li>
-<li><p>Learning rate : 1e-6</p></li>
-<li><p>Optimizer : Adam</p></li>
-<li><p>train step : 400</p></li>
-<li><p><span class="math notranslate nohighlight">\(n_{s}\)</span> : 3, <span class="math notranslate nohighlight">\(n_{c}\)</span> : 1</p></li>
-<li><p>나머지는 LDM과 동일</p></li>
-</ul>
-</li>
-</ul>
-<p><strong>Comparison with SOTA methods</strong></p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img45.png"><img alt="StyO_04" class="bg-primary mb-1" src="../../_images/img45.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 207 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>StyO가 src 이미지의 face identity와 local detail 모두 잘 유지함과 동시에, style 정보를 자연스럽게 입힌 결과물을 생성해낸다.</p></li>
-<li><p>User Study도 다른 모델들에 비해 좋은 결과를 보였다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img55.png"><img alt="StyO_05" class="bg-primary mb-1" src="../../_images/img55.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 208 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<p><strong>Ablation Study</strong></p>
-<ol class="arabic">
-<li><p><em>Effect of Contrastive Disentangled Prompt Template</em></p>
-<ul>
-<li><p>negative prompt 없이 positive prompt만 넣고 학습할경우 학습 이미지의 overfitting이 심하고, style과 content 정보의 분리에 어려움을 보인다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img65.png"><img alt="StyO_06" class="bg-primary mb-1" src="../../_images/img65.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 209 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>또, source 이미지의 local detail을 유지하기위해 auxiliary set의 trick도 적용하는것이 Best Quality의 결과물을 생성해냈다.</p></li>
-</ul>
-</li>
-<li><p><em>Effect of Fine-grained Content Controller</em></p>
-<ul>
-<li><p>FCC 없이 Inference할 경우 generated 이미지의 높은 diversity를 보이지만, FCC를 포함할 경우 src 이미지의 fidelity가 높아져 좀더 significant한 이미지가 생성되는것을 보여주었다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img75.png"><img alt="StyO_07" class="bg-primary mb-1" src="../../_images/img75.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 210 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p><em>Hyper-parameters in Augmented Text Prompt</em></p>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(n_{s}\)</span> 값이 커질수록 이미지가 photorealistic에서 artistic하게 바뀌고, <span class="math notranslate nohighlight">\(n_{c}\)</span>도 마찬가지로 값이 커질수록 src 이미지에 overfitting된 이미지가 나오는 경향을 보여주었다.</p></li>
-</ul>
-</li>
-</ol>
-</section>
-<section id="conclusion">
-<h2>5. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>StyO는 IDL과 FCC를 사용해 기존 GAN을 이용한 SOTA 모델들보다 더 자연스럽고 Quality 좋은 style transferred 이미지를 생성해낼 수 있었다.</p></li>
-<li><p><strong>단, style 하나의 transfer를 위해 single GPU로 10분이 걸리므로 time-efficiency가 좋지 못하다는 단점이 있다.</strong></p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="I-DDPM.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">I-DDPM</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="imagen.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Imagen</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model">2.1. Diffusion Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#face-stylization">2.2. Face Stylization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#framework-of-styo">3.2. Framework of StyO</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>StyO &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/StyO';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Imagen" href="imagen.html" />
+    <link rel="prev" title="I-DDPM" href="I-DDPM.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/StyO.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/StyO.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>StyO</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model">2.1. Diffusion Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#face-stylization">2.2. Face Stylization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#framework-of-styo">3.2. Framework of StyO</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> StyO: Stylize Your Face in Only One-Shot</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2303.03231">https://arxiv.org/abs/2303.03231</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Seunghwan Ji</p></li>
+<li><p><strong>Last updated on Aug. 6, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="styo">
+<h1>StyO<a class="headerlink" href="#styo" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>“<strong>Sty</strong>lize the face in only <strong>O</strong>ne-shot.”</p></li>
+<li><p>한장의 이미지만으로 다른 이미지로 스타일을 Transfer!</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>현재 다양한 분야에서 이미지에 특정 스타일을 입히고자하는 연구들이 활발히 진행중이다.</p></li>
+<li><p>이전까지의 연구들은 대부분 각각의 source 이미지, target 이미지 한장씩을 사용해 GAN based model을 활용하려는 식이 주를 이루었다.</p></li>
+<li><p>단 이러한 방식에는 한계가 있는데,</p>
+<ol class="arabic simple">
+<li><p>Real Face를 학습한 pre-trained GAN 모델의 의존도가 너무 커서 Style을 입히기 힘들다.</p></li>
+<li><p>latent space안에서 Content 정보와 Style 정보가 Entangle 되어있다.</p></li>
+</ol>
+</li>
+<li><p><strong>StyO는?</strong></p>
+<ul>
+<li><p>GAN 대신 Data의 Distribution을 더 잘 포용하는 Latent Diffusion Model을 Base모델로 채용한다.</p></li>
+<li><p>총 2 Stage로 구성되는데</p>
+<ol class="arabic simple">
+<li><p>Identifier Disentanglement Learner(IDL)</p>
+<ul>
+<li><p>이미지의 content 정보와 Style 정보를 분리</p></li>
+</ul>
+</li>
+<li><p>Fine-grained Content Controller(FCC)</p>
+<ul>
+<li><p>IDL로부터 분리된 Content와 Style을 원하는대로 재조합</p></li>
+</ul>
+</li>
+</ol>
+</li>
+<li><p>추가로 src 이미지의 detail한 정보(head-pose, hair color 등)를 유지하기위해 Generate 과정에서 src 이미지의 attention map을 재사용하는 trick을 제안했다.</p></li>
+</ul>
+</li>
+<li><p>이러한 StyO는 GAN based 모델에 비해 더 좋은 퀄리티의 이미지를 생성해내고, one-shot face stylization 분야에서 SOTA를 기록했다.</p></li>
+</ul>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<section id="diffusion-model">
+<h3>2.1. Diffusion Model<a class="headerlink" href="#diffusion-model" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>GAN이 생성 분야를 장악하던 중 최근 DDPM의 등장으로 Diffusion 모델이 주목을 받기 시작했다.</p></li>
+<li><p>text prompt를 기반으로 manipulated image 생성이 가능해졌지만, detail한 부분까지 control하기에는 한계가 있었다.</p></li>
+<li><p>이 때, StyO는 이미지의 fine한 style 정보까지 transfer 가능한 diffusion model이다.</p></li>
+</ul>
+</section>
+<section id="face-stylization">
+<h3>2.2. Face Stylization<a class="headerlink" href="#face-stylization" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>최근 GAN Based 생성 모델이 좋은 성능을 보이면서 styleGAN을 베이스로 하는 face image style transfer 모델이 좋은 성능을 보여주었다.</p></li>
+<li><p>하지만 real face dataset을 학습한 pretrained checkpoint를 사용하고 이에 대한 의존성이 너무 커 artistic style 정보를 입히는데 한계를 보여준다.</p></li>
+<li><p>StyO는 이러한 한계를 개선한 결과를 보여준다.</p></li>
+</ul>
+</section>
+</section>
+<section id="method">
+<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<section id="framework-of-styo">
+<h3>3.2. Framework of StyO<a class="headerlink" href="#framework-of-styo" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img.png"><img alt="StyO_00" class="bg-primary mb-1" src="../../_images/img.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 203 </span><span class="caption-text">Figure 1</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>image 간의 style transfer를 위해 <strong>identifier disentanglement learner</strong>와 <strong>fine-grained content controller</strong>를 제안한다.</p></li>
+</ul>
+<p><strong>IDL</strong></p>
+<ul class="simple">
+<li><p>image의 content 정보와 style 정보를 분리하는 방향으로 학습이 진행</p></li>
+<li><p>src 이미지는 <code class="docutils literal notranslate"><span class="pre">&quot;a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">$C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$</span> <span class="pre">portrait&quot;</span></code> prompt로 학습 (tgt 이미지는 반대)</p></li>
+</ul>
+<p>⇒ 이미지 간의 Style 정보와 Content 정보가 Disentangle 되고, <span class="math notranslate nohighlight">\(S_{src}\)</span>안에 src 이미지의 Style 정보가, <span class="math notranslate nohighlight">\(C_{src}\)</span> 안에 src 이미지의 content 정보가 embedding 되도록 학습</p>
+<ul>
+<li><p>이 때 <span class="math notranslate nohighlight">\(S_{src}\)</span>, <span class="math notranslate nohighlight">\(C_{src}\)</span>에 target 이미지의 context 정보를 배제함과 동시에 <span class="math notranslate nohighlight">\(S_{tgt}\)</span>, <span class="math notranslate nohighlight">\(C_{tgt}\)</span>에 포함하기위해 앞에 negator(=부정의 의미를 가진 단어)를 사용</p>
+<ul class="simple">
+<li><p><em>e.g</em>. <em>not, without, except …</em></p></li>
+</ul>
+</li>
+<li><p>src, tgt 이미지에 추가로 auxiliary 이미지 셋을 구성해 <code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">portrait”</span></code> prompt로 학습</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(X_{aux}\)</span> : FFHQ dataset에서 임의로 200장의 데이터를 sampling</p></li>
+</ul>
+</li>
+<li><p>효과</p>
+<ol class="arabic simple">
+<li><p>auxiliary 이미지를 학습함으로써 key prompt간 disentanglement를 향상</p></li>
+<li><p>auxiliary 이미지에는 없는 src 이미지만의 정보를 <span class="math notranslate nohighlight">\(C_{src}\)</span> 에 주입</p></li>
+<li><p>src 이미지의 style과 tgt 이미지의 style을 구별하는데 도움을 줌</p></li>
+</ol>
+</li>
+<li><p>Full Loss</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img114.png"><img alt="StyO_01" class="bg-primary mb-1" src="../../_images/img114.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 204 </span><span class="caption-text">Equation 1</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>이러한 IDL의 학습만으로 src 이미지와 tgt 이미지의 style transfer가 가능하다.</p>
+<ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">$S_{tgt}$</span> <span class="pre">not</span> <span class="pre">$S_{src}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">$C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$</span> <span class="pre">portrait”</span></code></p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img26.png"><img alt="StyO_02" class="bg-primary mb-1" src="../../_images/img26.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 205 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>하지만 위 이미지처럼 src 이미지의 content 정보(head-pose, facial feature)를 잃어버리는 경향이 있다.</p></li>
+<li><p>이러한 문제점을 개선하기위해 <strong>FCC</strong>를 추가로 도입하였다.</p></li>
+</ul>
+<p><strong>FCC</strong></p>
+<ul class="simple">
+<li><p>IDL로 분리된 content 정보와 style 정보를 원하는 방식으로 조합(Recombination)할 때 A의 Content 정보를 유지하도록 하는 Trick</p></li>
+</ul>
+<ol class="arabic">
+<li><p>Cross Attention Control</p>
+<ul>
+<li><p>LDM은 기본적으로 Text 정보를 생성 이미지에 주입하기위해 cross attention mechanism을 사용</p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(Attn(z, r) = M(z, r)V\)</span>, <em>z : image latent, r : text embedding</em></p></li>
+</ul>
+</li>
+<li><p>이 때 “prompt-to-prompt” paper에서 <strong>attention map M의 값이 생성 이미지의 Layout에 강한 영향을 미친다</strong>는 점을 확인</p></li>
+<li><p>따라서 src 이미지의 attention mask를 generate 과정에 주입함으로써 content 정보를 좀 더 잘 유지하도록 유도</p></li>
+<li><p>단, attention map의 모든 값을 replace하지않고, content에 관한 Index만 선택적으로 replace</p>
+<ul>
+<li><p>content index : <span class="math notranslate nohighlight">\(C_{src}\)</span>, <code class="docutils literal notranslate"><span class="pre">not</span></code>, <span class="math notranslate nohighlight">\(C_{tgt}\)</span>, <code class="docutils literal notranslate"><span class="pre">portrait</span></code></p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img36.png"><img alt="StyO_03" class="bg-primary mb-1" src="../../_images/img36.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 206 </span><span class="caption-text">Equation 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Augmented Text Prompt</p>
+<ul class="simple">
+<li><p>training time에서 key prompt를 n번 사용함으로써 생성되는 이미지에 context 정보를 강하게 주입</p>
+<ul>
+<li><p>src 이미지는 <code class="docutils literal notranslate"><span class="pre">“a</span> <span class="pre">drawing</span> <span class="pre">with</span> <span class="pre">($S_{src}$</span> <span class="pre">not</span> <span class="pre">$S_{tgt}$)</span> <span class="pre">*</span> <span class="pre">$n_{s}$</span> <span class="pre">style</span> <span class="pre">of</span> <span class="pre">($C_{src}$</span> <span class="pre">not</span> <span class="pre">$C_{tgt}$)</span> <span class="pre">*</span> <span class="pre">$n_{c}$</span> <span class="pre">portrait”</span></code> (tgt 이미지는 반대)</p></li>
+</ul>
+</li>
+<li><p>실험상 hyperparameter <span class="math notranslate nohighlight">\(n_{s}\)</span>와 <span class="math notranslate nohighlight">\(n_{c}\)</span>는 3 이하의 값을 추천</p></li>
+</ul>
+</li>
+</ol>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p><strong>Implementation Details</strong></p>
+<ul class="simple">
+<li><p>base model : Pretrained LDM model checkpoint (trained on LAION-5B)</p></li>
+<li><p>hyper parameter</p>
+<ul>
+<li><p>key prompt : “ak47”, “aug”, “sks”, “m4a1”</p></li>
+<li><p>Learning rate : 1e-6</p></li>
+<li><p>Optimizer : Adam</p></li>
+<li><p>train step : 400</p></li>
+<li><p><span class="math notranslate nohighlight">\(n_{s}\)</span> : 3, <span class="math notranslate nohighlight">\(n_{c}\)</span> : 1</p></li>
+<li><p>나머지는 LDM과 동일</p></li>
+</ul>
+</li>
+</ul>
+<p><strong>Comparison with SOTA methods</strong></p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img45.png"><img alt="StyO_04" class="bg-primary mb-1" src="../../_images/img45.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 207 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>StyO가 src 이미지의 face identity와 local detail 모두 잘 유지함과 동시에, style 정보를 자연스럽게 입힌 결과물을 생성해낸다.</p></li>
+<li><p>User Study도 다른 모델들에 비해 좋은 결과를 보였다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img55.png"><img alt="StyO_05" class="bg-primary mb-1" src="../../_images/img55.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 208 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p><strong>Ablation Study</strong></p>
+<ol class="arabic">
+<li><p><em>Effect of Contrastive Disentangled Prompt Template</em></p>
+<ul>
+<li><p>negative prompt 없이 positive prompt만 넣고 학습할경우 학습 이미지의 overfitting이 심하고, style과 content 정보의 분리에 어려움을 보인다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img65.png"><img alt="StyO_06" class="bg-primary mb-1" src="../../_images/img65.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 209 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>또, source 이미지의 local detail을 유지하기위해 auxiliary set의 trick도 적용하는것이 Best Quality의 결과물을 생성해냈다.</p></li>
+</ul>
+</li>
+<li><p><em>Effect of Fine-grained Content Controller</em></p>
+<ul>
+<li><p>FCC 없이 Inference할 경우 generated 이미지의 높은 diversity를 보이지만, FCC를 포함할 경우 src 이미지의 fidelity가 높아져 좀더 significant한 이미지가 생성되는것을 보여주었다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img75.png"><img alt="StyO_07" class="bg-primary mb-1" src="../../_images/img75.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 210 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p><em>Hyper-parameters in Augmented Text Prompt</em></p>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(n_{s}\)</span> 값이 커질수록 이미지가 photorealistic에서 artistic하게 바뀌고, <span class="math notranslate nohighlight">\(n_{c}\)</span>도 마찬가지로 값이 커질수록 src 이미지에 overfitting된 이미지가 나오는 경향을 보여주었다.</p></li>
+</ul>
+</li>
+</ol>
+</section>
+<section id="conclusion">
+<h2>5. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>StyO는 IDL과 FCC를 사용해 기존 GAN을 이용한 SOTA 모델들보다 더 자연스럽고 Quality 좋은 style transferred 이미지를 생성해낼 수 있었다.</p></li>
+<li><p><strong>단, style 하나의 transfer를 위해 single GPU로 10분이 걸리므로 time-efficiency가 좋지 못하다는 단점이 있다.</strong></p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="I-DDPM.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">I-DDPM</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="imagen.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Imagen</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model">2.1. Diffusion Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#face-stylization">2.2. Face Stylization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#framework-of-styo">3.2. Framework of StyO</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">5. Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/StyleGAN.html b/docs/review/StyleGAN.html
old mode 100644
new mode 100755
index 6ce03971..83cad522
--- a/docs/review/StyleGAN.html
+++ b/docs/review/StyleGAN.html
@@ -1,822 +1,842 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>StyleGAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/StyleGAN';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Diffusion Models Beat GANs on Image Synthesis" href="diffusion_beats_GANs.html" />
-    <link rel="prev" title="CycleGAN" href="cycleGAN.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/StyleGAN.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/StyleGAN.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>StyleGAN</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mapping-network">Mapping Network</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#style-and-adain">Style and AdaIN</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stochastic-variation">Stochastic Variation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mixing-regularization">Mixing Regularization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">실험 결과</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> A Style-Based Generator Architecture for Generative Adversarial Networks (CVPR 2019)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1812.04948">https://arxiv.org/abs/1812.04948</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/huangzh13/StyleGAN.pytorch">huangzh13/StyleGAN.pytorch</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jisu Kim</p></li>
-<li><p><strong>Last updated on Apr. 12, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="stylegan">
-<h1>StyleGAN<a class="headerlink" href="#stylegan" title="Permalink to this heading">#</a></h1>
-<p>오늘 알아볼 모델은 StyleGAN입니다. 기존에 다뤘던 GAN과 같이 이미지를 생성하는 모델입니다. generator 구조를 변경함으로써 성능을 올리고 feature의 control이 가능하게 했습니다. loss나 discriminator 구조 개선에 관한 논문은 아닙니다. 먼저 결과를 보도록 하죠.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig1.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/StyleGAN_fig1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 62 </span><span class="caption-text">Images generated by StyleGAN</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이 논문의 contribution은 다음과 같습니다.</p>
-<ol class="arabic simple">
-<li><p>새로운 구조를 제안하여 성능을 높이면서 feature의 control이 가능해졌습니다.</p></li>
-<li><p>새로운 데이터셋을 제안했습니다. (FFHQ)</p></li>
-</ol>
-<p>이 중에서 첫 번째 contribution을 자세히 보도록 하겠습니다. 논문의 abstract에는 다음과 같은 문장이 있습니다.</p>
-<blockquote>
-<div><p>The new architecture leads to an automatically learned, <strong>unsupervised separation of high-level attributes</strong> (e.g., pose and identity when trained on human faces) and stochastic variation in the generated images (e.g., freckles, hair), and it enables intuitive, scale-specific control of the synthesis.</p>
-</div></blockquote>
-<p>논문에서 제안한 새로운 generator 구조가 할 수 있는 일을 설명하는 부분입니다. 여기서 보시면 high level attribute의 separation이 가능하다고 얘기하고 있습니다. 저는 개인적으로 이 부분이 StyleGAN의 가장 중요한 특징이라고 생각합니다.</p>
-<p>생성 모델로 이미지를 생성하고자 할 때, 사용자는 어떠한 목적을 가지고 자신이 원하는 이미지를 만들고자 할 것입니다. 이미지의 품질이 좋더라도 모델이 사용자의 의도와 상관없는 랜덤한 이미지를 내뱉어준다면 그 모델의 실용성이 좋다고 할 수 없을 것입니다. 근래에 Text-to-Image 모델들이 인기를 얻었던 이유도 누구나 쉽게 텍스트를 통해서 생성되는 이미지를 조절할 수 있다는 점도 한몫했다고 생각합니다. StyleGAN은 그런 controllability를 어느 정도 가능하게 한 모델이라는 측면에서 의미있다고 생각합니다.</p>
-<p>StyleGAN의 구조는 아래 그림과 같습니다. synthesis network는 해상도를 4x4에서 시작해서 1024x1024까지 높여줍니다. 최종적으로 1024x1024 해상도를 가지는 이미지를 얻게 됩니다. 아래 구조를 보면 기존 GAN하고 비교해서 특이한 점이 세 가지 있습니다.</p>
-<ol class="arabic simple">
-<li><p>z를 input으로 받는 mapping network</p></li>
-<li><p>style과 AdaIN</p></li>
-<li><p>noise와 B (stochastic variation)</p></li>
-</ol>
-<p>이 각각에 대해서 알아보도록 합시다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig2.png"><img alt="stylegan_02" class="bg-primary mb-1" src="../../_images/StyleGAN_fig2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 63 </span><span class="caption-text">Structure of StyleGAN</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="mapping-network">
-<h2>Mapping Network<a class="headerlink" href="#mapping-network" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig3.png"><img alt="stylegan_03" class="bg-primary mb-1" src="../../_images/StyleGAN_fig3.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 64 </span><span class="caption-text">Mappings with <span class="math notranslate nohighlight">\(w\)</span> and without <span class="math notranslate nohighlight">\(w\)</span></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>기존 GAN을 생각해보면 z를 input으로 받아서 generator를 거쳐서 이미지를 생성하는 구조입니다. 이 z는 보통 Gaussian distribution에서 샘플링으로 얻습니다. GAN은 학습을 통해 Gaussian distribution을 data distribution으로 보내는 방법을 배우게 될 것이고, 이 분포는 (b)처럼 생기게 될 것입니다. 그런데 데이터가 (a)처럼 주어져서 특정한 데이터가 없거나 적을 수도 있을 것입니다. 예를 들어, 데이터에 피부가 희면서 머리가 긴 샘플들이 없다고 해봅시다. 그러면 피부색과 머리 길이라는 두 feature는 서로 얽히게(entangled)되어, 하나를 바꿀 때 다른 하나도 같이 바뀌는 현상이 일어나게 됩니다. 이런 현상을 완화하기 위해 논문에서는 Gaussian에서 뽑은 z를 바로 사용하는 것이 아니라 mapping network를 통해 learnable distribution에서 뽑은 w를 사용합니다.</p>
-</section>
-<section id="style-and-adain">
-<h2>Style and AdaIN<a class="headerlink" href="#style-and-adain" title="Permalink to this heading">#</a></h2>
-<p>instance normalization은 샘플 하나의 각 채널마다 정규화를 취해주는 방법입니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig4.png"><img alt="stylegan_04" class="bg-primary mb-1" src="../../_images/StyleGAN_fig4.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 65 </span><span class="caption-text">Normalization methods</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>adaptive instance normalization (AdaIN) 은 instance normalization에 scale을 곱해주고 bias를 더해주는 형태입니다. 그런데 이 scale과 bias가 style vector의 linear transformation으로 주어지는 형태입니다. linear layer를 통해서 w는 <span class="math notranslate nohighlight">\(\mathbf{y}=(\mathbf{y}_{s},\mathbf{y}_{b})\)</span>로 보내지게 됩니다. AdaIN의 수식은 아래와 같습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-AdaIN(\mathbf{x}_{i},\mathbf{y})=\mathbf{y}_{s,i}\frac{\mathbf{x}_{i}-\mu(\mathbf{x}_{i})}{\sigma(\mathbf{x}_{i})}+\mathbf{y}_{b,i}
-\]</div>
-<p>AdaIN은 각 블록마다 두 개씩 들어가서 style은 총 열여덟 번 AdaIN을 통해 generator에 들어가게 됩니다. AdaIN은 localization이라는 특징과도 연관이 있습니다. 여기서 말하는 localization이란 열여덟 개의 style 중에서 일부를 바꿈으로써 이미지의 일부 특징들을 바꿀 수 있다는 의미입니다. AdaIN은 각 convolution layer 다음에 적용이 됩니다. 이 때 feature map들은 normalization되고 style에 의해 새로운 statistics를 가지게 됩니다. style은 하나의 convolution에 적용되고, 다음 convolution에서 다시 normalization이 수행되기 때문에 이전 layer에 적용된 style과 다음 layer에 적용된 style이 분리되게 학습될 수 있습니다.</p>
-<p>관련 코드</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">StyleMod</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">use_wscale</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">(</span><span class="n">StyleMod</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">lin</span> <span class="o">=</span> <span class="n">EqualizedLinear</span><span class="p">(</span><span class="n">latent_size</span><span class="p">,</span>
-                                   <span class="n">channels</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span>
-                                   <span class="n">gain</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">use_wscale</span><span class="o">=</span><span class="n">use_wscale</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">latent</span><span class="p">):</span>
-        <span class="n">style</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lin</span><span class="p">(</span><span class="n">latent</span><span class="p">)</span>  <span class="c1"># style =&gt; [batch_size, n_channels*2]</span>
-
-        <span class="n">shape</span> <span class="o">=</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)]</span> <span class="o">+</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span> <span class="o">*</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span>
-        <span class="n">style</span> <span class="o">=</span> <span class="n">style</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">shape</span><span class="p">)</span>  <span class="c1"># [batch_size, 2, n_channels, ...]</span>
-        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">*</span> <span class="p">(</span><span class="n">style</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="mf">1.</span><span class="p">)</span> <span class="o">+</span> <span class="n">style</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span>
-        <span class="k">return</span> <span class="n">x</span>
-
-<span class="k">class</span> <span class="nc">LayerEpilogue</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-<span class="w">    </span><span class="sd">&quot;&quot;&quot;Things to do at the end of each layer.&quot;&quot;&quot;</span>
-
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">dlatent_size</span><span class="p">,</span> <span class="n">use_wscale</span><span class="p">,</span>
-                 <span class="n">use_noise</span><span class="p">,</span> <span class="n">use_pixel_norm</span><span class="p">,</span> <span class="n">use_instance_norm</span><span class="p">,</span> <span class="n">use_styles</span><span class="p">,</span> <span class="n">activation_layer</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="n">layers</span> <span class="o">=</span> <span class="p">[]</span>
-        <span class="k">if</span> <span class="n">use_noise</span><span class="p">:</span>
-            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;noise&#39;</span><span class="p">,</span> <span class="n">NoiseLayer</span><span class="p">(</span><span class="n">channels</span><span class="p">)))</span>
-        <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;activation&#39;</span><span class="p">,</span> <span class="n">activation_layer</span><span class="p">))</span>
-        <span class="k">if</span> <span class="n">use_pixel_norm</span><span class="p">:</span>
-            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;pixel_norm&#39;</span><span class="p">,</span> <span class="n">PixelNormLayer</span><span class="p">()))</span>
-        <span class="k">if</span> <span class="n">use_instance_norm</span><span class="p">:</span>
-            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;instance_norm&#39;</span><span class="p">,</span> <span class="n">nn</span><span class="o">.</span><span class="n">InstanceNorm2d</span><span class="p">(</span><span class="n">channels</span><span class="p">)))</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">top_epi</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span><span class="n">OrderedDict</span><span class="p">(</span><span class="n">layers</span><span class="p">))</span>
-
-        <span class="k">if</span> <span class="n">use_styles</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="o">=</span> <span class="n">StyleMod</span><span class="p">(</span><span class="n">dlatent_size</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">use_wscale</span><span class="o">=</span><span class="n">use_wscale</span><span class="p">)</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="o">=</span> <span class="kc">None</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">dlatents_in_slice</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_epi</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">dlatents_in_slice</span><span class="p">)</span>
-        <span class="k">else</span><span class="p">:</span>
-            <span class="k">assert</span> <span class="n">dlatents_in_slice</span> <span class="ow">is</span> <span class="kc">None</span>
-        <span class="k">return</span> <span class="n">x</span>
-</pre></div>
-</div>
-<p>code from <a class="github reference external" href="https://github.com/huangzh13/StyleGAN.pytorch">huangzh13/StyleGAN.pytorch</a></p>
-<p>아래 그림은 source A의 style 중 일부를 source B의 style로 변경해서 만든 이미지들입니다. style은 총 18곳에서 사용되는데 처음 4곳 (<span class="math notranslate nohighlight">\(4^2 - 8^2\)</span>)을 coarse, 그다음 4곳 (<span class="math notranslate nohighlight">\(16^2-32^2\)</span>)을 middle, 마지막 10곳 (<span class="math notranslate nohighlight">\(64^2-1024^2\)</span>)을 fine style로 정의하였습니다. 그림을 보시면 윗 부분에서는 포즈나 전체적인 머리 스타일같이 coarse style은 source B의 것을 유지하고, 아래로 갈수록 source A의 큰 틀을 유지하면서 세부적인 부분들을 B에서 가져왔음을 볼 수 있습니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig5.png"><img alt="stylegan_05" class="bg-primary mb-1" src="../../_images/StyleGAN_fig5.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 66 </span><span class="caption-text">Mixing two styles</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="stochastic-variation">
-<h2>Stochastic Variation<a class="headerlink" href="#stochastic-variation" title="Permalink to this heading">#</a></h2>
-<p>한 사람의 이미지 안에는 확률적으로 바뀔 수 있는 부분이 있습니다. (주근깨, 머릿결, 피부) 이를 모델링하기 위해서 noise를 추가적인 input으로 사용하여 각 convolution layer 다음에 더해줍니다. 아래 그림에서 (a)의 생성된 한 사람의 이미지 안에서도 디테일들은 (b)와 같이 달라질 수 있습니다. (c)와 같이 standard deviation을 구해봤을 때 얼굴형과 같은 attribute는 변하지 않지만 noise에 의해서 머리카락과 같은 부분은 variation이 생김을 볼 수 있습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig6.png"><img alt="stylegan_06" class="bg-primary mb-1" src="../../_images/StyleGAN_fig6.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 67 </span><span class="caption-text">Examples of stochastic variation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래 그림에서 (a)는 모든 layer에 noise를 준 경우, (b)는 noise를 주지 않은 경우, (c)는 fine layers (<span class="math notranslate nohighlight">\(64^2 - 1024^2\)</span>)에만 noise를 준 경우, (d)는 coarse layers (<span class="math notranslate nohighlight">\(4^2 - 32^2\)</span>)에만 noise를 준 경우입니다. (b)를 보면 noise가 없을 경우 머리카락같은 디테일이 제대로 살아있지 않은 것을 볼 수 있습니다. (c)와 (d)를 보면 fine layers에 들어간 noise가 머리카락의 더 세밀한 부분에 영향을 끼친다는 것을 볼 수 있습니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig7.png"><img alt="stylegan_07" class="bg-primary mb-1" src="../../_images/StyleGAN_fig7.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 68 </span><span class="caption-text">Effect of noise inputs at different layers</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="mixing-regularization">
-<h2>Mixing Regularization<a class="headerlink" href="#mixing-regularization" title="Permalink to this heading">#</a></h2>
-<p>논문에서는 localization이 더 잘 되게 하기 위해 style mixing이라는 방법을 훈련에 사용합니다. 두 개의 style vector <span class="math notranslate nohighlight">\(\mathbf{w}_{1},\mathbf{w}_{2}\)</span>를 사용하여 앞쪽 layer에는 <span class="math notranslate nohighlight">\(\mathbf{w}_{1}\)</span>을, 뒤쪽 layer에는 <span class="math notranslate nohighlight">\(\mathbf{w}_{2}\)</span>를 사용하는 방법입니다. 이는 generator가 인접한 style끼리 correlated되어 있다고 학습하는 것을 막아서 localization이 더 잘 되게 하기 위함입니다.</p>
-</section>
-<section id="id1">
-<h2>실험 결과<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>마지막으로 저자들이 제안한 방법들이 실제로 효과가 있었는지 확인해봅시다. 아래 표와 같이 실험적으로 보았을 때 저자들이 제안한 방법들을 모두 사용한 경우 FID가 가장 우수하게 나왔습니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig8.png"><img alt="stylegan_08" class="bg-primary mb-1" src="../../_images/StyleGAN_fig8.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 69 </span><span class="caption-text">FID for various generator designs</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="cycleGAN.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">CycleGAN</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="diffusion_beats_GANs.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Diffusion Models Beat GANs on Image Synthesis</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mapping-network">Mapping Network</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#style-and-adain">Style and AdaIN</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stochastic-variation">Stochastic Variation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mixing-regularization">Mixing Regularization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">실험 결과</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>StyleGAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/StyleGAN';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Diffusion Models Beat GANs on Image Synthesis" href="diffusion_beats_GANs.html" />
+    <link rel="prev" title="CycleGAN" href="cycleGAN.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/StyleGAN.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/StyleGAN.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>StyleGAN</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mapping-network">Mapping Network</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#style-and-adain">Style and AdaIN</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stochastic-variation">Stochastic Variation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mixing-regularization">Mixing Regularization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">실험 결과</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> A Style-Based Generator Architecture for Generative Adversarial Networks (CVPR 2019)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1812.04948">https://arxiv.org/abs/1812.04948</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/huangzh13/StyleGAN.pytorch">huangzh13/StyleGAN.pytorch</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jisu Kim</p></li>
+<li><p><strong>Last updated on Apr. 12, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="stylegan">
+<h1>StyleGAN<a class="headerlink" href="#stylegan" title="Permalink to this heading">#</a></h1>
+<p>오늘 알아볼 모델은 StyleGAN입니다. 기존에 다뤘던 GAN과 같이 이미지를 생성하는 모델입니다. generator 구조를 변경함으로써 성능을 올리고 feature의 control이 가능하게 했습니다. loss나 discriminator 구조 개선에 관한 논문은 아닙니다. 먼저 결과를 보도록 하죠.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig1.png"><img alt="stylegan_01" class="bg-primary mb-1" src="../../_images/StyleGAN_fig1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 62 </span><span class="caption-text">Images generated by StyleGAN</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이 논문의 contribution은 다음과 같습니다.</p>
+<ol class="arabic simple">
+<li><p>새로운 구조를 제안하여 성능을 높이면서 feature의 control이 가능해졌습니다.</p></li>
+<li><p>새로운 데이터셋을 제안했습니다. (FFHQ)</p></li>
+</ol>
+<p>이 중에서 첫 번째 contribution을 자세히 보도록 하겠습니다. 논문의 abstract에는 다음과 같은 문장이 있습니다.</p>
+<blockquote>
+<div><p>The new architecture leads to an automatically learned, <strong>unsupervised separation of high-level attributes</strong> (e.g., pose and identity when trained on human faces) and stochastic variation in the generated images (e.g., freckles, hair), and it enables intuitive, scale-specific control of the synthesis.</p>
+</div></blockquote>
+<p>논문에서 제안한 새로운 generator 구조가 할 수 있는 일을 설명하는 부분입니다. 여기서 보시면 high level attribute의 separation이 가능하다고 얘기하고 있습니다. 저는 개인적으로 이 부분이 StyleGAN의 가장 중요한 특징이라고 생각합니다.</p>
+<p>생성 모델로 이미지를 생성하고자 할 때, 사용자는 어떠한 목적을 가지고 자신이 원하는 이미지를 만들고자 할 것입니다. 이미지의 품질이 좋더라도 모델이 사용자의 의도와 상관없는 랜덤한 이미지를 내뱉어준다면 그 모델의 실용성이 좋다고 할 수 없을 것입니다. 근래에 Text-to-Image 모델들이 인기를 얻었던 이유도 누구나 쉽게 텍스트를 통해서 생성되는 이미지를 조절할 수 있다는 점도 한몫했다고 생각합니다. StyleGAN은 그런 controllability를 어느 정도 가능하게 한 모델이라는 측면에서 의미있다고 생각합니다.</p>
+<p>StyleGAN의 구조는 아래 그림과 같습니다. synthesis network는 해상도를 4x4에서 시작해서 1024x1024까지 높여줍니다. 최종적으로 1024x1024 해상도를 가지는 이미지를 얻게 됩니다. 아래 구조를 보면 기존 GAN과 비교해서 특이한 점이 세 가지 있습니다.</p>
+<ol class="arabic simple">
+<li><p>z를 input으로 받는 mapping network</p></li>
+<li><p>style과 AdaIN</p></li>
+<li><p>noise와 B (stochastic variation)</p></li>
+</ol>
+<p>이 각각에 대해서 알아보도록 합시다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig2.png"><img alt="stylegan_02" class="bg-primary mb-1" src="../../_images/StyleGAN_fig2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 63 </span><span class="caption-text">Structure of StyleGAN</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="mapping-network">
+<h2>Mapping Network<a class="headerlink" href="#mapping-network" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig3.png"><img alt="stylegan_03" class="bg-primary mb-1" src="../../_images/StyleGAN_fig3.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 64 </span><span class="caption-text">Mappings with <span class="math notranslate nohighlight">\(w\)</span> and without <span class="math notranslate nohighlight">\(w\)</span></span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>기존 GAN을 생각해보면 z를 input으로 받아서 generator를 거쳐서 이미지를 생성하는 구조입니다. 이 z는 보통 Gaussian distribution에서 샘플링으로 얻습니다. GAN은 학습을 통해 Gaussian distribution을 data distribution으로 보내는 방법을 배우게 될 것이고, 이 분포는 (b)처럼 생기게 될 것입니다. 그런데 데이터가 (a)처럼 주어져서 특정한 데이터가 없거나 적을 수도 있을 것입니다. 예를 들어, 데이터에 피부가 희면서 머리가 긴 샘플들이 없다고 해봅시다. 그러면 피부색과 머리 길이라는 두 feature는 서로 얽히게(entangled)되어, 하나를 바꿀 때 다른 하나도 같이 바뀌는 현상이 일어나게 됩니다. 이런 현상을 완화하기 위해 논문에서는 Gaussian에서 뽑은 z를 바로 사용하는 것이 아니라 mapping network를 통해 learnable distribution에서 뽑은 w를 사용합니다.</p>
+</section>
+<section id="style-and-adain">
+<h2>Style and AdaIN<a class="headerlink" href="#style-and-adain" title="Permalink to this heading">#</a></h2>
+<p>instance normalization은 샘플 하나의 각 채널마다 정규화를 취해주는 방법입니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig4.png"><img alt="stylegan_04" class="bg-primary mb-1" src="../../_images/StyleGAN_fig4.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 65 </span><span class="caption-text">Normalization methods</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>adaptive instance normalization (AdaIN)은 instance normalization에 scale을 곱해주고 bias를 더해주는 형태입니다. 이때 scale과 bias는 style vector의 linear transformation으로 주어집니다. linear layer를 통해서 w는 <span class="math notranslate nohighlight">\(\mathbf{y}=(\mathbf{y}_{s},\mathbf{y}_{b})\)</span>로 보내지게 됩니다. AdaIN의 수식은 아래와 같습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathrm{AdaIN}(\mathbf{x}_{i},\mathbf{y})=\mathbf{y}_{s,i}\frac{\mathbf{x}_{i}-\mu(\mathbf{x}_{i})}{\sigma(\mathbf{x}_{i})}+\mathbf{y}_{b,i}
+\]</div>
+<p>AdaIN은 각 블록마다 두 개씩 들어가서 style은 총 열여덟 번 AdaIN을 통해 generator에 들어가게 됩니다. AdaIN은 localization이라는 특징과도 연관이 있습니다. 여기서 말하는 localization이란 열여덟 개의 style 중에서 일부를 바꿈으로써 이미지의 일부 특징들을 바꿀 수 있다는 의미입니다. AdaIN은 각 convolution layer 다음에 적용이 됩니다. 이 때 feature map들은 normalization되고 style에 의해 새로운 statistics를 가지게 됩니다. style은 하나의 convolution에 적용되고, 다음 convolution에서 다시 normalization이 수행되기 때문에 이전 layer에 적용된 style과 다음 layer에 적용된 style이 분리되게 학습될 수 있습니다.</p>
+<p>관련 코드</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">StyleMod</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">use_wscale</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">StyleMod</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">lin</span> <span class="o">=</span> <span class="n">EqualizedLinear</span><span class="p">(</span><span class="n">latent_size</span><span class="p">,</span>
+                                   <span class="n">channels</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span>
+                                   <span class="n">gain</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">use_wscale</span><span class="o">=</span><span class="n">use_wscale</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">latent</span><span class="p">):</span>
+        <span class="n">style</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lin</span><span class="p">(</span><span class="n">latent</span><span class="p">)</span>  <span class="c1"># style =&gt; [batch_size, n_channels*2]</span>
+
+        <span class="n">shape</span> <span class="o">=</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">1</span><span class="p">)]</span> <span class="o">+</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span> <span class="o">*</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+        <span class="n">style</span> <span class="o">=</span> <span class="n">style</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">shape</span><span class="p">)</span>  <span class="c1"># [batch_size, 2, n_channels, ...]</span>
+        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">*</span> <span class="p">(</span><span class="n">style</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="mf">1.</span><span class="p">)</span> <span class="o">+</span> <span class="n">style</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span>
+        <span class="k">return</span> <span class="n">x</span>
+
+<span class="k">class</span><span class="w"> </span><span class="nc">LayerEpilogue</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+<span class="w">    </span><span class="sd">&quot;&quot;&quot;Things to do at the end of each layer.&quot;&quot;&quot;</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">dlatent_size</span><span class="p">,</span> <span class="n">use_wscale</span><span class="p">,</span>
+                 <span class="n">use_noise</span><span class="p">,</span> <span class="n">use_pixel_norm</span><span class="p">,</span> <span class="n">use_instance_norm</span><span class="p">,</span> <span class="n">use_styles</span><span class="p">,</span> <span class="n">activation_layer</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="n">layers</span> <span class="o">=</span> <span class="p">[]</span>
+        <span class="k">if</span> <span class="n">use_noise</span><span class="p">:</span>
+            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;noise&#39;</span><span class="p">,</span> <span class="n">NoiseLayer</span><span class="p">(</span><span class="n">channels</span><span class="p">)))</span>
+        <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;activation&#39;</span><span class="p">,</span> <span class="n">activation_layer</span><span class="p">))</span>
+        <span class="k">if</span> <span class="n">use_pixel_norm</span><span class="p">:</span>
+            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;pixel_norm&#39;</span><span class="p">,</span> <span class="n">PixelNormLayer</span><span class="p">()))</span>
+        <span class="k">if</span> <span class="n">use_instance_norm</span><span class="p">:</span>
+            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="s1">&#39;instance_norm&#39;</span><span class="p">,</span> <span class="n">nn</span><span class="o">.</span><span class="n">InstanceNorm2d</span><span class="p">(</span><span class="n">channels</span><span class="p">)))</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">top_epi</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span><span class="n">OrderedDict</span><span class="p">(</span><span class="n">layers</span><span class="p">))</span>
+
+        <span class="k">if</span> <span class="n">use_styles</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="o">=</span> <span class="n">StyleMod</span><span class="p">(</span><span class="n">dlatent_size</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">use_wscale</span><span class="o">=</span><span class="n">use_wscale</span><span class="p">)</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="o">=</span> <span class="kc">None</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">dlatents_in_slice</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">top_epi</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">style_mod</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">dlatents_in_slice</span><span class="p">)</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="k">assert</span> <span class="n">dlatents_in_slice</span> <span class="ow">is</span> <span class="kc">None</span>
+        <span class="k">return</span> <span class="n">x</span>
+</pre></div>
+</div>
+<p>code from <a class="github reference external" href="https://github.com/huangzh13/StyleGAN.pytorch">huangzh13/StyleGAN.pytorch</a></p>
+<p>아래 그림은 source A의 style 중 일부를 source B의 style로 변경해서 만든 이미지들입니다. style은 총 18곳에서 사용되는데 처음 4곳 (<span class="math notranslate nohighlight">\(4^2 - 8^2\)</span>)을 coarse, 그다음 4곳 (<span class="math notranslate nohighlight">\(16^2-32^2\)</span>)을 middle, 마지막 10곳 (<span class="math notranslate nohighlight">\(64^2-1024^2\)</span>)을 fine style로 정의하였습니다. 그림을 보시면 윗 부분에서는 포즈나 전체적인 머리 스타일같이 coarse style은 source B의 것을 유지하고, 아래로 갈수록 source A의 큰 틀을 유지하면서 세부적인 부분들을 B에서 가져왔음을 볼 수 있습니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig5.png"><img alt="stylegan_05" class="bg-primary mb-1" src="../../_images/StyleGAN_fig5.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 66 </span><span class="caption-text">Mixing two styles</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="stochastic-variation">
+<h2>Stochastic Variation<a class="headerlink" href="#stochastic-variation" title="Permalink to this heading">#</a></h2>
+<p>한 사람의 이미지 안에는 확률적으로 바뀔 수 있는 부분(주근깨, 머릿결, 피부)이 있습니다. 이를 모델링하기 위해서 noise를 추가적인 input으로 사용하며, 이 noise는 각 convolution layer 다음에 더해집니다. 아래 그림에서 (a)의 생성된 한 사람의 이미지 안에서도 디테일들은 (b)와 같이 달라질 수 있습니다. (c)와 같이 standard deviation을 구해봤을 때 얼굴형과 같은 attribute는 변하지 않지만 noise에 의해서 머리카락과 같은 부분은 variation이 생김을 볼 수 있습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig6.png"><img alt="stylegan_06" class="bg-primary mb-1" src="../../_images/StyleGAN_fig6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 67 </span><span class="caption-text">Examples of stochastic variation</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래 그림에서 (a)는 모든 layer에 noise를 준 경우, (b)는 noise를 주지 않은 경우, (c)는 fine layers (<span class="math notranslate nohighlight">\(64^2 - 1024^2\)</span>)에만 noise를 준 경우, (d)는 coarse layers (<span class="math notranslate nohighlight">\(4^2 - 32^2\)</span>)에만 noise를 준 경우입니다. (b)를 보면 noise가 없을 경우 머리카락같은 디테일이 제대로 살아있지 않은 것을 볼 수 있습니다. (c)와 (d)를 보면 fine layers에 들어간 noise가 머리카락의 더 세밀한 부분에 영향을 끼친다는 것을 볼 수 있습니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig7.png"><img alt="stylegan_07" class="bg-primary mb-1" src="../../_images/StyleGAN_fig7.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 68 </span><span class="caption-text">Effect of noise inputs at different layers</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="mixing-regularization">
+<h2>Mixing Regularization<a class="headerlink" href="#mixing-regularization" title="Permalink to this heading">#</a></h2>
+<p>논문에서는 localization이 더 잘 되게하기 위해 style mixing이라는 방법을 훈련에 사용합니다. 두 개의 style vector <span class="math notranslate nohighlight">\(\mathbf{w}_{1},\mathbf{w}_{2}\)</span>를 사용하여 앞 쪽 layer에는 <span class="math notranslate nohighlight">\(\mathbf{w}_{1}\)</span>을, 뒤 쪽 layer에는 <span class="math notranslate nohighlight">\(\mathbf{w}_{2}\)</span>를 사용하는 방법입니다. 이는 generator가 인접한 style끼리 correlated되어있다고 학습하는 것을 막아서 localization을 더 잘 되게 하는 목적입니다.</p>
+</section>
+<section id="id1">
+<h2>실험 결과<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>마지막으로 저자들이 제안한 방법들이 실제로 효과가 있었는지 확인해봅시다. 아래 표와 같이 실험적으로 보았을 때 저자들이 제안한 방법들을 모두 사용한 경우 FID가 가장 우수하게 나왔습니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/StyleGAN_fig8.png"><img alt="stylegan_08" class="bg-primary mb-1" src="../../_images/StyleGAN_fig8.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 69 </span><span class="caption-text">FID for various generator designs</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="cycleGAN.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">CycleGAN</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="diffusion_beats_GANs.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Diffusion Models Beat GANs on Image Synthesis</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mapping-network">Mapping Network</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#style-and-adain">Style and AdaIN</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#stochastic-variation">Stochastic Variation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#mixing-regularization">Mixing Regularization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">실험 결과</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html b/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html
old mode 100644
new mode 100755
index 5f312a7b..9945fb97
--- a/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html
+++ b/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html
@@ -1,912 +1,932 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Synthetic Data from Diffusion Models Improves ImageNet Classification &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="GLIDE" href="GLIDE.html" />
-    <link rel="prev" title="CM3leon" href="CM3leon.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Synthetic Data from Diffusion Models Improves ImageNet Classification</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-model-training-and-sampling">4. Generative Model Training and Sampling</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#imagen-fine-tuning">4.1. Imagen Fine-tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-parameters">4.2. Sampling Parameters</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generation-protocol">4.3. Generation Protocol</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality-fid-and-is">5-1. Sample Quality: FID and IS</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-score">5.2. Classification Accuracy Score</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-with-different-models">5.3. Classification Accuracy with Different Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#merging-real-and-synthetic-data-at-scale">5.4. Merging Real and Synthetic Data at Scale</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Synthetic Data from Diffusion Models Improves ImageNet Classification</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2303.03231">https://arxiv.org/abs/2304.08466</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
-<li><p><strong>Last updated on Oct. 25, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="synthetic-data-from-diffusion-models-improves-imagenet-classification">
-<h1>Synthetic Data from Diffusion Models Improves ImageNet Classification<a class="headerlink" href="#synthetic-data-from-diffusion-models-improves-imagenet-classification" title="Permalink to this heading">#</a></h1>
-<p>이번에 리뷰할 논문은 구글 리서치 그룹에서 TMLR(Transactions on Machine Learning Research) 2023에 제출한 논문인 <a class="reference external" href="https://arxiv.org/abs/2304.08466">Synthetic Data from Diffusion Models Improves ImageNet Classification</a>입니다.</p>
-<p>생성 모델이 놀라운 속도로 발전하고 있는데요! 해당 논문에서는 생성 모델의 수준이 얼만큼 왔는지, 복잡한 이미지 데이터인 ImageNet 데이터에 대해서도 충분한 퀄리티의 데이터를 생성할 수 있는 정도가 되었는지, 그래서 이 생성된 데이터를 augment된 데이터로 사용할 수 있는 정도까지 왔는지에 대한 실험과 답을 제시합니다. 이 글의 목차는 논문 내용과 동일하게 구성하였습니다.</p>
-<aside>
-💡 핵심 요약 
-<ul class="simple">
-<li><p>Classification task에서 생성 모델을 데이터 augmentation으로 사용해 분류 성능을 개선 시킴</p></li>
-<li><p>Large-scale text-to-image diffusion 모델을 fine-tuning하여 FID와 Inception Score, Classification Accuracy Score에서 SOTA를 달성</p></li>
-<li><p>ImageNet에 대해 fine-tuning된 Imagen 모델을 사용함</p></li>
-<li><p>Diffusion으로 만든 합성 데이터를 학습에 사용하였을 경우 ResNet 및 Vision Transformer의 분류 성능이 크게 향상 됨</p></li>
-</ul>
-</aside>
-<p>본 논문에서는 기술적으로 엄청 새로운 내용은 없는데요! 다만 보통 사전학습된 text-to-image diffusion 모델을 사용하던 기존 방법들과는 달리 Imagen을 ImageNet에 대해 파인튜닝 했다는 것이 새롭습니다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p>Diffusion 모델의 등장으로 생성 기술이 크게 발전되었습니다. 현재 생성 기술 수준이 data augmentation으로 사용될 수 있을 만큼의 자연스러운 이미지를 생성하는 것도 가능할까?에 대한 질문이 나오는 것은 당연하고, 본 논문에서는 이에 대한 답을 찾고자 했습니다. 먼저 이 질문에 대한 답을 이야기 하면 아래와 같습니다.</p>
-<ul>
-<li><p>결과 요약</p>
-<ul>
-<li><p>ImageNet에 대해 fine-tuning된 Imagen이 FID, Inception Score, CAS 성능에 대해 SOTA 성능을 달성 하였다.</p></li>
-<li><p>합성 데이터와 실제 데이터를 결합하여 사용하고, 합성 데이터의 양이 많고, 훈련 시간이 길수록 생성 데이터로 훈련된 모델의 성능이 더욱 향상되었다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/14.png"><img alt="improved_imagenet_classification_00" class="bg-primary mb-1" src="../../_images/14.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 313 </span><span class="caption-text">위 그림: 합성 데이터로만 학습된 모델 분류 성능과 진짜 데이터로 학습된 모델의 분류 성능 비교 \
-아래 그림: 합성 및 진짜 데이터를 사용하였을 때의 분류 성능과 진짜 데이터로 학습된 모델의 분류 성능 비교</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-</ul>
-<p>위의 그림에서 볼 수 있듯이 합성 데이터로만 학습한 모델의 정확도와 실제 데이터로 학습한 모델의 정확도를 비교했을 때, 다른 모델들에 비해 본 논문에서 제안한 모델이 훨씬 성능 차이가 적다는 것을 알 수 있습니다. 또한, 아래 그림을 보면, 실제 데이터와 생성된 데이터를 더해서 학습했을 경우에는 ResNet 기반 모델과 Transformer 기반 모델들에서 모두 실제 데이터를 사용했을 때보다 성능 향상이 있었습니다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<p>생성 모델을 이용해 data augmentation을 하려고 했던 기존 방법들에 대해 짧게 이야기 햐려고 합니다. 최근에는 large-scale text-to-image 모델들이 학습 데이터를 보강하는데 사용되기 시작했습니다.</p>
-<p>그 예로 “<a class="reference external" href="https://arxiv.org/abs/2210.07574">Is synthetic data from generative models ready for image recognition?</a>” 논문이 있습니다. 해당 논문에서는 GLIDE로 생성된 합성 데이터가 zero-shot과 few-shot 이미지 분류 성능을 향상 시켰으며, CIFAR-100 이미지에서 GLIDE를 fine-tuning하여 생성된 합성 데이터 세트가 CIFAR-100의 분류 정확도를 크게 향상 시켰다고 이야기 합니다.</p>
-<p>하지만, 위의 논문을 포함해서 기존의 논문들은 이런 생성 모델을 이용해서 data augmentation을 하여도 ImageNet validation set에 대해서는 성능을 향상 시키지 못했습니다. 또한, 기존에 논문들은 pretrained Stable Diffusion 모델을 사용하고, fine-tuning은 하지 않았습니다. 본 논문에서는 기존 논문들과는 다르게 Imagen을 ImageNet에 잘 동작하고 fine-tuning을 하였고, 그 결과 ImageNet validation set에 대해서도 성능을 향상 시킬 수 있었습니다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="background">
-<h1>3. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
-<p>본 논문에서는 Classification Accuracy Scores(CAS)라는 성능 지표를 소개합니다. FID와 Inception Score는 생성 모델의 성능 지표로 워낙 많이 쓰여서 설명은 생략하고, CAS에 대해서는 논문에서 써져 있는 내용으로 소개하겠습니다.</p>
-<p>CAS는 FID와 Inception Score와 마찬가지로 생성 모델이 만들어낸 샘플의 품질을 평가하는 방법으로 제안 된 성능 지표입니다. 이것은 ‘합성 데이터’로만 훈련된 ResNet-50 모델에 대한 ImageNet validation set에 대한 분류 성능을 의미합니다. 먼저, 생성 모델을 통해 ImageNet 데이터에 대한 합성 데이터를 만들어냅니다. 그리고 이 합성 데이터만을 이용하여 ResNet-50을 훈련 시키고, 그 훈련된 모델의 실제 ImageNet validation set에 대해 분류 성능이 CAS가 됩니다. 만약 합성 데이터가 실제 ImageNet과 비슷하다면 그 합성 데이터로 학습된 모델은 실제 ImageNet validation set에 대해 좋은 분류 성능을 보일 것이라는 가정을 이용한 성능 지표라고 이해하면 될 것 같습니다.</p>
-<p>저자에 의하면 그동안 생성모델의 CAS 성능은 좋지 않았다고 합니다. 생성된 샘플로만 훈련된 모델은 실제 데이터로 훈련된 모델보다 성능이 떨어졌고 (이는 당연해보입니다), 실제 데이터에 합성 데이터를 추가하면 성능이 떨어졌다고 합니다. 이는 아마도 생성된 샘플의 품질, 다양성 등이 원인일 수 있을 것이라고 합니다.</p>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="generative-model-training-and-sampling">
-<h1>4. Generative Model Training and Sampling<a class="headerlink" href="#generative-model-training-and-sampling" title="Permalink to this heading">#</a></h1>
-<p>여기서는 실제로 저자들이 어떻게 text-to-image diffusion 모델을 학습하고, 샘플링을 하였는지에 대한 설명을 합니다.</p>
-<p>먼저 저자들은 text-to-image diffusion 모델로는 Imagen을 사용하였습니다. Text-to-image 모델을 어떻게 ImageNet 클래스와 alignment 할 지에 대한 고민이 필요했다고 합니다. 처음에는 CLIP에서 사용한 방법과 유사하게 짧은 텍스트를 ImageNet 클래스의 텍스트 프롬프트로 사용했다고 하였는데 이 경우에 성능이 좋지 않았다고 합니다. 이는 Imagen에서 high guidance weight를 사용하여 샘플의 다양성이 저하 되면서 생기는 현상일 수 있다고 합니다. 따라서, 저자들은 프롬프트를 한 두단어 클래스 이름으로 수정하고, 모델의 weight와 sampling parameter를 fine-tuning 했다고 합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/21.png"><img alt="improved_imagenet_classification_01" class="bg-primary mb-1" src="../../_images/21.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 314 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>왼쪽 그림이 fine-tuning이 적용된 Imagen이 만들어낸 이미지고, 오른쪽이 fine-tuning이 적용되지 않은 Imagen입니다. 아래에서 두 번째 클래스인 Schipperke를 보면, 이것은 스키퍼키라는 개 품종을 의미하는데 fine-tuning이 적용되지 않은 Imagen의 경우는 꽃과 같은 전혀 엉뚱한 이미지를 만들고 있는 것을 볼 수 있습니다.</p>
-<section id="imagen-fine-tuning">
-<h2>4.1. Imagen Fine-tuning<a class="headerlink" href="#imagen-fine-tuning" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 Imagen을 어떻게 fine-tuning 했는지를 설명하는 부분입니다.</p>
-<p>먼저 Imagen 구조는 아래와 같습니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/31.png"><img alt="improved_imagenet_classification_02" class="bg-primary mb-1" src="../../_images/31.png" style="width: 200px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 315 </span><span class="caption-text">Imagen 구조</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>본 논문에서는 위의 Imagen 구조에서 빨간 원으로 표시된 부분에 대해서만 fine-tuning 했습니다. Frozen Text Encoder의 경우는 원래 Imagen에서도 학습을 하지 않는 부분이라 마찬가지로 학습을 하지 않았고, 1024x1024 Image를 출력으로 하는 마지막 Super-Resolution Diffusion Model의 경우 ImageNet에 고해상도의 데이터가 적어서 fine-tuning을 하지 않았다고 합니다.</p>
-<p>64x64 모델의 경우는 210K step 정도 학습하였고, optimizer의 경우는 Imagen에서 사용하였던 Adafactor optimizer를 사용하였다고 합니다. 64x64 → 256x256 super-resolution 모델의 경우는 490K step 정도 하였고, Adam optimizer를 사용하였다고 합니다.</p>
-<p>최적의 모델 선택의 기준으로는 기본 Imagen sampler와 ImageNet-1K validation set에 대해 10K개의 샘플들에 대해 FID score를 계산했을 때 가장 좋은 성능의 모델을 선택했다고 합니다.</p>
-</section>
-<section id="sampling-parameters">
-<h2>4.2. Sampling Parameters<a class="headerlink" href="#sampling-parameters" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 본 논문에서 sampling parameter는 어떻게 정했는지를 설명하는 부분입니다. 먼저, Text-conditioned diffusion model 샘플링의 품질, 다양성, 속도는 디퓨전 스텝 수, noise condition augmentation, guidance weight for classifier-free guidance, log-variance mixing coefficient 등에 대해 큰 영향을 받는다고 합니다.</p>
-<p>각각에 대해 간단하게 설명하면 아래와 같습니다.</p>
-<ul>
-<li><p>Noise condition augmentation:</p>
-<p>이미지 생성 과정에서 확률적인 요소를 도입하여 생성된 이미지의 다양성을 증가시키는 기술. 일반적으로, 모델은 잠재 공간의 랜덤한 노이즈를 입력으로 받아 다양한 이미지를 생성하게 됨. 이것은 생성된 이미지가 조금씩 다른 것으로 보이게 만들며, 더 다양한 결과를 얻을 수 있게 함  (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2205.11487">Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding</a>”를 참고해주세요)
-<br></p>
-</li>
-<li><p>Guidance weight for classifier-free guidance:</p>
-<p>“Classifier-free guidance”는 이미지를 생성하는 데 분류기나 특정 지표 없이 외부 정보를 사용한다는 것.  “Guidance weights”는 외부 정보를 모델에 어떻게 반영할지를 조절하는 가중치를 의미할 수 있으며, 이러한 가중치를 조절하여 모델이 원하는 특성이나 스타일을 가진 이미지를 더 잘 생성하도록 함 (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2207.12598">Classifier-free diffusion guidance</a>”를 참고해주세요)
-<br></p>
-</li>
-<li><p>Log-variance mixing coefficient:</p>
-<p>이미지 생성 모델에서 사용되는 확률 분포의 변동성을 조절하는 데 사용되는 계수를 나타냄. 이미지 생성 모델은 일반적으로 확률 분포를 사용하여 이미지를 생성하며, 이 확률 분포의 평균과 분산을 조절함으로써 생성된 이미지의 다양성과 품질을 조절할 수 있음. 로그-분산 혼합 계수는 이러한 분산을 조절하는 데 사용되며, 높은 값은 더 큰 분산을 의미하고, 작은 값은 더 작은 분산을 의미함. 이를 통해 이미지 생성의 다양성을 조절할 수 있음 (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2102.09672">Improved denoising diffusion probabilistic models</a>”를 참고해주세요)<br />
-<br></p>
-</li>
-</ul>
-<p>64x64 기반 모델의 샘플링 parameter 설정법에 대해 설명하겠습니다. 해당 모델의 샘플링 이미지 샘플링의 전반적인 특징과 다양성의 영향을 주게 됩니다. 1차 sweep으로 DDPM 샘플러를 이용하여 FID-50K에 대해 가장 최적의 하이퍼파라미터를 찾습니다. Sweep의 사용한 각 하이퍼파라미터의 범위는 아래와 같습니다.</p>
-<ul class="simple">
-<li><p>Guidance weight: [1.0, 1.25, 1.5, 1.75, 2.0, 5.0]</p></li>
-<li><p>Log-variance: [0.0, 0.2, 0.3, 0.4, 1.0]</p></li>
-<li><p>Denoise step: [128, 500, 1000]</p></li>
-</ul>
-<p>1차 sweep 결과 최적의 FID는 log-variance는 0이고 denoising step은 1000이었을 때라고 합니다.</p>
-<p>1차 sweep이 끝난 후에는 guidance weight에 대해서만 sweep을 합니다. 이 때에는 1.2M 이미지를 사용하고, 각 guidance weight에 대해 FID, IS, CAS를 측정했다고 합니다.</p>
-<p>각 샘플링 하이퍼파라미터에 대한 실험 결과는 아래와 같습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/41.png"><img alt="improved_imagenet_classification_03" class="bg-primary mb-1" src="../../_images/41.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 316 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>왼쪽 그림이 1차 sweep에 대한 결과고, 가운데와 오른쪽 그림이 2차 sweep에 대한 결과로 guidance weight에 따른 FID, IS, CAS를 나타낸 결과입니다.</p>
-<p>이제 다음으로는 64x64 → 256x256 super-resolution 모델에 대해 하이퍼파라미터를 선택하는 부분에 대해 설명하겠습니다. 하이퍼파라미터의 range는 아래와 같습니다.
-- Guidance weight: [1.0, 2.0, 5.0, 10.0, 30.0]
-- Noise conditioning augmentation: [0.0, 0.1, 0.2, 0.3, 0.4]
-- Log-variance mixing coefficients: [0.1, 0.3]
-- Denoise steps: [129, 500, 1000]</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/51.png"><img alt="improved_imagenet_classification_04" class="bg-primary mb-1" src="../../_images/51.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 317 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 그래프는 guidance weight를 1.0으로 설정하고 noise condition 파라미터를 변경했을 때 FID와 CAS의 그래프를 나타낸 그래프입니다. CAS 같은 경우는 logvar coeff가 0.3일 때 전반적으로 좋은 성능을 보였으며, FID 같은 경우도 logvar coeff가 0.3일 때 전반적으로 좋은 성능을 보인 것을 알 수 있습니다.</p>
-<br>
-<p>샘플링 하이퍼파라미터의 결과를 분석해보자면, 전반적으로 FID와 CAS는 높은 상관관계가 있으며 (Figure 4 참고), guidance weight가 작을수록 CAS는 높아지지만, Inception Score에는 부정적인 영향을 주며 (Figure 3 참고), noise augmentation이 0일 때 FID가 가장 작은 것을 볼 수 있습니다. (Figure 4 참고)</p>
-<br>
-<p>이런 하이퍼파라미터 설정 방법을 기준으로 본 논문에서 최종적으로 설정한 값은 아래와 같다고 합니다.</p>
-<ul class="simple">
-<li><p>Guidance weight</p>
-<ul>
-<li><p>베이스 모델: 1.25</p></li>
-<li><p>나머지 resolution: 1.0</p></li>
-</ul>
-</li>
-<li><p>Log-variance mixing coefficients (sampler, steps)</p>
-<ul>
-<li><p>64x64 샘플: 0.0 (DDPM, 1000 denoising steps)</p></li>
-<li><p>256x256 샘플: 0.1  (DDPM, 1000 denoising steps)</p></li>
-<li><p>1024x1024 샘플: 0.0 (DDIM, 32 denoising steps)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="generation-protocol">
-<h2>4.3. Generation Protocol<a class="headerlink" href="#generation-protocol" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 실제로 데이터 합성은 어떤 프로토콜을 따랐는지에 대해 설명하는 부분입니다. 본 논문에서는 원본 데이터셋의 class balance를 유지하며 데이터를 합성했으며, 합성된 결과 총 훈련 데이터셋의 규모는 1배인 1.2M 에서 10배인 12M 규모의 데이터셋의 범위를 가지도록 데이터를 합성했다고 합니다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="result">
-<h1>5. Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h1>
-<section id="sample-quality-fid-and-is">
-<h2>5-1. Sample Quality: FID and IS<a class="headerlink" href="#sample-quality-fid-and-is" title="Permalink to this heading">#</a></h2>
-<p>먼저, 합성된 데이터의 품질을 합성 태스크에서 많이 사용되는 지표인 FID와 IS의 관점으로 봅니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/61.png"><img alt="improved_imagenet_classification_05" class="bg-primary mb-1" src="../../_images/61.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 318 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 표에서 볼 수 있듯이, 본 논문의 파인 튜닝된 Imagen이 ImageNet에 대한 데이터 생성에 대해 다른 베이스모델들 보다 FID와 IS가 뛰어난 것을 알 수 있습니다. 이는 64x64 resolution과 256x256 resolution에서 모두 해당되었습니다.</p>
-</section>
-<section id="classification-accuracy-score">
-<h2>5.2. Classification Accuracy Score<a class="headerlink" href="#classification-accuracy-score" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 CAS 성능 지표를 통해 본 논문에서 제안한 모델의 데이터 합성 능력을 확인하는 부분입니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/71.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/71.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 319 </span><span class="caption-text">CAS score</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Figure 5에서 파란색 부분은 실제 학습 데이터로 학습된 모델의 분류 성능이고, 빨간색 부분은 합성된 데이터로 학습된 모델의 분류 성능입니다. 왼쪽 그림은 베이스라인 중 하나인 CDM 모델의 성능을 나타낸 그림이며, 가운데는 본 논문에서 256x256 resolution 모델의 성능, 오른쪽은 본 논문에서 제안한 1024x1024 resolution 모델의 성능을 나타낸 것입니다. 빨간색 부분이 파란색 부분보다 전반적으로 위쪽에 위치하면 모델의 성능이 좋다고 해석할 수 있습니다. 이 그림을 통해 본 논문에서 제안한 모델들이 베이스라인보다 좋은 성능을 보인다는 것을 알 수 있습니다.</p>
-<p>Table 2에서도 마찬가지로 본 논문 모델이 다른 베이스 모델보다 성능이 뛰어난 것을 알 수 있습니다. 여기서 주목할 만한 점은 CAS를 평가하기 위한 ResNet50이 256x256으로 입력 데이터를 다운샘플링 함에도 1024x1024 샘플에 대한 결과가 훨씬 좋다는 것을 볼 수 있습니다. (Ours 256x256 resolution보다 Ours 1024x1024 resolution의 CAS 성능이 월등히 높음)</p>
-</section>
-<section id="classification-accuracy-with-different-models">
-<h2>5.3. Classification Accuracy with Different Models<a class="headerlink" href="#classification-accuracy-with-different-models" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 합성된 데이터를 여러 종류의 모델로 학습 시켰을 때, 각 모델의 분류 성능을 확인하는 부분입니다. CAS와 비슷하지만 CAS에서는 ResNet50 모델로 분류 성능을 확인했지만 여기서는 ResNet50 이외에 모델로도 분류 성능을 본다는 차이점이 있습니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/81.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/81.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 320 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 표에서 확인할 수 있듯이, 다양한 모델에 대해서 분류 정확도를 살펴본 결과 생성된 데이터로만 학습될 경우에는 실제 데이터로 학습할 때 보다 성능이 낮았지만, 실제 데이터와 생성된 데이터를 합쳐서 학습할 경우 실제 데이터만 사용했을 때보다 성능이 증가한 것을 볼 수 있습니다. 이것은 ConvNet 기반 모델과 transformer 기반 모델에 대해서 동일한 양상을 보였습니다.</p>
-</section>
-<section id="merging-real-and-synthetic-data-at-scale">
-<h2>5.4. Merging Real and Synthetic Data at Scale<a class="headerlink" href="#merging-real-and-synthetic-data-at-scale" title="Permalink to this heading">#</a></h2>
-<p>이 부분은 합성 데이터 규모에 따른 ResNet-50의 성능을 분석한 부분입니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/91.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/91.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 321 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>64x64 이미지의 경우 생성되는 데이터의 양이 증가함에 따라 성능이 지속적으로 향상되는 것을 볼 수 있습니다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/102.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/102.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 322 </span><span class="caption-text">Table 4</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>하지만 다른 resolution에 대해서는 다른 양상을 보였습니다. 학습 데이터가 4.8M 규모가 될 때까지는 합성 데이터를 추가하는 것이 분류 성능에 좋았으나, 합성 데이터를 더 늘려 그 이상의 규모가 되었을 때는 오히려 성능이 떨어지는 것을 볼 수 있었습니다.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<p>본 논문의 결론 부분을 보자면, 이 논문에서는 Large-scale text-to-image diffusion 모델을 파인튜닝하여 FID, Inception Score, CAS 성능 지표에 대해서 SOTA를 달성했습니다.</p>
-<ul class="simple">
-<li><p>FID: 1.76 at 256x256</p></li>
-<li><p>Inception Score: 239 at 256x256</p></li>
-<li><p>CAS: 64.96 for 256x256, 69.24 for 1024x1024</p></li>
-</ul>
-<p>또한 그렇게 생성 데이터를 이용하여 ResNet과 Transformer 기반 모델들에 대한 ImageNet classification accuracy를 향상 시켰습니다.</p>
-<p>실험 결과에 대해서 생각해볼만한 거리들이 있었는데 그 중 하나는 CAS 성능 측정할 때 ResNet50이 입력을 256x256으로 다운샘플링 함에도 불구하고 256x256보다 1024x1024의 모델의 CAS가 좋은 것이 있었습니다. 이는 다운샘플링을 하더라도 다운샘플링 전 원본 데이터 resolution이 클 때 더 많은 정보를 담는다는 것을 의미하는 것일 수 있습니다. 또한,  64x64 데이터에서 합성 데이터의 양이 증가함에 따라 분류 정확도가 지속적으로 증가했지만 고해상도 데이터에서는 그렇지 않았던 것을 통해 고해상도에 이미지에 대해서는 보다 정교한 훈련 방법이 필요할 수 있음을 시사하고 있습니다.</p>
-<hr class="docutils" />
-<p>이렇게 Synthetic Data from Diffusion Models Improves ImageNet Classification 논문의 리뷰를 마치겠습니다. 개인적으로 느낀 점은 실제 산업에서는 data shortage나 class imbalance 문제가 대부분 발생하는데 본 논문이 그 해결법 중 하나가 될 수 있을 것 같다는 생각이 들었습니다. 다만 Frozen Text Encoder는 추가적으로 파인튜닝이 되지 않기 때문에 특정 산업에서만 쓰이는 특정 텍스트가 들어왔을 때는 잘 동작할 수 있을까 하는 의문이 들었습니다. 또한 합성하고자 하는 데이터셋에 맞게 파인튜닝을 해야하는 점이 꽤나 불편할 것 같아서 파인튜닝이 모델 성능에 얼마나 큰 의미를 갖는지, 파인튜닝을 하지 않았을 때의 CAS 성능도 논문에 있었으면 좋았을 것 같다는 개인적인 생각이 들었습니다. (물론 Figure 2를 보고 어느 정도 결과를 유추해볼 순 있지만요!)</p>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="CM3leon.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">CM3leon</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="GLIDE.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">GLIDE</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-model-training-and-sampling">4. Generative Model Training and Sampling</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#imagen-fine-tuning">4.1. Imagen Fine-tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-parameters">4.2. Sampling Parameters</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generation-protocol">4.3. Generation Protocol</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality-fid-and-is">5-1. Sample Quality: FID and IS</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-score">5.2. Classification Accuracy Score</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-with-different-models">5.3. Classification Accuracy with Different Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#merging-real-and-synthetic-data-at-scale">5.4. Merging Real and Synthetic Data at Scale</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Synthetic Data from Diffusion Models Improves ImageNet Classification &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="GLIDE" href="GLIDE.html" />
+    <link rel="prev" title="CM3leon" href="CM3leon.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Synthetic Data from Diffusion Models Improves ImageNet Classification</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-model-training-and-sampling">4. Generative Model Training and Sampling</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#imagen-fine-tuning">4.1. Imagen Fine-tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-parameters">4.2. Sampling Parameters</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generation-protocol">4.3. Generation Protocol</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality-fid-and-is">5-1. Sample Quality: FID and IS</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-score">5.2. Classification Accuracy Score</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-with-different-models">5.3. Classification Accuracy with Different Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#merging-real-and-synthetic-data-at-scale">5.4. Merging Real and Synthetic Data at Scale</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Synthetic Data from Diffusion Models Improves ImageNet Classification</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2304.08466">https://arxiv.org/abs/2304.08466</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b">Jeonghwa Yoo</a></p></li>
+<li><p><strong>Last updated on Oct. 25, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="synthetic-data-from-diffusion-models-improves-imagenet-classification">
+<h1>Synthetic Data from Diffusion Models Improves ImageNet Classification<a class="headerlink" href="#synthetic-data-from-diffusion-models-improves-imagenet-classification" title="Permalink to this heading">#</a></h1>
+<p>이번에 리뷰할 논문은 구글 리서치 그룹에서 TMLR(Transactions on Machine Learning Research) 2023에 제출한 논문인 <a class="reference external" href="https://arxiv.org/abs/2304.08466">Synthetic Data from Diffusion Models Improves ImageNet Classification</a>입니다.</p>
+<p>생성 모델이 놀라운 속도로 발전하고 있는데요! 해당 논문에서는 생성 모델의 수준이 얼만큼 왔는지, 복잡한 이미지 데이터인 ImageNet 데이터에 대해서도 충분한 퀄리티의 데이터를 생성할 수 있는 정도가 되었는지, 그래서 이 생성된 데이터를 augment된 데이터로 사용할 수 있는 정도까지 왔는지에 대한 실험과 답을 제시합니다. 이 글의 목차는 논문 내용과 동일하게 구성하였습니다.</p>
+<aside>
+💡 핵심 요약 
+<ul class="simple">
+<li><p>Classification task에서 생성 모델을 데이터 augmentation으로 사용해 분류 성능을 개선 시킴</p></li>
+<li><p>Large-scale text-to-image diffusion 모델을 fine-tuning하여 FID와 Inception Score, Classification Accuracy Score에서 SOTA를 달성</p></li>
+<li><p>ImageNet에 대해 fine-tuning된 Imagen 모델을 사용함</p></li>
+<li><p>Diffusion으로 만든 합성 데이터를 학습에 사용하였을 경우 ResNet 및 Vision Transformer의 분류 성능이 크게 향상 됨</p></li>
+</ul>
+</aside>
+<p>본 논문에서는 기술적으로 엄청 새로운 내용은 없는데요! 다만 보통 사전학습된 text-to-image diffusion 모델을 사용하던 기존 방법들과는 달리 Imagen을 ImageNet에 대해 파인튜닝 했다는 것이 새롭습니다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>Diffusion 모델의 등장으로 생성 기술이 크게 발전되었습니다. 현재 생성 기술 수준이 data augmentation으로 사용될 수 있을 만큼의 자연스러운 이미지를 생성하는 것도 가능할까?에 대한 질문이 나오는 것은 당연하고, 본 논문에서는 이에 대한 답을 찾고자 했습니다. 먼저 이 질문에 대한 답을 이야기 하면 아래와 같습니다.</p>
+<ul>
+<li><p>결과 요약</p>
+<ul>
+<li><p>ImageNet에 대해 fine-tuning된 Imagen이 FID, Inception Score, CAS 성능에 대해 SOTA 성능을 달성 하였다.</p></li>
+<li><p>합성 데이터와 실제 데이터를 결합하여 사용하고, 합성 데이터의 양이 많고, 훈련 시간이 길수록 생성 데이터로 훈련된 모델의 성능이 더욱 향상되었다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/14.png"><img alt="improved_imagenet_classification_00" class="bg-primary mb-1" src="../../_images/14.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 313 </span><span class="caption-text">위 그림: 합성 데이터로만 학습된 모델 분류 성능과 진짜 데이터로 학습된 모델의 분류 성능 비교<br />
+아래 그림: 합성 및 진짜 데이터를 사용하였을 때의 분류 성능과 진짜 데이터로 학습된 모델의 분류 성능 비교</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+<p>위의 그림에서 볼 수 있듯이 합성 데이터로만 학습한 모델의 정확도와 실제 데이터로 학습한 모델의 정확도를 비교했을 때, 다른 모델들에 비해 본 논문에서 제안한 모델이 훨씬 성능 차이가 적다는 것을 알 수 있습니다. 또한, 아래 그림을 보면, 실제 데이터와 생성된 데이터를 더해서 학습했을 경우에는 ResNet 기반 모델과 Transformer 기반 모델들에서 모두 실제 데이터를 사용했을 때보다 성능 향상이 있었습니다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<p>생성 모델을 이용해 data augmentation을 하려고 했던 기존 방법들에 대해 짧게 이야기하려고 합니다. 최근에는 large-scale text-to-image 모델들이 학습 데이터를 보강하는데 사용되기 시작했습니다.</p>
+<p>그 예로 “<a class="reference external" href="https://arxiv.org/abs/2210.07574">Is synthetic data from generative models ready for image recognition?</a>” 논문이 있습니다. 해당 논문에서는 GLIDE로 생성된 합성 데이터가 zero-shot과 few-shot 이미지 분류 성능을 향상 시켰으며, CIFAR-100 이미지에서 GLIDE를 fine-tuning하여 생성된 합성 데이터 세트가 CIFAR-100의 분류 정확도를 크게 향상 시켰다고 이야기 합니다.</p>
+<p>하지만, 위의 논문을 포함해서 기존의 논문들은 이런 생성 모델을 이용해서 data augmentation을 하여도 ImageNet validation set에 대해서는 성능을 향상 시키지 못했습니다. 또한, 기존의 논문들은 pretrained Stable Diffusion 모델을 사용하고, fine-tuning은 하지 않았습니다. 본 논문에서는 기존 논문들과는 다르게 Imagen이 ImageNet에 잘 동작하도록 fine-tuning을 하였고, 그 결과 ImageNet validation set에 대해서도 성능을 향상 시킬 수 있었습니다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="background">
+<h1>3. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h1>
+<p>본 논문에서는 Classification Accuracy Scores(CAS)라는 성능 지표를 소개합니다. FID와 Inception Score는 생성 모델의 성능 지표로 워낙 많이 쓰여서 설명은 생략하고, CAS에 대해서는 논문에서 써져 있는 내용으로 소개하겠습니다.</p>
+<p>CAS는 FID와 Inception Score와 마찬가지로 생성 모델이 만들어낸 샘플의 품질을 평가하는 방법으로 제안 된 성능 지표입니다. 이것은 ‘합성 데이터’로만 훈련된 ResNet-50 모델에 대한 ImageNet validation set에 대한 분류 성능을 의미합니다. 먼저, 생성 모델을 통해 ImageNet 데이터에 대한 합성 데이터를 만들어냅니다. 그리고 이 합성 데이터만을 이용하여 ResNet-50을 훈련 시키고, 그 훈련된 모델의 실제 ImageNet validation set에 대한 분류 성능이 CAS가 됩니다. 만약 합성 데이터가 실제 ImageNet과 비슷하다면 그 합성 데이터로 학습된 모델은 실제 ImageNet validation set에 대해 좋은 분류 성능을 보일 것이라는 가정을 이용한 성능 지표라고 이해하면 될 것 같습니다.</p>
+<p>저자에 의하면 그동안 생성모델의 CAS 성능은 좋지 않았다고 합니다. 생성된 샘플로만 훈련된 모델은 실제 데이터로 훈련된 모델보다 성능이 떨어졌고 (이는 당연해보입니다), 실제 데이터에 합성 데이터를 추가하면 성능이 떨어졌다고 합니다. 이는 아마도 생성된 샘플의 품질, 다양성 등이 원인일 수 있을 것이라고 합니다.</p>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="generative-model-training-and-sampling">
+<h1>4. Generative Model Training and Sampling<a class="headerlink" href="#generative-model-training-and-sampling" title="Permalink to this heading">#</a></h1>
+<p>여기서는 실제로 저자들이 어떻게 text-to-image diffusion 모델을 학습하고, 샘플링을 하였는지에 대한 설명을 합니다.</p>
+<p>먼저 저자들은 text-to-image diffusion 모델로는 Imagen을 사용하였습니다. Text-to-image 모델을 어떻게 ImageNet 클래스와 alignment 할 지에 대한 고민이 필요했다고 합니다. 처음에는 CLIP에서 사용한 방법과 유사하게 짧은 텍스트를 ImageNet 클래스의 텍스트 프롬프트로 사용했다고 하였는데 이 경우에 성능이 좋지 않았다고 합니다. 이는 Imagen에서 high guidance weight를 사용하여 샘플의 다양성이 저하 되면서 생기는 현상일 수 있다고 합니다. 따라서, 저자들은 프롬프트를 한두 단어의 클래스 이름으로 수정하고, 모델의 weight와 sampling parameter를 fine-tuning 했다고 합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/21.png"><img alt="improved_imagenet_classification_01" class="bg-primary mb-1" src="../../_images/21.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 314 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>왼쪽 그림이 fine-tuning이 적용된 Imagen이 만들어낸 이미지고, 오른쪽이 fine-tuning이 적용되지 않은 Imagen입니다. 아래에서 두 번째 클래스인 Schipperke를 보면, 이것은 스키퍼키라는 개 품종을 의미하는데 fine-tuning이 적용되지 않은 Imagen의 경우는 꽃과 같은 전혀 엉뚱한 이미지를 만들고 있는 것을 볼 수 있습니다.</p>
+<section id="imagen-fine-tuning">
+<h2>4.1. Imagen Fine-tuning<a class="headerlink" href="#imagen-fine-tuning" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 Imagen을 어떻게 fine-tuning 했는지를 설명하는 부분입니다.</p>
+<p>먼저 Imagen 구조는 아래와 같습니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/31.png"><img alt="improved_imagenet_classification_02" class="bg-primary mb-1" src="../../_images/31.png" style="width: 200px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 315 </span><span class="caption-text">Imagen 구조</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>본 논문에서는 위의 Imagen 구조에서 빨간 원으로 표시된 부분에 대해서만 fine-tuning 했습니다. Frozen Text Encoder의 경우는 원래 Imagen에서도 학습을 하지 않는 부분이라 마찬가지로 학습을 하지 않았고, 1024x1024 Image를 출력으로 하는 마지막 Super-Resolution Diffusion Model의 경우 ImageNet에 고해상도의 데이터가 적어서 fine-tuning을 하지 않았다고 합니다.</p>
+<p>64x64 모델의 경우는 210K step 정도 학습하였고, optimizer의 경우는 Imagen에서 사용하였던 Adafactor optimizer를 사용하였다고 합니다. 64x64 → 256x256 super-resolution 모델의 경우는 490K step 정도 하였고, Adam optimizer를 사용하였다고 합니다.</p>
+<p>최적의 모델 선택의 기준으로는 기본 Imagen sampler와 ImageNet-1K validation set에 대해 10K개의 샘플들에 대해 FID score를 계산했을 때 가장 좋은 성능의 모델을 선택했다고 합니다.</p>
+</section>
+<section id="sampling-parameters">
+<h2>4.2. Sampling Parameters<a class="headerlink" href="#sampling-parameters" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 본 논문에서 sampling parameter는 어떻게 정했는지를 설명하는 부분입니다. 먼저, Text-conditioned diffusion model 샘플링의 품질, 다양성, 속도는 디퓨전 스텝 수, noise condition augmentation, guidance weight for classifier-free guidance, log-variance mixing coefficient 등에 대해 큰 영향을 받는다고 합니다.</p>
+<p>각각에 대해 간단하게 설명하면 아래와 같습니다.</p>
+<ul>
+<li><p>Noise condition augmentation:</p>
+<p>이미지 생성 과정에서 확률적인 요소를 도입하여 생성된 이미지의 다양성을 증가시키는 기술. 일반적으로, 모델은 잠재 공간의 랜덤한 노이즈를 입력으로 받아 다양한 이미지를 생성하게 됨. 이것은 생성된 이미지가 조금씩 다른 것으로 보이게 만들며, 더 다양한 결과를 얻을 수 있게 함  (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2205.11487">Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding</a>”를 참고해주세요)
+<br></p>
+</li>
+<li><p>Guidance weight for classifier-free guidance:</p>
+<p>“Classifier-free guidance”는 이미지를 생성하는 데 분류기나 특정 지표 없이 외부 정보를 사용한다는 것.  “Guidance weights”는 외부 정보를 모델에 어떻게 반영할지를 조절하는 가중치를 의미할 수 있으며, 이러한 가중치를 조절하여 모델이 원하는 특성이나 스타일을 가진 이미지를 더 잘 생성하도록 함 (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2207.12598">Classifier-free diffusion guidance</a>”를 참고해주세요)
+<br></p>
+</li>
+<li><p>Log-variance mixing coefficient:</p>
+<p>이미지 생성 모델에서 사용되는 확률 분포의 변동성을 조절하는 데 사용되는 계수를 나타냄. 이미지 생성 모델은 일반적으로 확률 분포를 사용하여 이미지를 생성하며, 이 확률 분포의 평균과 분산을 조절함으로써 생성된 이미지의 다양성과 품질을 조절할 수 있음. 로그-분산 혼합 계수는 이러한 분산을 조절하는 데 사용되며, 높은 값은 더 큰 분산을 의미하고, 작은 값은 더 작은 분산을 의미함. 이를 통해 이미지 생성의 다양성을 조절할 수 있음 (자세한 내용은 “<a class="reference external" href="https://arxiv.org/abs/2102.09672">Improved denoising diffusion probabilistic models</a>”를 참고해주세요)<br />
+<br></p>
+</li>
+</ul>
+<p>64x64 기반 모델의 샘플링 parameter 설정법에 대해 설명하겠습니다. 해당 모델의 샘플링은 이미지의 전반적인 특징과 다양성에 영향을 주게 됩니다. 1차 sweep으로 DDPM 샘플러를 이용하여 FID-50K에 대해 가장 최적의 하이퍼파라미터를 찾습니다. Sweep에 사용한 각 하이퍼파라미터의 범위는 아래와 같습니다.</p>
+<ul class="simple">
+<li><p>Guidance weight: [1.0, 1.25, 1.5, 1.75, 2.0, 5.0]</p></li>
+<li><p>Log-variance: [0.0, 0.2, 0.3, 0.4, 1.0]</p></li>
+<li><p>Denoise step: [128, 500, 1000]</p></li>
+</ul>
+<p>1차 sweep 결과 최적의 FID는 log-variance는 0이고 denoising step은 1000이었을 때라고 합니다.</p>
+<p>1차 sweep이 끝난 후에는 guidance weight에 대해서만 sweep을 합니다. 이 때에는 1.2M 이미지를 사용하고, 각 guidance weight에 대해 FID, IS, CAS를 측정했다고 합니다.</p>
+<p>각 샘플링 하이퍼파라미터에 대한 실험 결과는 아래와 같습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/41.png"><img alt="improved_imagenet_classification_03" class="bg-primary mb-1" src="../../_images/41.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 316 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>왼쪽 그림이 1차 sweep에 대한 결과고, 가운데와 오른쪽 그림이 2차 sweep에 대한 결과로 guidance weight에 따른 FID, IS, CAS를 나타낸 결과입니다.</p>
+<p>이제 다음으로는 64x64 → 256x256 super-resolution 모델에 대해 하이퍼파라미터를 선택하는 부분에 대해 설명하겠습니다. 하이퍼파라미터의 range는 아래와 같습니다.</p>
+<ul class="simple">
+<li><p>Guidance weight: [1.0, 2.0, 5.0, 10.0, 30.0]</p></li>
+<li><p>Noise conditioning augmentation: [0.0, 0.1, 0.2, 0.3, 0.4]</p></li>
+<li><p>Log-variance mixing coefficients: [0.1, 0.3]</p></li>
+<li><p>Denoise steps: [129, 500, 1000]</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/51.png"><img alt="improved_imagenet_classification_04" class="bg-primary mb-1" src="../../_images/51.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 317 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 그래프는 guidance weight를 1.0으로 설정하고 noise condition 파라미터를 변경했을 때 FID와 CAS의 변화를 나타낸 그래프입니다. CAS 같은 경우는 logvar coeff가 0.3일 때 전반적으로 좋은 성능을 보였으며, FID 같은 경우도 logvar coeff가 0.3일 때 전반적으로 좋은 성능을 보인 것을 알 수 있습니다.</p>
+<br>
+<p>샘플링 하이퍼파라미터의 결과를 분석해보자면, 전반적으로 FID와 CAS는 높은 상관관계가 있으며 (Figure 4 참고), guidance weight가 작을수록 CAS는 높아지지만, Inception Score에는 부정적인 영향을 주며 (Figure 3 참고), noise augmentation이 0일 때 FID가 가장 작은 것을 볼 수 있습니다. (Figure 4 참고)</p>
+<br>
+<p>이런 하이퍼파라미터 설정 방법을 기준으로 본 논문에서 최종적으로 설정한 값은 아래와 같다고 합니다.</p>
+<ul class="simple">
+<li><p>Guidance weight</p>
+<ul>
+<li><p>베이스 모델: 1.25</p></li>
+<li><p>나머지 resolution: 1.0</p></li>
+</ul>
+</li>
+<li><p>Log-variance mixing coefficients (sampler, steps)</p>
+<ul>
+<li><p>64x64 샘플: 0.0 (DDPM, 1000 denoising steps)</p></li>
+<li><p>256x256 샘플: 0.1  (DDPM, 1000 denoising steps)</p></li>
+<li><p>1024x1024 샘플: 0.0 (DDIM, 32 denoising steps)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="generation-protocol">
+<h2>4.3. Generation Protocol<a class="headerlink" href="#generation-protocol" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 실제로 데이터 합성은 어떤 프로토콜을 따랐는지에 대해 설명하는 부분입니다. 본 논문에서는 원본 데이터셋의 class balance를 유지하며 데이터를 합성했으며, 합성된 결과 총 훈련 데이터셋의 규모는 1배인 1.2M 에서 10배인 12M 규모의 데이터셋의 범위를 가지도록 데이터를 합성했다고 합니다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="result">
+<h1>5. Result<a class="headerlink" href="#result" title="Permalink to this heading">#</a></h1>
+<section id="sample-quality-fid-and-is">
+<h2>5-1. Sample Quality: FID and IS<a class="headerlink" href="#sample-quality-fid-and-is" title="Permalink to this heading">#</a></h2>
+<p>먼저, 합성된 데이터의 품질을 합성 태스크에서 많이 사용되는 지표인 FID와 IS의 관점으로 봅니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/61.png"><img alt="improved_imagenet_classification_05" class="bg-primary mb-1" src="../../_images/61.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 318 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 표에서 볼 수 있듯이, 본 논문의 파인 튜닝된 Imagen이 ImageNet에 대한 데이터 생성에 대해 다른 베이스모델들 보다 FID와 IS가 뛰어난 것을 알 수 있습니다. 이는 64x64 resolution과 256x256 resolution에서 모두 해당되었습니다.</p>
+</section>
+<section id="classification-accuracy-score">
+<h2>5.2. Classification Accuracy Score<a class="headerlink" href="#classification-accuracy-score" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 CAS 성능 지표를 통해 본 논문에서 제안한 모델의 데이터 합성 능력을 확인하는 부분입니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/71.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/71.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 319 </span><span class="caption-text">CAS score</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Figure 5에서 파란색 부분은 실제 학습 데이터로 학습된 모델의 분류 성능이고, 빨간색 부분은 합성된 데이터로 학습된 모델의 분류 성능입니다. 왼쪽 그림은 베이스라인 중 하나인 CDM 모델의 성능을 나타낸 그림이며, 가운데는 본 논문에서 제안한 256x256 resolution 모델의 성능, 오른쪽은 본 논문에서 제안한 1024x1024 resolution 모델의 성능을 나타낸 것입니다. 빨간색 부분이 파란색 부분보다 전반적으로 위쪽에 위치하면 모델의 성능이 좋다고 해석할 수 있습니다. 이 그림을 통해 본 논문에서 제안한 모델들이 베이스라인보다 좋은 성능을 보인다는 것을 알 수 있습니다.</p>
+<p>Table 2에서도 마찬가지로 본 논문 모델이 다른 베이스 모델보다 성능이 뛰어난 것을 알 수 있습니다. 여기서 주목할 만한 점은 CAS를 평가하기 위한 ResNet50이 256x256으로 입력 데이터를 다운샘플링 함에도 1024x1024 샘플에 대한 결과가 훨씬 좋다는 것입니다. (Ours 256x256 resolution보다 Ours 1024x1024 resolution의 CAS 성능이 월등히 높음)</p>
+</section>
+<section id="classification-accuracy-with-different-models">
+<h2>5.3. Classification Accuracy with Different Models<a class="headerlink" href="#classification-accuracy-with-different-models" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 합성된 데이터를 여러 종류의 모델로 학습 시켰을 때, 각 모델의 분류 성능을 확인하는 부분입니다. CAS와 비슷하지만 CAS에서는 ResNet50 모델로 분류 성능을 확인했지만 여기서는 ResNet50 이외에 모델로도 분류 성능을 본다는 차이점이 있습니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/81.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/81.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 320 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 표에서 확인할 수 있듯이, 다양한 모델에 대해서 분류 정확도를 살펴본 결과 생성된 데이터로만 학습될 경우에는 실제 데이터로 학습할 때 보다 성능이 낮았지만, 실제 데이터와 생성된 데이터를 합쳐서 학습할 경우 실제 데이터만 사용했을 때보다 성능이 증가한 것을 볼 수 있습니다. 이것은 ConvNet 기반 모델과 transformer 기반 모델에 대해서 동일한 양상을 보였습니다.</p>
+</section>
+<section id="merging-real-and-synthetic-data-at-scale">
+<h2>5.4. Merging Real and Synthetic Data at Scale<a class="headerlink" href="#merging-real-and-synthetic-data-at-scale" title="Permalink to this heading">#</a></h2>
+<p>이 부분은 합성 데이터 규모에 따른 ResNet-50의 성능을 분석한 부분입니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/91.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/91.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 321 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>64x64 이미지의 경우 생성되는 데이터의 양이 증가함에 따라 성능이 지속적으로 향상되는 것을 볼 수 있습니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/102.png"><img alt="improved_imagenet_classification_06" class="bg-primary mb-1" src="../../_images/102.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 322 </span><span class="caption-text">Table 4</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>하지만 다른 resolution에 대해서는 다른 양상을 보였습니다. 학습 데이터가 4.8M 규모가 될 때까지는 합성 데이터를 추가하는 것이 분류 성능에 좋았으나, 합성 데이터를 더 늘려 그 이상의 규모가 되었을 때는 오히려 성능이 떨어지는 것을 볼 수 있었습니다.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>6. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<p>본 논문의 결론 부분을 보자면, 이 논문에서는 Large-scale text-to-image diffusion 모델을 파인튜닝하여 FID, Inception Score, CAS 성능 지표에 대해서 SOTA를 달성했습니다.</p>
+<ul class="simple">
+<li><p>FID: 1.76 at 256x256</p></li>
+<li><p>Inception Score: 239 at 256x256</p></li>
+<li><p>CAS: 64.96 for 256x256, 69.24 for 1024x1024</p></li>
+</ul>
+<p>또한 그렇게 생성 데이터를 이용하여 ResNet과 Transformer 기반 모델들에 대한 ImageNet classification accuracy를 향상 시켰습니다.</p>
+<p>실험 결과에 대해서 생각해볼만한 거리들이 있었는데 그 중 하나는 CAS 성능 측정할 때 ResNet50이 입력을 256x256으로 다운샘플링 함에도 불구하고 256x256보다 1024x1024의 모델의 CAS가 좋은 것이 있었습니다. 이는 다운샘플링을 하더라도 다운샘플링 전 원본 데이터 resolution이 클 때 더 많은 정보를 담는다는 것을 의미하는 것일 수 있습니다. 또한, 64x64 데이터에서 합성 데이터의 양이 증가함에 따라 분류 정확도가 지속적으로 증가했지만 고해상도 데이터에서는 그렇지 않았던 것을 통해 고해상도 이미지에 대해서는 보다 정교한 훈련 방법이 필요할 수 있음을 시사하고 있습니다.</p>
+<hr class="docutils" />
+<p>이렇게 Synthetic Data from Diffusion Models Improves ImageNet Classification 논문의 리뷰를 마치겠습니다. 개인적으로 느낀 점은 실제 산업에서는 data shortage나 class imbalance 문제가 대부분 발생하는데 본 논문이 그 해결법 중 하나가 될 수 있을 것 같다는 생각이 들었습니다. 다만 Frozen Text Encoder는 추가적으로 파인튜닝이 되지 않기 때문에 특정 산업에서만 쓰이는 특정 텍스트가 들어왔을 때는 잘 동작할 수 있을까 하는 의문이 들었습니다. 또한 합성하고자 하는 데이터셋에 맞게 파인튜닝을 해야하는 점이 꽤나 불편할 것 같아서 파인튜닝이 모델 성능에 얼마나 큰 의미를 갖는지, 파인튜닝을 하지 않았을 때의 CAS 성능도 논문에 있었으면 좋았을 것 같다는 개인적인 생각이 들었습니다. (물론 Figure 2를 보고 어느 정도 결과를 유추해볼 순 있지만요!)</p>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="CM3leon.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">CM3leon</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="GLIDE.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">GLIDE</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#background">3. Background</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#generative-model-training-and-sampling">4. Generative Model Training and Sampling</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#imagen-fine-tuning">4.1. Imagen Fine-tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling-parameters">4.2. Sampling Parameters</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#generation-protocol">4.3. Generation Protocol</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#result">5. Result</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sample-quality-fid-and-is">5-1. Sample Quality: FID and IS</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-score">5.2. Classification Accuracy Score</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classification-accuracy-with-different-models">5.3. Classification Accuracy with Different Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#merging-real-and-synthetic-data-at-scale">5.4. Merging Real and Synthetic Data at Scale</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">6. Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Textual_Inversion.html b/docs/review/Textual_Inversion.html
old mode 100644
new mode 100755
index b8bd6184..4419ff73
--- a/docs/review/Textual_Inversion.html
+++ b/docs/review/Textual_Inversion.html
@@ -1,944 +1,964 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Textual Inversion &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Textual_Inversion';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Custom Diffusion" href="CustomDiffusion.html" />
-    <link rel="prev" title="Introduction" href="Latent_Diffusion_Model.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Textual_Inversion.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Textual_Inversion.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Textual Inversion</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Textual Inversion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cf-gan-inversion">cf) GAN Inversion(이해 못함)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ldm-latent-diffusion-model">LDM(Latent Diffusion Model)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-embeddings">Text Embeddings</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Textual Inversion</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">성능평가</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-2">DALL:E-2와 비교</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-synthesis">Text guided synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-word">pseudo word 두 개 사용</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#bias-reduction">Bias Reduction</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id5">정량평가</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#setups">평가 setups</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id6">결과</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id7">주목할 점</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id8">사용자평가</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation">Limitation</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id9">마무리</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a></p></li>
-<li><p>Code: <a class="reference external" href="https://textual-inversion.github.io/">https://textual-inversion.github.io/</a></p></li>
-<li><p>Review: <a class="reference external" href="https://devocean.sk.com/blog/techBoardDetail.do?page=&amp;query=&amp;ID=164320&amp;boardType=writer&amp;searchData=sam56903&amp;subIndex=&amp;idList=&amp;pnwriterID=sam56903">https://devocean.sk.com/blog/techBoardDetail.do?page=&amp;query=&amp;ID=164320&amp;boardType=writer&amp;searchData=sam56903&amp;subIndex=&amp;idList=&amp;pnwriterID=sam56903</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Kwang-Su Mun</p></li>
-<li><p><strong>Last updated on May. 31. 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="textual-inversion">
-<h1>Textual Inversion<a class="headerlink" href="#textual-inversion" title="Permalink to this heading">#</a></h1>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="abstract">
-<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">이미지</span> <span class="mi">3</span><span class="o">-</span><span class="mi">5</span><span class="n">장으로</span> <span class="n">새로운</span> <span class="n">개념</span><span class="p">(</span><span class="n">또는</span> <span class="n">콘셉트</span><span class="p">,</span> <span class="n">concept</span><span class="p">)</span><span class="n">을</span> <span class="n">학습해</span> <span class="n">관련된</span> <span class="n">이미지를</span> <span class="n">뽑아내는</span> <span class="n">모델</span>
-</pre></div>
-</div>
-<p>text-to-image model은 자연어를 통한 creation에 전례없는 자유도를 주었다. 하지만, 특정한 contept를 생성하고, 그것의 생김새를 바꾸거나, 새로운 역할이 주어지거나 참신한 장면이 그려지는건 아직 불분명하다. 즉, ‘이것을 그려줘’라고 말할 때, ‘이것’에 대한 설명을 prompt로 어떻게 할 것이냐는 물음에는 아직 한계가 있는 것 같다. 이를 해결하기 위해, 저자는 image를 3-5개만으로 사물이나 스타일과 같은 concept, 즉 새로운 ‘단어’를 고정된 text-to-image model의 embedding space에서 표현하는 방법을 제안한다. 이러한 ‘단어’는 자연어 문장에 녹아들어가, 직관적인 방법으로 ‘개인화된’ 이미지 생성을 이끌어 낸다. 특히, 독자적이면서 다양한 콘셉트를 capture하기 위해서는 single word embedding이 충분하다는 것을 알게 되었다.</p>
-<figure class="align-default" id="textual-inverison-example">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbIVL03%2Fbtsg8b6ssL1%2FsZQKABrsLJG58fJuvqd5MK%2Fimg.png"><img alt="textual inverison example" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbIVL03%2Fbtsg8b6ssL1%2FsZQKABrsLJG58fJuvqd5MK%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 152 </span><span class="caption-text">textual inversion example \  (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#textual-inverison-example" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="introduction">
-<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
-<p>대규모 학습된 모델에 새로운 개념을 도입하는 일은 어려운 일이다. 각 새로운 개념에 대해 확장된 데이터 셋을 사용해 모델을 retraining하는 것은 엄청나게 비용이 많이 들고, 몇 가지 예제에 해서 fine-tuning은 보통 치명적인 망각을 초래한다. 따라서 저자들은 사전 훈련된 텍스트-이미지 모델의 텍스트 임베딩 공간에서 새로운 단어를 찾아 이러한 문제를 극복할 것을 제안.</p>
-<figure class="align-default" id="architecture">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd0jLjp%2Fbtsg9DuSNQj%2FkjfhEfeTTA212mS5htrb71%2Fimg.png"><img alt="architecture" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd0jLjp%2Fbtsg9DuSNQj%2FkjfhEfeTTA212mS5htrb71%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 153 </span><span class="caption-text">architecture \  (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#architecture" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 figure에서, “A photo of S*”은 tokenizer를 지나면서 각각 ‘508’, ‘701’, ‘73’, ‘*’과 같은 형태의 token set으로 변환되고, 이후 각 토큰은 자체 임베딩 벡터로 변환되고 이러한 벡터는 다운스트림 모델을 통해 제공됨.</p>
-<p>input image의 concept를 나타내는, 새로운 pseudo-word인 S*를 이용해 새로운 embedding vector(v*)를 나타낸다. 이후 이 vector는 다른 단어와 같이 처리되며 생성 모델에 대한 새로운 text query를 구성하는데 사용될 수 있음. 따라서 이 query는 generator에 들어가서 사용자가 의도한바와 일치하도록 새로운 image를 생성하도록 하는 것이 전반적인 그림이라고 볼 수 있음.</p>
-<p>여기서 중요한 것은, 이 과정에서 생성모델(여기서는 LDM이 쓰임)은 untouched되어 있다는 것(즉, 따로 수정이 들어가지 않는듯함). 그렇게 함으로써 새로운 task에 대한 fine-tuning을 할 때 일반적으로 손실되는 text에 대한 이해도나 generalization을 유지할 수 있음.</p>
-<p>이러한 ‘유사단어’를 찾기 위해, 이 작업을 하나로 inversion시켜 프레임화 한다. 그리고 고정된, pre-trained text-to-image model을 사용하고, 3-5개의 concept를 나타내는 small image set이 주어진다. 저자들은 ‘a photo of S*’와 같은 형태의 문장을 설정해 주어진 작은 dataset에서 이미지를 재구성 하는 것으로 이어지는 single-word embedding을 찾는 것을 목표로 함.</p>
-<p>이 모델의 목표는 <strong>새로운 concept인 입력 이미지를 나타내는 S*를 표현하는 방법을 찾는 것</strong>이며, 이러한 task를 <strong>‘textual inversion’</strong>이라고 한다고 함.</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>This embedding is found through an optimization process, which we refer to as “Textual Inversion”.
-</pre></div>
-</div>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="related-work">
-<h1>Related work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p>text-guided synthesis</p></li>
-<li><p>GAN inversion</p></li>
-<li><p>Diffusion-based inversion</p></li>
-<li><p>personalization</p>
-<ul>
-<li><p>PALAVRA: image를 S*으로 바꾸는데 사용되는 기술로 추정.</p></li>
-<li><p>pre-trained CLIP model을 이용해서 personalized object의 복구 및 segmentation을 수행. PALAVRA는 특정 개체를 참조하는 CLIP의 textual embedding space에서 pseudo-word를 식별함. 그 다음 검색을 위해 이미지를 설명하거나 어떤 장면에서 특정 개체를 분할하기 위해 사용됨. figure 5에서 보듯이, 그들의 접근 방식은 새로운 장면에서 그럴듯한 재구성 또는 합성에 필요한 세부 정보를 캡처하지 못함.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="method">
-<h1>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Our</span> <span class="n">goal</span> <span class="ow">is</span> <span class="n">to</span> <span class="n">enable</span> <span class="n">language</span><span class="o">-</span><span class="n">guided</span> <span class="n">generation</span> <span class="n">of</span> <span class="n">new</span><span class="p">,</span> <span class="n">user</span><span class="o">-</span><span class="n">specified</span> <span class="n">concepts</span><span class="o">.</span>
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>의역) 목표: 유저가 의도한 것에 초점을 맞춘, 새로운 concept를 embedding으로 잘 가이드해서 괜찮은 성과물을 내는 것.</p></li>
-</ul>
-<p>따라서 pre-trained text-to-image model의 중간 단계의 representation으로 이러한 새로운 ‘concepts’을 인코딩하는데 초점을 맞춤. 일반적인 text-to-image model에서는 image의 representation에 대한 후보군을 text encoder의 word-embedding 단계에서 찾는다. 그러나 이러한 접근 방식은 이미지에 대한 in-depth visual understanding을 필요로 하지 않는다(생성자가 이미지에 대해서 시각적인 이해? 없이 그린다.) 따라서 여기서는 GAN inversion에서 영감을 받은 visual reconstruction objective를 제시.</p>
-<section id="cf-gan-inversion">
-<h2>cf) GAN Inversion(이해 못함)<a class="headerlink" href="#cf-gan-inversion" title="Permalink to this heading">#</a></h2>
-<p>출처) - <a class="reference external" href="https://hyoseok-personality.tistory.com/entry/GAN-Inversion">https://hyoseok-personality.tistory.com/entry/GAN-Inversion</a></p>
-<figure class="align-default" id="gan-inversion">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FpomZT%2Fbtsg9EHfVqc%2F4a4K6BmSPZV5ncVQXtfCHk%2Fimg.png"><img alt="GAN inversion" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FpomZT%2Fbtsg9EHfVqc%2F4a4K6BmSPZV5ncVQXtfCHk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 154 </span><span class="caption-text">GAN inversion \  (source: <a class="reference external" href="https://hyoseok-personality.tistory.com/entry/GAN-Inversion">https://hyoseok-personality.tistory.com/entry/GAN-Inversion</a>)</span><a class="headerlink" href="#gan-inversion" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>입력 이미지와 유사한 결과 이미지를 얻을 수 있도록 하는 latent vector를 찾는 과정. GAN이 학습되면 random latent vector로부터 이미지를 생성해낸다. GAN inversion은 이의 역과정으로써 GAN의 latent space로 input image를 inverting시켜 latent vector를 알아가는 과정.</p></li>
-</ul>
-</section>
-<section id="ldm-latent-diffusion-model">
-<h2>LDM(Latent Diffusion Model)<a class="headerlink" href="#ldm-latent-diffusion-model" title="Permalink to this heading">#</a></h2>
-<p>논문에서는 생성모델로서 LDM(Latent Diffusion Model)을 사용함. 이전에 말했듯이, LDM은 하나도 건들지 않음.</p>
-<figure class="align-default" id="ldm-objective-function">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdw5kRl%2FbtshgoiBpt4%2Fz72rzU3tvL8kLFbtBXwWVk%2Fimg.png"><img alt="LDM objective function" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdw5kRl%2FbtshgoiBpt4%2Fz72rzU3tvL8kLFbtBXwWVk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 155 </span><span class="caption-text">LDM objective function \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#ldm-objective-function" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="text-embeddings">
-<h2>Text Embeddings<a class="headerlink" href="#text-embeddings" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="text-embedding">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fv0EWv%2Fbtsg9e9ZI4u%2FzfXraAXg1vpKpxemZLtVPk%2Fimg.png"><img alt="Text-Embedding" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fv0EWv%2Fbtsg9e9ZI4u%2FzfXraAXg1vpKpxemZLtVPk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 156 </span><span class="caption-text">Text-Embedding \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#text-embedding" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>입력된 문자열의 각 단어, 하위 단어는 tokenizer를 통과하며, 미리 정의된 dictionary에서 index token으로 변환함. 각 토큰을 통해 찾을 수 있는 고유한 임베딩 벡터에 연결됨.</p></li>
-<li><p>index에 의한 embedding vector는 일반적으로 text encoder인 C_Θ의 일부로 학습된다. 이러한 space를 inversion target으로 삼았음. 새로운 개념을 나타내기 위해 자리표시자 문자열인 S*를 새롭게 지정함. 이 과정에서 PALAVRA를 사용했을 것으로 추정함. 임베딩 process에 개입해서 tokenize된 문자열과 관련된 vector를 새로운 학습된 embedding V*로 대체하여 본질적으로 어휘(pseudo-word)에 개념을 주입함. 이렇게 함으로써 다른 단어와 마찬가지로 concept를 포함하는 새로운 문장을 만들 수 있었음.</p></li>
-</ul>
-</section>
-<section id="id1">
-<h2>Textual Inversion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>새로운 embedding을 찾기 위해 작은 규모의 dataset(3-5장)을 사용해 다양한 배경 또는 포즈와 같은 여러 설정에 걸쳐 목표 concept을 묘사함. 이러한 작은 dataset에서 LDM loss를 최소화하는 과정을 통해 V를 최적화함. 생성 조건을 고정하기 위해 CLIP ImageNet 템플릿에서 파생된 중립 컨텍스트 텍스트를 무작위로 샘플링한다. 여기에는 “A photo of S*”, “A rendition of S*” 등의 형식 프롬프트가 포함된다.(아마 원본 이미지와 최대한 비슷하게 만들어서 원본과 비교하기 위한 목적이 아닐까 싶음) 최적화 목표식은 다음과 같음.</p>
-<figure class="align-default" id="textual-inversion-objective-function">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqY4nD%2FbtshiHP4k6T%2FvZrYjfSUAE2XePwon4rTIk%2Fimg.png"><img alt="textual inversion objective function" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqY4nD%2FbtshiHP4k6T%2FvZrYjfSUAE2XePwon4rTIk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 157 </span><span class="caption-text">textual inversion objective function \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#textual-inversion-objective-function" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>LDM loss함수와 매우 유사함. 여기서 CΘ와 eΘ는 고정. 따라서 학습된 embedding이 개념의 미세한 시각적 detail을 포착할 수 있을 것으로 기대함.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="id2">
-<h1>성능평가<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h1>
-<section id="dall-e-2">
-<h2>DALL:E-2와 비교<a class="headerlink" href="#dall-e-2" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="compare-with-dalle-2">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkJvkY%2Fbtsg95YTKmc%2FX6lxVI5tL30ZP5gKEmoAv1%2Fimg.png"><img alt="compare with DALLE-2" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkJvkY%2Fbtsg95YTKmc%2FX6lxVI5tL30ZP5gKEmoAv1%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 158 </span><span class="caption-text">compare with DALLE-2 \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#compare-with-dalle-2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>input image에 대한 디테일을 더 잘 포착하는 모습을 볼 수 있다.</p></li>
-</ul>
-</section>
-<section id="text-guided-synthesis">
-<h2>Text guided synthesis<a class="headerlink" href="#text-guided-synthesis" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbRLYR1%2Fbtsg95SasXe%2FaUe9K6FVb2yC9sZqoK5eSk%2Fimg.png"><img alt="text guided synthesis" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbRLYR1%2Fbtsg95SasXe%2FaUe9K6FVb2yC9sZqoK5eSk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 159 </span><span class="caption-text">text guided synthesis - 입력 이미지의 스타일과 유사하면서도 text guide에 맞춰서 잘 진행함.
-\  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Textual Inversion 모델은 새로운 주제에 대해 더 정확하게 개념을 보존하고, 새로운 임베딩과 나머지 캡션들에 대해서도 모두 추론이 가능했음.</p></li>
-</ul>
-<figure class="align-default" id="style-transfer">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbLlXhf%2Fbtsg8cEna6l%2FgiZvyYgqCaPj6X5wKTIzZk%2Fimg.png"><img alt="style transfer" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbLlXhf%2Fbtsg8cEna6l%2FgiZvyYgqCaPj6X5wKTIzZk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 160 </span><span class="caption-text">style transfer \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#style-transfer" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>적은 데이터셋으로도 style을 보존하면서 표현한 그림</p></li>
-</ul>
-</section>
-<section id="pseudo-word">
-<h2>pseudo word 두 개 사용<a class="headerlink" href="#pseudo-word" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="two-pseudo-word">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJtPJY%2Fbtsg9OinOOb%2FMLn4k48Hk7CP7vGv1yAaYk%2Fimg.png"><img alt="two pseudo word" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJtPJY%2Fbtsg9OinOOb%2FMLn4k48Hk7CP7vGv1yAaYk%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 161 </span><span class="caption-text">two pseudo word \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#two-pseudo-word" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="bias-reduction">
-<h2>Bias Reduction<a class="headerlink" href="#bias-reduction" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPakAR%2Fbtsg9OvWWW9%2FJZkKl1AFTKJgEKJsA2rb2K%2Fimg.png"><img alt="Bias reduction" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPakAR%2Fbtsg9OvWWW9%2FJZkKl1AFTKJgEKJsA2rb2K%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 162 </span><span class="caption-text">Bias reduction \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>기존 모델의 결과를 보면, 위 사진에서와 같이 ‘의사’라는 단어를 사용하면, 보통 백인 남성 의사를 잘 그려냈음. 이는 기존 데이터셋에서 남성 의사 사진 데이터가 많았음을 보여준다. 보다 작은 imageset에서 새로운 embedding을 학습함으로써 이러한 bias를 줄일 수 있음을 보여준다(즉, 성별 및 인종적 다양성에 대한 인식을 높일 수 있음).</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="id5">
-<h1>정량평가<a class="headerlink" href="#id5" title="Permalink to this heading">#</a></h1>
-<p>latent space embedding의 품질을 분석.</p>
-<ol class="arabic simple">
-<li><p>reconstruction(y축?):  target concept를 얼마나 잘 복제하는지. 특정 이미지가 아닌 개념에 대한 변형을 생성하므로 의미적 CLIP 공간 거리를 고려하여 유사성을 측정.(이미지 자체가 아닌, 이미지가 가진 ‘개념’에 대해 latent space를 생성하므로)  각 컨셉에 대해 “A photo of S*”라는 prompt를 사용해 64개의 이미지를 생성.</p></li>
-<li><p>editability(x축?): text prompt를 사용해 개념을 수정하는 능력을 평가. 다양한 난이도와 다양한 설정의 prompt를 사용해 일련의 이미지를 생성.</p></li>
-</ol>
-<p>각 prompt 별로, 50 DDIM step을 사용해 64개의 샘플을 만들고, CLIP-space embedding을 평가, textual prompt의 CLIP-space embedding에서 cosine similarity를 계산. 높은 스코어는 더 높은 editing capability와 prompt의 신뢰도를 보여줌.</p>
-<section id="setups">
-<h2>평가 setups<a class="headerlink" href="#setups" title="Permalink to this heading">#</a></h2>
-<p>GAN inversion에서 영감을 받은 실험 환경 설정에 따름. 생략</p>
-</section>
-<section id="id6">
-<h2>결과<a class="headerlink" href="#id6" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="quantative-evaluation1">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcxKm1h%2Fbtshb63SIYh%2FNflBiQZTV5V0yh0I3EYpq1%2Fimg.png"><img alt="quantative evaluation1" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcxKm1h%2Fbtshb63SIYh%2FNflBiQZTV5V0yh0I3EYpq1%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 163 </span><span class="caption-text">quantitative evaluation1 \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#quantative-evaluation1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="id7">
-<h3>주목할 점<a class="headerlink" href="#id7" title="Permalink to this heading">#</a></h3>
-<ol class="arabic simple">
-<li><p>많은 baseline과 우리 방법의 semantic reconstruction quality는 단순히 training set에서 임의의 이미지를 샘플링하는 것과 비슷함(== 원본 이미지와 생성된 이미지가 큰 차이가 없었다?)</p></li>
-<li><p>single-word method는 비슷한 reconstruction quality를 달성하고, 모든 multi-word baseline에서 상당히 향상된 editability를 달성. 이러한 점은 text embedding space의 인상적인 유연성을 나타내고, 단일 pseudo word만 사용하면서 높은 정확도로 새로운 개념을 캡처하는데 도움이 될 수 있음을 보여줌.</p></li>
-<li><p>baseline이 distortion-editability tradeoff 곡선의 outline을 그리며 실제 단어 분포에 더 가까운 embedding이 더 쉽게 수정될 수 있음. 그러나 target의 세부 정보를 캡처하지는 못함. 반대로, 단어 분포에서 멀리 벗어나면 editability가 크게 감소하는 대신 향상된 reconstruction이 가능해짐. 특히 single embedding model은 단순히 learning rate를 변경해 이 곡선을 따라 이동할 수 있으므로 사용자에게 이 tradeoff에 대한 어느 정도의 제어를 제공함.</p></li>
-<li><p>concept에 대한 human description을 사용하면 유사성을 포착하지 못하면서도, editability가 감소함.</p></li>
-</ol>
-</section>
-</section>
-<section id="id8">
-<h2>사용자평가<a class="headerlink" href="#id8" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="human-test">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Frx5Ei%2Fbtsg9MSpakC%2FFsPkgODR3zTGIBnvq6RXik%2Fimg.png"><img alt="human test" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Frx5Ei%2Fbtsg9MSpakC%2FFsPkgODR3zTGIBnvq6RXik%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 164 </span><span class="caption-text">human test \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#human-test" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>두 개의 설문지:</p>
-<ol class="arabic simple">
-<li><p>사용자는 concept의 training set에서 4개의 이미지를 제공받았고, 이미지와의 유사성에 따라 5개의 모델에서 생성된 결과의 순위를 매김.</p></li>
-<li><p>이미지 context를 설명하는 텍스트를 제공받았고, 텍스트와 생성된 이미지의 유사성에 따라 순위를 매김.</p></li>
-</ol>
-<p>각 질문별로 600개씩 총 1,200개의 응답을 수집.</p>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="limitation">
-<h1>Limitation<a class="headerlink" href="#limitation" title="Permalink to this heading">#</a></h1>
-<ol class="arabic simple">
-<li><p>이미지 생성에 더 많은 자유도를 제공하지만, concept의 의미론적인 본질을 파악하거나, 정확한 shape를 학습하는데 한계.</p></li>
-<li><p>최적화가 오래 걸린다. 하나의 concept를 학습하는데 약 2시간이 소요됨.</p></li>
-</ol>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="id9">
-<h1>마무리<a class="headerlink" href="#id9" title="Permalink to this heading">#</a></h1>
-<p>: 새로운 설정과 장면에서 특정 concept의 이미지를 생성하기 위해 text-to-image model을 활용하는 개인화되며, language-guided generation을 소개함. 여기서 사용한 ‘textual inversion’은 pretrained text-to-image 모델의 text embedding space 내에서 concept를 새로운 pseudo word로 inverse하여 작동함. 이러한 pseudo-word는 간단한 자연어 설명을 사용해 새로운 장면에 삽입할 수 있으므로 간단하고 직관적인 수정이 가능함.</p>
-<p>어떤 의미에서 이 방법은 사용자가 편집하기 쉽도록 텍스트 기반 interface를 사용하지만 자연 언어의 한계에 접근할 때 시각적 단서를 제공하는 등 multi modal 정보를 활용할 수 있도록 함.</p>
-<p>이러한 접근 방식은 공개적으로 사용가능한 가장 큰 text-to-image model인 LDM을 통해 구현됨. 그러나 접근 방식은 아키텍처 세부 정보에 의존하지 않음. 따라서 textual inversion은 추가적인 대규모 text-to-image model에 쉽게 적용할 수 있다고 생각. 거기에서 text-to-image alignment, shape preservation, image generation fidelity가 더 향상될 수 있음.</p>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Latent_Diffusion_Model.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Introduction</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="CustomDiffusion.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Custom Diffusion</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Textual Inversion</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cf-gan-inversion">cf) GAN Inversion(이해 못함)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ldm-latent-diffusion-model">LDM(Latent Diffusion Model)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-embeddings">Text Embeddings</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Textual Inversion</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">성능평가</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-2">DALL:E-2와 비교</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-synthesis">Text guided synthesis</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-word">pseudo word 두 개 사용</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#bias-reduction">Bias Reduction</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id5">정량평가</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#setups">평가 setups</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id6">결과</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id7">주목할 점</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id8">사용자평가</a></li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation">Limitation</a></li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id9">마무리</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Textual Inversion &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Textual_Inversion';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Custom Diffusion" href="CustomDiffusion.html" />
+    <link rel="prev" title="Introduction" href="Latent_Diffusion_Model.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Textual_Inversion.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Textual_Inversion.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Textual Inversion</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Textual Inversion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cf-gan-inversion">cf) GAN Inversion(이해 못함)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ldm-latent-diffusion-model">LDM(Latent Diffusion Model)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-embeddings">Text Embeddings</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Textual Inversion</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">성능평가</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-2">DALL-E 2와 비교</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-synthesis">Text guided synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-word">pseudo word 두 개 사용</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#bias-reduction">Bias Reduction</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id5">정량평가</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#setups">평가 setups</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id6">결과</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id7">주목할 점</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id8">사용자평가</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation">Limitation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id9">마무리</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a></p></li>
+<li><p>Code: <a class="reference external" href="https://textual-inversion.github.io/">https://textual-inversion.github.io/</a></p></li>
+<li><p>Review: <a class="reference external" href="https://devocean.sk.com/blog/techBoardDetail.do?page=&amp;query=&amp;ID=164320&amp;boardType=writer&amp;searchData=sam56903&amp;subIndex=&amp;idList=&amp;pnwriterID=sam56903">https://devocean.sk.com/blog/techBoardDetail.do?page=&amp;query=&amp;ID=164320&amp;boardType=writer&amp;searchData=sam56903&amp;subIndex=&amp;idList=&amp;pnwriterID=sam56903</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Kwang-Su Mun</p></li>
+<li><p><strong>Last updated on May. 31. 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="textual-inversion">
+<h1>Textual Inversion<a class="headerlink" href="#textual-inversion" title="Permalink to this heading">#</a></h1>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="abstract">
+<h1>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h1>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">이미지</span> <span class="mi">3</span><span class="o">-</span><span class="mi">5</span><span class="n">장으로</span> <span class="n">새로운</span> <span class="n">개념</span><span class="p">(</span><span class="n">또는</span> <span class="n">콘셉트</span><span class="p">,</span> <span class="n">concept</span><span class="p">)</span><span class="n">을</span> <span class="n">학습해</span> <span class="n">관련된</span> <span class="n">이미지를</span> <span class="n">뽑아내는</span> <span class="n">모델</span>
+</pre></div>
+</div>
+<p>text-to-image model은 자연어를 통한 creation에 전례없는 자유도를 주었다. 하지만, 특정한 concept를 생성하고, 그것의 생김새를 바꾸거나, 새로운 역할이 주어지거나 참신한 장면이 그려지는건 아직 불분명하다. 즉, ‘이것을 그려줘’라고 말할 때, ‘이것’에 대한 설명을 prompt로 어떻게 할 것이냐는 물음에는 아직 한계가 있는 것 같다. 이를 해결하기 위해, 저자는 image를 3-5개만으로 사물이나 스타일과 같은 concept, 즉 새로운 ‘단어’를 고정된 text-to-image model의 embedding space에서 표현하는 방법을 제안한다. 이러한 ‘단어’는 자연어 문장에 녹아들어가, 직관적인 방법으로 ‘개인화된’ 이미지 생성을 이끌어 낸다. 특히, 독자적이면서 다양한 콘셉트를 capture하기 위해서는 single word embedding이 충분하다는 것을 알게 되었다.</p>
+<figure class="align-default" id="textual-inverison-example">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbIVL03%2Fbtsg8b6ssL1%2FsZQKABrsLJG58fJuvqd5MK%2Fimg.png"><img alt="textual inversion example" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbIVL03%2Fbtsg8b6ssL1%2FsZQKABrsLJG58fJuvqd5MK%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 152 </span><span class="caption-text">textual inversion example (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#textual-inverison-example" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction">
+<h1>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h1>
+<p>대규모 학습된 모델에 새로운 개념을 도입하는 일은 어려운 일이다. 각 새로운 개념에 대해 확장된 데이터 셋을 사용해 모델을 retraining하는 것은 엄청나게 비용이 많이 들고, 몇 가지 예제에 대해서 fine-tuning하는 것은 보통 치명적인 망각(catastrophic forgetting)을 초래한다. 따라서 저자들은 사전 훈련된 텍스트-이미지 모델의 텍스트 임베딩 공간에서 새로운 단어를 찾아 이러한 문제를 극복할 것을 제안.</p>
+<figure class="align-default" id="architecture">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd0jLjp%2Fbtsg9DuSNQj%2FkjfhEfeTTA212mS5htrb71%2Fimg.png"><img alt="architecture" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd0jLjp%2Fbtsg9DuSNQj%2FkjfhEfeTTA212mS5htrb71%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 153 </span><span class="caption-text">architecture (source: <a class="reference external" href="https://arxiv.org/abs/2208.01618">https://arxiv.org/abs/2208.01618</a>)</span><a class="headerlink" href="#architecture" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 figure에서, “A photo of S*”은 tokenizer를 지나면서 각각 ‘508’, ‘701’, ‘73’, ‘*’과 같은 형태의 token set으로 변환되고, 이후 각 토큰은 자체 임베딩 벡터로 변환되고 이러한 벡터는 다운스트림 모델을 통해 제공됨.</p>
+<p>input image의 concept를 나타내는, 새로운 pseudo-word인 S*를 이용해 새로운 embedding vector(v*)를 나타낸다. 이후 이 vector는 다른 단어와 같이 처리되며 생성 모델에 대한 새로운 text query를 구성하는데 사용될 수 있음. 따라서 이 query는 generator에 들어가서 사용자가 의도한바와 일치하도록 새로운 image를 생성하도록 하는 것이 전반적인 그림이라고 볼 수 있음.</p>
+<p>여기서 중요한 것은, 이 과정에서 생성모델(여기서는 LDM이 쓰임)은 untouched되어 있다는 것(즉, 따로 수정이 들어가지 않는듯함). 그렇게 함으로써 새로운 task에 대한 fine-tuning을 할 때 일반적으로 손실되는 text에 대한 이해도나 generalization을 유지할 수 있음.</p>
+<p>이러한 ‘유사단어’를 찾기 위해, 이 작업을 하나로 inversion시켜 프레임화 한다. 그리고 고정된, pre-trained text-to-image model을 사용하고, 3-5개의 concept를 나타내는 small image set이 주어진다. 저자들은 ‘a photo of S*’와 같은 형태의 문장을 설정해 주어진 작은 dataset에서 이미지를 재구성 하는 것으로 이어지는 single-word embedding을 찾는 것을 목표로 함.</p>
+<p>이 모델의 목표는 <strong>새로운 concept인 입력 이미지를 나타내는 S*를 표현하는 방법을 찾는 것</strong>이며, 이러한 task를 <strong>‘textual inversion’</strong>이라고 한다고 함.</p>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>This embedding is found through an optimization process, which we refer to as “Textual Inversion”.
+</pre></div>
+</div>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="related-work">
+<h1>Related work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>text-guided synthesis</p></li>
+<li><p>GAN inversion</p></li>
+<li><p>Diffusion-based inversion</p></li>
+<li><p>personalization</p>
+<ul>
+<li><p>PALAVRA: image를 S*으로 바꾸는데 사용되는 기술로 추정.</p></li>
+<li><p>pre-trained CLIP model을 이용해서 personalized object의 복구 및 segmentation을 수행. PALAVRA는 특정 개체를 참조하는 CLIP의 textual embedding space에서 pseudo-word를 식별함. 그 다음 검색을 위해 이미지를 설명하거나 어떤 장면에서 특정 개체를 분할하기 위해 사용됨. figure 5에서 보듯이, 그들의 접근 방식은 새로운 장면에서 그럴듯한 재구성 또는 합성에 필요한 세부 정보를 캡처하지 못함.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Our</span> <span class="n">goal</span> <span class="ow">is</span> <span class="n">to</span> <span class="n">enable</span> <span class="n">language</span><span class="o">-</span><span class="n">guided</span> <span class="n">generation</span> <span class="n">of</span> <span class="n">new</span><span class="p">,</span> <span class="n">user</span><span class="o">-</span><span class="n">specified</span> <span class="n">concepts</span><span class="o">.</span>
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>의역) 목표: 유저가 의도한 것에 초점을 맞춘, 새로운 concept를 embedding으로 잘 가이드해서 괜찮은 성과물을 내는 것.</p></li>
+</ul>
+<p>따라서 pre-trained text-to-image model의 중간 단계의 representation으로 이러한 새로운 ‘concepts’을 인코딩하는데 초점을 맞춤. 일반적인 text-to-image model에서는 image의 representation에 대한 후보군을 text encoder의 word-embedding 단계에서 찾는다. 그러나 이러한 접근 방식은 이미지에 대한 in-depth visual understanding을 필요로 하지 않는다(생성자가 이미지에 대해서 시각적인 이해? 없이 그린다.) 따라서 여기서는 GAN inversion에서 영감을 받은 visual reconstruction objective를 제시.</p>
+<section id="cf-gan-inversion">
+<h2>cf) GAN Inversion(이해 못함)<a class="headerlink" href="#cf-gan-inversion" title="Permalink to this heading">#</a></h2>
+<p>출처) - <a class="reference external" href="https://hyoseok-personality.tistory.com/entry/GAN-Inversion">https://hyoseok-personality.tistory.com/entry/GAN-Inversion</a></p>
+<figure class="align-default" id="gan-inversion">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FpomZT%2Fbtsg9EHfVqc%2F4a4K6BmSPZV5ncVQXtfCHk%2Fimg.png"><img alt="GAN inversion" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FpomZT%2Fbtsg9EHfVqc%2F4a4K6BmSPZV5ncVQXtfCHk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 154 </span><span class="caption-text">GAN inversion (source: <a class="reference external" href="https://hyoseok-personality.tistory.com/entry/GAN-Inversion">https://hyoseok-personality.tistory.com/entry/GAN-Inversion</a>)</span><a class="headerlink" href="#gan-inversion" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>입력 이미지와 유사한 결과 이미지를 얻을 수 있도록 하는 latent vector를 찾는 과정. GAN이 학습되면 random latent vector로부터 이미지를 생성해낸다. GAN inversion은 이의 역과정으로써 GAN의 latent space로 input image를 inverting시켜 latent vector를 알아가는 과정.</p></li>
+</ul>
+</section>
+<section id="ldm-latent-diffusion-model">
+<h2>LDM(Latent Diffusion Model)<a class="headerlink" href="#ldm-latent-diffusion-model" title="Permalink to this heading">#</a></h2>
+<p>논문에서는 생성모델로서 LDM(Latent Diffusion Model)을 사용함. 이전에 말했듯이, LDM은 하나도 건들지 않음.</p>
+<figure class="align-default" id="ldm-objective-function">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdw5kRl%2FbtshgoiBpt4%2Fz72rzU3tvL8kLFbtBXwWVk%2Fimg.png"><img alt="LDM objective function" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdw5kRl%2FbtshgoiBpt4%2Fz72rzU3tvL8kLFbtBXwWVk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 155 </span><span class="caption-text">LDM objective function (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#ldm-objective-function" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="text-embeddings">
+<h2>Text Embeddings<a class="headerlink" href="#text-embeddings" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="text-embedding">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fv0EWv%2Fbtsg9e9ZI4u%2FzfXraAXg1vpKpxemZLtVPk%2Fimg.png"><img alt="Text-Embedding" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fv0EWv%2Fbtsg9e9ZI4u%2FzfXraAXg1vpKpxemZLtVPk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 156 </span><span class="caption-text">Text-Embedding (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#text-embedding" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>입력된 문자열의 각 단어, 하위 단어는 tokenizer를 통과하며, 미리 정의된 dictionary에서 index token으로 변환함. 각 토큰을 통해 찾을 수 있는 고유한 임베딩 벡터에 연결됨.</p></li>
+<li><p>index에 의한 embedding vector는 일반적으로 text encoder인 C_Θ의 일부로 학습된다. 이러한 space를 inversion target으로 삼았음. 새로운 개념을 나타내기 위해 자리표시자 문자열인 S*를 새롭게 지정함. 이 과정에서 PALAVRA를 사용했을 것으로 추정함. 임베딩 process에 개입해서 tokenize된 문자열과 관련된 vector를 새로운 학습된 embedding v*로 대체하여 본질적으로 어휘(pseudo-word)에 개념을 주입함. 이렇게 함으로써 다른 단어와 마찬가지로 concept를 포함하는 새로운 문장을 만들 수 있었음.</p></li>
+</ul>
+</section>
+<section id="id1">
+<h2>Textual Inversion<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>새로운 embedding을 찾기 위해 작은 규모의 dataset(3-5장)을 사용해 다양한 배경 또는 포즈와 같은 여러 설정에 걸쳐 목표 concept을 묘사함. 이러한 작은 dataset에서 LDM loss를 최소화하는 과정을 통해 V를 최적화함. 생성 조건을 고정하기 위해 CLIP ImageNet 템플릿에서 파생된 중립 컨텍스트 텍스트를 무작위로 샘플링한다. 여기에는 “A photo of S*”, “A rendition of S*” 등의 형식 프롬프트가 포함된다.(아마 원본 이미지와 최대한 비슷하게 만들어서 원본과 비교하기 위한 목적이 아닐까 싶음) 최적화 목표식은 다음과 같음.</p>
+<figure class="align-default" id="textual-inversion-objective-function">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqY4nD%2FbtshiHP4k6T%2FvZrYjfSUAE2XePwon4rTIk%2Fimg.png"><img alt="textual inversion objective function" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqY4nD%2FbtshiHP4k6T%2FvZrYjfSUAE2XePwon4rTIk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 157 </span><span class="caption-text">textual inversion objective function (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#textual-inversion-objective-function" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>LDM loss함수와 매우 유사함. 여기서 CΘ와 eΘ는 고정. 따라서 학습된 embedding이 개념의 미세한 시각적 detail을 포착할 수 있을 것으로 기대함.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="id2">
+<h1>성능평가<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h1>
+<section id="dall-e-2">
+<h2>DALL-E 2와 비교<a class="headerlink" href="#dall-e-2" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="compare-with-dalle-2">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkJvkY%2Fbtsg95YTKmc%2FX6lxVI5tL30ZP5gKEmoAv1%2Fimg.png"><img alt="compare with DALLE-2" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbkJvkY%2Fbtsg95YTKmc%2FX6lxVI5tL30ZP5gKEmoAv1%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 158 </span><span class="caption-text">compare with DALLE-2 \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#compare-with-dalle-2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>input image에 대한 디테일을 더 잘 포착하는 모습을 볼 수 있다.</p></li>
+</ul>
+</section>
+<section id="text-guided-synthesis">
+<h2>Text guided synthesis<a class="headerlink" href="#text-guided-synthesis" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbRLYR1%2Fbtsg95SasXe%2FaUe9K6FVb2yC9sZqoK5eSk%2Fimg.png"><img alt="text guided synthesis" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbRLYR1%2Fbtsg95SasXe%2FaUe9K6FVb2yC9sZqoK5eSk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 159 </span><span class="caption-text">text guided synthesis - 입력 이미지의 스타일과 유사하면서도 text guide에 맞춰서 잘 진행함.
+\  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Textual Inversion 모델은 새로운 주제에 대해 더 정확하게 개념을 보존하고, 새로운 임베딩과 나머지 캡션들에 대해서도 모두 추론이 가능했음.</p></li>
+</ul>
+<figure class="align-default" id="style-transfer">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbLlXhf%2Fbtsg8cEna6l%2FgiZvyYgqCaPj6X5wKTIzZk%2Fimg.png"><img alt="style transfer" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbLlXhf%2Fbtsg8cEna6l%2FgiZvyYgqCaPj6X5wKTIzZk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 160 </span><span class="caption-text">style transfer \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#style-transfer" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>적은 데이터셋으로도 style을 보존하면서 표현한 그림</p></li>
+</ul>
+</section>
+<section id="pseudo-word">
+<h2>pseudo word 두 개 사용<a class="headerlink" href="#pseudo-word" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="two-pseudo-word">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJtPJY%2Fbtsg9OinOOb%2FMLn4k48Hk7CP7vGv1yAaYk%2Fimg.png"><img alt="two pseudo word" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FJtPJY%2Fbtsg9OinOOb%2FMLn4k48Hk7CP7vGv1yAaYk%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 161 </span><span class="caption-text">two pseudo word \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#two-pseudo-word" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="bias-reduction">
+<h2>Bias Reduction<a class="headerlink" href="#bias-reduction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPakAR%2Fbtsg9OvWWW9%2FJZkKl1AFTKJgEKJsA2rb2K%2Fimg.png"><img alt="Bias reduction" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPakAR%2Fbtsg9OvWWW9%2FJZkKl1AFTKJgEKJsA2rb2K%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 162 </span><span class="caption-text">Bias reduction \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>기존 모델의 결과를 보면, 위 사진에서와 같이 ‘의사’라는 단어를 사용하면, 보통 백인 남성 의사를 잘 그려냈음. 이는 기존 데이터셋에서 남성 의사 사진 데이터가 많았음을 보여준다. 보다 작은 imageset에서 새로운 embedding을 학습함으로써 이러한 bias를 줄일 수 있음을 보여준다(즉, 성별 및 인종적 다양성에 대한 인식을 높일 수 있음).</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="id5">
+<h1>정량평가<a class="headerlink" href="#id5" title="Permalink to this heading">#</a></h1>
+<p>latent space embedding의 품질을 분석.</p>
+<ol class="arabic simple">
+<li><p>reconstruction(y축?):  target concept를 얼마나 잘 복제하는지. 특정 이미지가 아닌 개념에 대한 변형을 생성하므로 의미적 CLIP 공간 거리를 고려하여 유사성을 측정.(이미지 자체가 아닌, 이미지가 가진 ‘개념’에 대해 latent space를 생성하므로)  각 컨셉에 대해 “A photo of S*”라는 prompt를 사용해 64개의 이미지를 생성.</p></li>
+<li><p>editability(x축?): text prompt를 사용해 개념을 수정하는 능력을 평가. 다양한 난이도와 다양한 설정의 prompt를 사용해 일련의 이미지를 생성.</p></li>
+</ol>
+<p>각 prompt 별로, 50 DDIM step을 사용해 64개의 샘플을 만들고, CLIP-space embedding을 평가, textual prompt의 CLIP-space embedding에서 cosine similarity를 계산. 높은 스코어는 더 높은 editing capability와 prompt의 신뢰도를 보여줌.</p>
+<section id="setups">
+<h2>평가 setups<a class="headerlink" href="#setups" title="Permalink to this heading">#</a></h2>
+<p>GAN inversion에서 영감을 받은 실험 환경 설정에 따름. 생략</p>
+</section>
+<section id="id6">
+<h2>결과<a class="headerlink" href="#id6" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="quantative-evaluation1">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcxKm1h%2Fbtshb63SIYh%2FNflBiQZTV5V0yh0I3EYpq1%2Fimg.png"><img alt="quantative evaluation1" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcxKm1h%2Fbtshb63SIYh%2FNflBiQZTV5V0yh0I3EYpq1%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 163 </span><span class="caption-text">quantitative evaluation 1 \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#quantative-evaluation1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="id7">
+<h3>주목할 점<a class="headerlink" href="#id7" title="Permalink to this heading">#</a></h3>
+<ol class="arabic simple">
+<li><p>많은 baseline과 우리 방법의 semantic reconstruction quality는 단순히 training set에서 임의의 이미지를 샘플링하는 것과 비슷함(== 원본 이미지와 생성된 이미지가 큰 차이가 없었다?)</p></li>
+<li><p>single-word method는 비슷한 reconstruction quality를 달성하고, 모든 multi-word baseline에서 상당히 향상된 editability를 달성. 이러한 점은 text embedding space의 인상적인 유연성을 나타내고, 단일 pseudo word만 사용하면서 높은 정확도로 새로운 개념을 캡처하는데 도움이 될 수 있음을 보여줌.</p></li>
+<li><p>baseline이 distortion-editability tradeoff 곡선의 outline을 그리며 실제 단어 분포에 더 가까운 embedding이 더 쉽게 수정될 수 있음. 그러나 target의 세부 정보를 캡처하지는 못함. 반대로, 단어 분포에서 멀리 벗어나면 editability가 크게 감소하는 대신 향상된 reconstruction이 가능해짐. 특히 single embedding model은 단순히 learning rate를 변경해 이 곡선을 따라 이동할 수 있으므로 사용자에게 이 tradeoff에 대한 어느 정도의 제어를 제공함.</p></li>
+<li><p>concept에 대한 human description을 사용하면 유사성을 포착하지 못하면서도, editability가 감소함.</p></li>
+</ol>
+</section>
+</section>
+<section id="id8">
+<h2>사용자평가<a class="headerlink" href="#id8" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="human-test">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Frx5Ei%2Fbtsg9MSpakC%2FFsPkgODR3zTGIBnvq6RXik%2Fimg.png"><img alt="human test" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Frx5Ei%2Fbtsg9MSpakC%2FFsPkgODR3zTGIBnvq6RXik%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 164 </span><span class="caption-text">human test \  (source: <a class="reference external" href="https://arxiv.org/pdf/2208.01618.pdf">https://arxiv.org/pdf/2208.01618.pdf</a>)</span><a class="headerlink" href="#human-test" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>두 개의 설문지:</p>
+<ol class="arabic simple">
+<li><p>사용자는 concept의 training set에서 4개의 이미지를 제공받았고, 이미지와의 유사성에 따라 5개의 모델에서 생성된 결과의 순위를 매김.</p></li>
+<li><p>이미지 context를 설명하는 텍스트를 제공받았고, 텍스트와 생성된 이미지의 유사성에 따라 순위를 매김.</p></li>
+</ol>
+<p>각 질문별로 600개씩 총 1,200개의 응답을 수집.</p>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="limitation">
+<h1>Limitation<a class="headerlink" href="#limitation" title="Permalink to this heading">#</a></h1>
+<ol class="arabic simple">
+<li><p>이미지 생성에 더 많은 자유도를 제공하지만, concept의 의미론적인 본질을 파악하거나, 정확한 shape를 학습하는데 한계.</p></li>
+<li><p>최적화가 오래 걸린다. 하나의 concept를 학습하는데 약 2시간이 소요됨.</p></li>
+</ol>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="id9">
+<h1>마무리<a class="headerlink" href="#id9" title="Permalink to this heading">#</a></h1>
+<p>: 새로운 설정과 장면에서 특정 concept의 이미지를 생성하기 위해 text-to-image model을 활용하는, 개인화된 language-guided generation을 소개함. 여기서 사용한 ‘textual inversion’은 pretrained text-to-image 모델의 text embedding space 내에서 concept를 새로운 pseudo word로 inverse하여 작동함. 이러한 pseudo-word는 간단한 자연어 설명을 사용해 새로운 장면에 삽입할 수 있으므로 간단하고 직관적인 수정이 가능함.</p>
+<p>어떤 의미에서 이 방법은 사용자가 편집하기 쉽도록 텍스트 기반 interface를 사용하지만 자연 언어의 한계에 접근할 때 시각적 단서를 제공하는 등 multi modal 정보를 활용할 수 있도록 함.</p>
+<p>이러한 접근 방식은 공개적으로 사용가능한 가장 큰 text-to-image model인 LDM을 통해 구현됨. 그러나 이 접근 방식은 아키텍처 세부 정보에 의존하지 않음. 따라서 textual inversion은 추가적인 대규모 text-to-image model에 쉽게 적용할 수 있다고 생각. 거기에서 text-to-image alignment, shape preservation, image generation fidelity가 더 향상될 수 있음.</p>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Latent_Diffusion_Model.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Introduction</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="CustomDiffusion.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Custom Diffusion</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Textual Inversion</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#cf-gan-inversion">cf) GAN Inversion(이해 못함)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ldm-latent-diffusion-model">LDM(Latent Diffusion Model)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-embeddings">Text Embeddings</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Textual Inversion</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id2">성능평가</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-2">DALL-E 2와 비교</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-guided-synthesis">Text guided synthesis</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#pseudo-word">pseudo word 두 개 사용</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#bias-reduction">Bias Reduction</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id5">정량평가</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#setups">평가 setups</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id6">결과</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id7">주목할 점</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id8">사용자평가</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation">Limitation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#id9">마무리</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/VideoLDM.html b/docs/review/VideoLDM.html
old mode 100644
new mode 100755
index 72353dda..b7f8728f
--- a/docs/review/VideoLDM.html
+++ b/docs/review/VideoLDM.html
@@ -1,1000 +1,1020 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>VideoLDM &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/VideoLDM';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="AnimateDiff" href="AnimateDiff.html" />
-    <link rel="prev" title="Make A Video" href="Make_A_Video.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/VideoLDM.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/VideoLDM.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>VideoLDM</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-video-diffusion-models">3. Latent Video Diffusion Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#turning-latent-image-into-video-generators">3.1. Turning Latent Image into Video Generators</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-autoencoder-finetuning">3.1.1 Temporal Autoencoder Finetuning</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prediction-models-for-long-term-generation">3.2. Prediction Models for Long-Term Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-interpolation-for-high-frame-rates">3.3. Temporal Interpolation for High Frame Rates</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-fine-tuning-of-sr-models">3.4. Temporal Fine-tuning of SR Models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-driving-video-synthesis">4.1. High-Resolution Driving Video Synthesis</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-video-with-stable-diffusion">4.2. Text-to-Video with Stable Diffusion</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#personalized-text-to-video-with-dreambooth">4.2.1 Personalized Text-to-Video with Dreambooth</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Align your Latents: High-Resolution Video Synthesis with Latent Diffusion Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2304.08818">https://arxiv.org/abs/2304.08818</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
-<li><p><strong>Last updated on Nov. 30. 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="videoldm">
-<h1>VideoLDM<a class="headerlink" href="#videoldm" title="Permalink to this heading">#</a></h1>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure11.png"><img alt="figure1" class="bg-primary mb-1" src="../../_images/figure11.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 590 </span><span class="caption-text">Video LDM samples</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul>
-<li><p>Latent Diffusion Models (LDMs)는 computing resource 를 줄이기 위해 낮은 차원의 latent space 로 압축하여 high quality 의 image synthesis 를 가능하게 했다.</p></li>
-<li><p>비디오 생성 모델링의 퀄리티 부족하며,</p>
-<ul class="simple">
-<li><p>그 이유는 학습에 필요한 computing cost 가 많이 발생하고, 데이터 셋이 부족하기 때문이다.</p></li>
-</ul>
-</li>
-<li><p>제안</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure2.png"><img alt="figure2" class="bg-primary mb-1" src="../../_images/figure2.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 591 </span><span class="caption-text">Temporal Video finetuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>기존에는 가우시안 노이즈의 랜덤한 샘플들 끼리의 denoising 결과 다른 이미지를 생성했다.</p></li>
-<li><p>Temporal Video finetuning 을 거치게 되면 비디오 시퀀스의 형태(시간축에 정렬된 이미지)로 생성할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>VideoLDM 은 기존 LDM 방법에 고해상도의 비디오 생성을 적용했다.</p>
-<ol class="arabic simple">
-<li><p>대규모 이미지 데이터 셋을 활용해 LDM 을 pre-train 했고, (only image)</p>
-<ul class="simple">
-<li><p>pre-trained image LDMs 를 활용 가능하다.</p>
-<ul>
-<li><p>temporal modeling 만 학습한다.(기존 이미지 LDM은 freeze)</p></li>
-<li><p>1280x2048 해상도 까지 가능하다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>그 후, 이미지 generator 를 비디오 generator 로 전환한다.</p>
-<ul class="simple">
-<li><p>latent space diffusion model 에 temporal(시간적) 차원을 적용한다.</p></li>
-<li><p>이미지 시퀀스(비디오)를 인코딩해 파인 튜닝 진행한다.</p></li>
-</ul>
-</li>
-<li><p>diffusion model upsampler 를 시간적으로 정렬하여 일관적인 비디오 super resolution model 로 변환한다.</p></li>
-</ol>
-</li>
-<li><p>Applied task</p>
-<ul class="simple">
-<li><p>자율 주행의 시뮬레이션 엔진 (512x1024 해상도로 실제로 평가 진행해 sota 달성)</p></li>
-<li><p>creative content creation (using text-to-video)</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="latent-video-diffusion-models">
-<h2>3. Latent Video Diffusion Models<a class="headerlink" href="#latent-video-diffusion-models" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>비디오 데이터 셋: <span class="math notranslate nohighlight">\(x ∈ R^{T×3×\tilde H×\tilde W}\)</span> 로 표현</p>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(T\)</span>: frame 수, <span class="math notranslate nohighlight">\(\tilde H, \tilde W\)</span>: 높이, 너비</p></li>
-</ul>
-</li>
-</ul>
-<section id="turning-latent-image-into-video-generators">
-<h3>3.1. Turning Latent Image into Video Generators<a class="headerlink" href="#turning-latent-image-into-video-generators" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>잘 학습된 image LDM 을 활용하는 것이 주요한 key point.</p>
-<ul class="simple">
-<li><p>문제점</p>
-<ul>
-<li><p>image LDM 은 개별의 프레임에 대한 high quality 이미지를 생성할 수 있고,
-→ 시간적인 정보는 포함하고 있지 않다.</p></li>
-<li><p>따라서 이를 연속적인 프레임으로 렌더링해 사용할 수 없다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><span class="math notranslate nohighlight">\(l_\phi ^i\)</span> 로 표현하는 temporal neural network 를 추가했다.</p>
-<ul class="simple">
-<li><p>이는 이미지 LDM 의 공간적인 정보에 연관되며, 시간적으로 일관된 방식으로 개별 프레임을 정렬할 수 있도록 한다.</p>
-<ul>
-<li><p>비디오를 인식할 수 있는 backbone 을 정의한다.</p></li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure41.png"><img alt="figure4" class="bg-primary mb-1" src="../../_images/figure41.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 592 </span><span class="caption-text">Video-Aware Temporal Backbone</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/einops.png"><img alt="einops" class="bg-primary mb-1" src="../../_images/einops.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 593 </span><span class="caption-text">Einops notation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>einops 로 구현했으며, spatial layer 에서는 비디오(배치x시간) 정보가 함께 인코딩이 되며,</p>
-<ul>
-<li><p>temporal layer 에서는 이를 rearrange 를 통해 배치, 시간 정보를 나눠 시간 차원에서 인코딩이 진행된다.</p>
-<ul>
-<li><p>(option) 이때 text prompt 가 conditioning 이 될 수 있다.</p></li>
-<li><p>(i) temporal attention (ii) 3D conv 로 구성된다.</p>
-<ul>
-<li><ul>
-<li><p>Sinusoidal embedding 을 사용해 시간에 대한 위치 인코딩 활용했다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>temporal layer 을 거친 후, spatial layer 의 output 과 가중합을 통해 정보가 결합된다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<section id="temporal-autoencoder-finetuning">
-<h4>3.1.1 Temporal Autoencoder Finetuning<a class="headerlink" href="#temporal-autoencoder-finetuning" title="Permalink to this heading">#</a></h4>
-<ul>
-<li><p>Image LDM 을 사용하면 시퀀스로 생성할 때 flickering이 발생하는 문제가 있다.</p>
-<ul>
-<li><p>이를 해결하기 위해, autoencoder 의 decoder 에서 temporal 한 layer 를 추가한다.</p></li>
-<li><p>이는 3D conv 로 구축된 patch-wise temporal discriminator 도 추가해 비디오 데이터를 fine tuning 한다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.png"><img alt="figure3" class="bg-primary mb-1" src="../../_images/figure3.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 594 </span><span class="caption-text">Temporal Autoencoder Finetuning</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</li>
-<li><p>인코딩된 비디오 프레임의 latent space 내에서 image DM 을 사용할 수 있도록 인코더는 학습이 되지 않는다.</p></li>
-</ul>
-</section>
-</section>
-<section id="prediction-models-for-long-term-generation">
-<h3>3.2. Prediction Models for Long-Term Generation<a class="headerlink" href="#prediction-models-for-long-term-generation" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>그럼에도 불구하고, 긴 동영상은 생성하지 못하는 한계가 있다.</p></li>
-<li><p>따라서 전체 <span class="math notranslate nohighlight">\(T\)</span> 프레임에서 마스킹된 <span class="math notranslate nohighlight">\(S\)</span> 프레임으로 구성해 모델이 예측하게끔 학습을 한다.</p>
-<ul>
-<li><p>이러한 프레임들은 LDM 의 인코더를 통해 채널 차원에 concat 되며, temporal layer 에 입력된다.</p></li>
-</ul>
-</li>
-<li><p>inference 에서는 반복적인 샘플링 과정을 통해 긴 영상을 생성할 수 있게 했다.</p>
-<ul>
-<li><p>최신 prediction 을 재 사용해 새로운 context 를 생성했다.</p></li>
-<li><p>classifier-free guidance 를 도입해 마스킹된 프레임 수를 0, 1, 2 개를 사용해 학습.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="temporal-interpolation-for-high-frame-rates">
-<h3>3.3. Temporal Interpolation for High Frame Rates<a class="headerlink" href="#temporal-interpolation-for-high-frame-rates" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/interpolation.png"><img alt="interpolation" class="bg-primary mb-1" src="../../_images/interpolation.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 595 </span><span class="caption-text">Temporal Interpolation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>High resolution video 란 해상도 뿐만 아니라 높은 frame rate 를 가지고 있어야 한다.</p></li>
-<li><p>이를 위해 두 가지 과정으로 진행한다.</p>
-<ol class="arabic simple">
-<li><p>semantic 한 큰 변화가 있는 키 프레임을 생성한다.</p>
-<ul>
-<li><p>메모리 제약으로 인해 low frame rate 로 생성할 수 있다.</p></li>
-</ul>
-</li>
-<li><p>키 프레임을 활용한 interpolate 진행한다.</p>
-<ul>
-<li><p>interpolate 할 프레임을 masking 을 씌운다.</p>
-<ul>
-<li><p>두 개의 키 프레임에 대해 세 개의 프레임을 예측하는 것으로 T → 4T interpolation model 을 학습해 사용했다.</p></li>
-<li><p>높은 frame rate 를 위해 16T 까지 interpolation 모델 구축.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ol>
-</li>
-</ul>
-</section>
-<section id="temporal-fine-tuning-of-sr-models">
-<h3>3.4. Temporal Fine-tuning of SR Models<a class="headerlink" href="#temporal-fine-tuning-of-sr-models" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>megapixel 의 해상도까지 생성하는 것이 목표이다.</p>
-<ul>
-<li><p>cascaded DMs 에 영감받아 4배 해상도를 키웠다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cascaded_dms.png"><img alt="cascaded_dms" class="bg-primary mb-1" src="../../_images/cascaded_dms.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 596 </span><span class="caption-text">Cascaded DM</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>noise augmentation(with noise level conditioning) 으로 super resolution 모델 학습했다.</p></li>
-</ul>
-</li>
-<li><p>또한 consistency 한 SR 모델을 구축하기 위해 spatial / temporal layer를 추가했다.</p>
-<ul class="simple">
-<li><p>저해상도 시퀀스 길이 <span class="math notranslate nohighlight">\(T\)</span> 를 concat 하여 conditioning</p></li>
-<li><p>locally 하게 patch 단위로 연산하고, 후에 convolution 을 진행한다.</p></li>
-</ul>
-</li>
-<li><p>computing resource</p>
-<ul class="simple">
-<li><p>VideoLDM 에서의 main LDM 을 효율적으로 연산을 하기 위해 latent space 에서 모든 비디오 모델링이 수행된다.</p>
-<ul>
-<li><p>그로 인해, 높은 배치 사이즈 + 긴 영상 생성 가능하다.</p></li>
-</ul>
-</li>
-<li><p>upsampler 는 패치 단위로 진행하기에 computing resource 를 줄일 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Dataset</p>
-<ul>
-<li><p>RDS(real driving scene): 683,060 개, 8초(30 fps), 512×1024, day/night, “crowdedness”</p></li>
-<li><p>WebVid-10M: 10.7M video-caption pairs, 52K video hours, resized 320×512</p></li>
-</ul>
-</li>
-<li><p>Evaluation metric</p>
-<ul>
-<li><p>FVD + human evaluation</p></li>
-<li><p>CLIP similarity (CLIP-SIM) + IS</p></li>
-</ul>
-</li>
-</ul>
-<section id="high-resolution-driving-video-synthesis">
-<h3>4.1. High-Resolution Driving Video Synthesis<a class="headerlink" href="#high-resolution-driving-video-synthesis" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure71.png"><img alt="figure7" class="bg-primary mb-1" src="../../_images/figure71.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 597 </span><span class="caption-text">Real-World Driving Scenes with Video LDM</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="text-to-video-with-stable-diffusion">
-<h3>4.2. Text-to-Video with Stable Diffusion<a class="headerlink" href="#text-to-video-with-stable-diffusion" title="Permalink to this heading">#</a></h3>
-<ul>
-<li><p>WebVid-10M 데이터셋(resized 320×512)으로 Stable Diffusion 의 spatial layer 에 대해 학습했고,</p>
-<ul>
-<li><p>text-conditioning 을 적용한 temporal layer 를 추가해 학습 진행했다.</p></li>
-<li><p>그 후 upscaler 를 학습해 4배 upscale 해 1280×2048 해상도로 비디오 생성 가능해졌다.</p>
-<ul class="simple">
-<li><p>113 frames: 24fps 4.7초 or 30fps 3.8초</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure61.png"><img alt="figure6" class="bg-primary mb-1" src="../../_images/figure61.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 598 </span><span class="caption-text">Text-to-Video with Stable Diffusion</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>다양성이 적은 Real video 로 제한적인 데이터로 학습했지만, 기존 Stable Diffusion 의 생성 능력을 가져와 artistic 한 생성이 가능하다.</p></li>
-<li><p>performance</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table4_5.png"><img alt="table4_5" class="bg-primary mb-1" src="../../_images/table4_5.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 599 </span><span class="caption-text">Performance Table</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Make-A-Video 의 경우 VideoLDM 보다 더 많은 데이터 셋과 text-to-video를 entirely하게 학습했다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<section id="personalized-text-to-video-with-dreambooth">
-<h4>4.2.1 Personalized Text-to-Video with Dreambooth<a class="headerlink" href="#personalized-text-to-video-with-dreambooth" title="Permalink to this heading">#</a></h4>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure8.png"><img alt="figure8" class="bg-primary mb-1" src="../../_images/figure8.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 600 </span><span class="caption-text">Text-to-Video with DreamBooth</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>위쪽의 VideoLDM 을 활용한 결과가 consistency 한 결과를 가져왔다.</p></li>
-</ul>
-</section>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Make_A_Video.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Make A Video</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="AnimateDiff.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">AnimateDiff</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-video-diffusion-models">3. Latent Video Diffusion Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#turning-latent-image-into-video-generators">3.1. Turning Latent Image into Video Generators</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-autoencoder-finetuning">3.1.1 Temporal Autoencoder Finetuning</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prediction-models-for-long-term-generation">3.2. Prediction Models for Long-Term Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-interpolation-for-high-frame-rates">3.3. Temporal Interpolation for High Frame Rates</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-fine-tuning-of-sr-models">3.4. Temporal Fine-tuning of SR Models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-driving-video-synthesis">4.1. High-Resolution Driving Video Synthesis</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-video-with-stable-diffusion">4.2. Text-to-Video with Stable Diffusion</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#personalized-text-to-video-with-dreambooth">4.2.1 Personalized Text-to-Video with Dreambooth</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>VideoLDM &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/VideoLDM';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="AnimateDiff" href="AnimateDiff.html" />
+    <link rel="prev" title="Make A Video" href="Make_A_Video.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/VideoLDM.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/VideoLDM.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>VideoLDM</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-video-diffusion-models">3. Latent Video Diffusion Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#turning-latent-image-into-video-generators">3.1. Turning Latent Image into Video Generators</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-autoencoder-finetuning">3.1.1 Temporal Autoencoder Finetuning</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prediction-models-for-long-term-generation">3.2. Prediction Models for Long-Term Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-interpolation-for-high-frame-rates">3.3. Temporal Interpolation for High Frame Rates</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-fine-tuning-of-sr-models">3.4. Temporal Fine-tuning of SR Models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-driving-video-synthesis">4.1. High-Resolution Driving Video Synthesis</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-video-with-stable-diffusion">4.2. Text-to-Video with Stable Diffusion</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#personalized-text-to-video-with-dreambooth">4.2.1 Personalized Text-to-Video with Dreambooth</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Align your Latents: High-Resolution Video Synthesis with Latent Diffusion Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2304.08818">https://arxiv.org/abs/2304.08818</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Jun-Hyoung Lee</p></li>
+<li><p><strong>Last updated on Nov. 30. 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="videoldm">
+<h1>VideoLDM<a class="headerlink" href="#videoldm" title="Permalink to this heading">#</a></h1>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure11.png"><img alt="figure1" class="bg-primary mb-1" src="../../_images/figure11.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 602 </span><span class="caption-text">Video LDM samples</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>Latent Diffusion Models (LDMs)는 computing resource 를 줄이기 위해 낮은 차원의 latent space 로 압축하여 high quality 의 image synthesis 를 가능하게 했다.</p></li>
+<li><p>비디오 생성 모델링은 아직 퀄리티가 부족하며,</p>
+<ul class="simple">
+<li><p>그 이유는 학습에 필요한 computing cost 가 크고, 데이터 셋이 부족하기 때문이다.</p></li>
+</ul>
+</li>
+<li><p>제안</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure2.png"><img alt="figure2" class="bg-primary mb-1" src="../../_images/figure2.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 603 </span><span class="caption-text">Temporal Video finetuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기존에는 가우시안 노이즈의 랜덤한 샘플들 끼리의 denoising 결과 다른 이미지를 생성했다.</p></li>
+<li><p>Temporal Video finetuning 을 거치게 되면 비디오 시퀀스의 형태(시간축에 정렬된 이미지)로 생성할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>VideoLDM 은 기존 LDM 방법에 고해상도의 비디오 생성을 적용했다.</p>
+<ol class="arabic simple">
+<li><p>대규모 이미지 데이터 셋을 활용해 LDM 을 pre-train 했고, (only image)</p>
+<ul class="simple">
+<li><p>pre-trained image LDMs 를 활용 가능하다.</p>
+<ul>
+<li><p>temporal modeling 만 학습한다.(기존 이미지 LDM은 freeze)</p></li>
+<li><p>1280x2048 해상도 까지 가능하다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>그 후, 이미지 generator 를 비디오 generator 로 전환한다.</p>
+<ul class="simple">
+<li><p>latent space diffusion model 에 temporal(시간적) 차원을 적용한다.</p></li>
+<li><p>이미지 시퀀스(비디오)를 인코딩해 파인 튜닝 진행한다.</p></li>
+</ul>
+</li>
+<li><p>diffusion model upsampler 를 시간적으로 정렬하여 일관적인 비디오 super resolution model 로 변환한다.</p></li>
+</ol>
+</li>
+<li><p>Applied task</p>
+<ul class="simple">
+<li><p>자율 주행의 시뮬레이션 엔진 (512x1024 해상도로 실제로 평가 진행해 sota 달성)</p></li>
+<li><p>creative content creation (using text-to-video)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="latent-video-diffusion-models">
+<h2>3. Latent Video Diffusion Models<a class="headerlink" href="#latent-video-diffusion-models" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>비디오 데이터 셋: <span class="math notranslate nohighlight">\(x ∈ R^{T×3×\tilde H×\tilde W}\)</span> 로 표현</p>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(T\)</span>: frame 수, <span class="math notranslate nohighlight">\(\tilde H, \tilde W\)</span>: 높이, 너비</p></li>
+</ul>
+</li>
+</ul>
+<section id="turning-latent-image-into-video-generators">
+<h3>3.1. Turning Latent Image into Video Generators<a class="headerlink" href="#turning-latent-image-into-video-generators" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>잘 학습된 image LDM 을 활용하는 것이 주요한 key point.</p>
+<ul class="simple">
+<li><p>문제점</p>
+<ul>
+<li><p>image LDM 은 개별의 프레임에 대한 high quality 이미지를 생성할 수 있고,
+→ 시간적인 정보는 포함하고 있지 않다.</p></li>
+<li><p>따라서 이를 연속적인 프레임으로 렌더링해 사용할 수 없다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><span class="math notranslate nohighlight">\(l_\phi ^i\)</span> 로 표현하는 temporal neural network 를 추가했다.</p>
+<ul class="simple">
+<li><p>이는 이미지 LDM 의 공간적인 정보에 연관되며, 시간적으로 일관된 방식으로 개별 프레임을 정렬할 수 있도록 한다.</p>
+<ul>
+<li><p>비디오를 인식할 수 있는 backbone 을 정의한다.</p></li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure41.png"><img alt="figure4" class="bg-primary mb-1" src="../../_images/figure41.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 604 </span><span class="caption-text">Video-Aware Temporal Backbone</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/einops.png"><img alt="einops" class="bg-primary mb-1" src="../../_images/einops.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 605 </span><span class="caption-text">Einops notation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>einops 로 구현했으며, spatial layer 에서는 비디오(배치x시간) 정보가 함께 인코딩이 되며,</p>
+<ul>
+<li><p>temporal layer 에서는 이를 rearrange 를 통해 배치, 시간 정보를 나눠 시간 차원에서 인코딩이 진행된다.</p>
+<ul>
+<li><p>(option) 이때 text prompt 가 conditioning 이 될 수 있다.</p></li>
+<li><p>(i) temporal attention (ii) 3D conv 로 구성된다.</p>
+<ul>
+<li><ul>
+<li><p>Sinusoidal embedding 을 사용해 시간에 대한 위치 인코딩 활용했다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>temporal layer 을 거친 후, spatial layer 의 output 과 가중합을 통해 정보가 결합된다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<section id="temporal-autoencoder-finetuning">
+<h4>3.1.1 Temporal Autoencoder Finetuning<a class="headerlink" href="#temporal-autoencoder-finetuning" title="Permalink to this heading">#</a></h4>
+<ul>
+<li><p>Image LDM 을 사용하면 시퀀스로 생성할 때 flickering이 발생하는 문제가 있다.</p>
+<ul>
+<li><p>이를 해결하기 위해, autoencoder 의 decoder 에서 temporal 한 layer 를 추가한다.</p></li>
+<li><p>이는 3D conv 로 구축된 patch-wise temporal discriminator 도 추가해 비디오 데이터를 fine tuning 한다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure3.png"><img alt="figure3" class="bg-primary mb-1" src="../../_images/figure3.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 606 </span><span class="caption-text">Temporal Autoencoder Finetuning</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+<li><p>인코딩된 비디오 프레임의 latent space 내에서 image DM 을 사용할 수 있도록 인코더는 학습이 되지 않는다.</p></li>
+</ul>
+</section>
+</section>
+<section id="prediction-models-for-long-term-generation">
+<h3>3.2. Prediction Models for Long-Term Generation<a class="headerlink" href="#prediction-models-for-long-term-generation" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>그럼에도 불구하고, 긴 동영상은 생성하지 못하는 한계가 있다.</p></li>
+<li><p>따라서 전체 <span class="math notranslate nohighlight">\(T\)</span> 프레임에서 마스킹된 <span class="math notranslate nohighlight">\(S\)</span> 프레임으로 구성해 모델이 예측하게끔 학습을 한다.</p>
+<ul>
+<li><p>이러한 프레임들은 LDM 의 인코더를 통해 채널 차원에 concat 되며, temporal layer 에 입력된다.</p></li>
+</ul>
+</li>
+<li><p>inference 에서는 반복적인 샘플링 과정을 통해 긴 영상을 생성할 수 있게 했다.</p>
+<ul>
+<li><p>최신 prediction 을 재 사용해 새로운 context 를 생성했다.</p></li>
+<li><p>classifier-free guidance 를 도입해 마스킹된 프레임 수를 0, 1, 2 개를 사용해 학습.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="temporal-interpolation-for-high-frame-rates">
+<h3>3.3. Temporal Interpolation for High Frame Rates<a class="headerlink" href="#temporal-interpolation-for-high-frame-rates" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/interpolation.png"><img alt="interpolation" class="bg-primary mb-1" src="../../_images/interpolation.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 607 </span><span class="caption-text">Temporal Interpolation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>High resolution video 란 해상도 뿐만 아니라 높은 frame rate 를 가지고 있어야 한다.</p></li>
+<li><p>이를 위해 두 가지 과정으로 진행한다.</p>
+<ol class="arabic simple">
+<li><p>semantic 한 큰 변화가 있는 키 프레임을 생성한다.</p>
+<ul>
+<li><p>메모리 제약으로 인해 low frame rate 로 생성할 수 있다.</p></li>
+</ul>
+</li>
+<li><p>키 프레임을 활용해 interpolation 을 진행한다.</p>
+<ul>
+<li><p>interpolate 할 프레임에 masking 을 씌운다.</p>
+<ul>
+<li><p>두 개의 키 프레임에 대해 세 개의 프레임을 예측하는 것으로 T → 4T interpolation model 을 학습해 사용했다.</p></li>
+<li><p>높은 frame rate 를 위해 16T 까지 interpolation 모델 구축.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ol>
+</li>
+</ul>
+</section>
+<section id="temporal-fine-tuning-of-sr-models">
+<h3>3.4. Temporal Fine-tuning of SR Models<a class="headerlink" href="#temporal-fine-tuning-of-sr-models" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>megapixel 의 해상도까지 생성하는 것이 목표이다.</p>
+<ul>
+<li><p>cascaded DMs 에 영감받아 4배 해상도를 키웠다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/cascaded_dms.png"><img alt="cascaded_dms" class="bg-primary mb-1" src="../../_images/cascaded_dms.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 608 </span><span class="caption-text">Cascaded DM</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>noise augmentation(with noise level conditioning) 으로 super resolution 모델 학습했다.</p></li>
+</ul>
+</li>
+<li><p>또한 consistency 한 SR 모델을 구축하기 위해 spatial / temporal layer를 추가했다.</p>
+<ul class="simple">
+<li><p>저해상도 시퀀스 길이 <span class="math notranslate nohighlight">\(T\)</span> 를 concat 하여 conditioning</p></li>
+<li><p>locally 하게 patch 단위로 연산하고, 후에 convolution 을 진행한다.</p></li>
+</ul>
+</li>
+<li><p>computing resource</p>
+<ul class="simple">
+<li><p>VideoLDM 에서의 main LDM 을 효율적으로 연산을 하기 위해 latent space 에서 모든 비디오 모델링이 수행된다.</p>
+<ul>
+<li><p>그로 인해, 높은 배치 사이즈 + 긴 영상 생성 가능하다.</p></li>
+</ul>
+</li>
+<li><p>upsampler 는 패치 단위로 진행하기에 computing resource 를 줄일 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Dataset</p>
+<ul>
+<li><p>RDS(real driving scene): 683,060 개, 8초(30 fps), 512×1024, day/night, “crowdedness”</p></li>
+<li><p>WebVid-10M: 10.7M video-caption pairs, 52K video hours, resized 320×512</p></li>
+</ul>
+</li>
+<li><p>Evaluation metric</p>
+<ul>
+<li><p>FVD + human evaluation</p></li>
+<li><p>CLIP similarity (CLIP-SIM) + IS</p></li>
+</ul>
+</li>
+</ul>
+<section id="high-resolution-driving-video-synthesis">
+<h3>4.1. High-Resolution Driving Video Synthesis<a class="headerlink" href="#high-resolution-driving-video-synthesis" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure71.png"><img alt="figure7" class="bg-primary mb-1" src="../../_images/figure71.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 609 </span><span class="caption-text">Real-World Driving Scenes with Video LDM</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="text-to-video-with-stable-diffusion">
+<h3>4.2. Text-to-Video with Stable Diffusion<a class="headerlink" href="#text-to-video-with-stable-diffusion" title="Permalink to this heading">#</a></h3>
+<ul>
+<li><p>WebVid-10M 데이터셋(resized 320×512)으로 Stable Diffusion 의 spatial layer 에 대해 학습했고,</p>
+<ul>
+<li><p>text-conditioning 을 적용한 temporal layer 를 추가해 학습 진행했다.</p></li>
+<li><p>그 후 upscaler 를 학습해 4배 upscale 해 1280×2048 해상도로 비디오 생성 가능해졌다.</p>
+<ul class="simple">
+<li><p>113 frames: 24fps 4.7초 or 30fps 3.8초</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure61.png"><img alt="figure6" class="bg-primary mb-1" src="../../_images/figure61.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 610 </span><span class="caption-text">Text-to-Video with Stable Diffusion</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>다양성이 적은 Real video 로 제한적인 데이터로 학습했지만, 기존 Stable Diffusion 의 생성 능력을 가져와 artistic 한 생성이 가능하다.</p></li>
+<li><p>performance</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/table4_5.png"><img alt="table4_5" class="bg-primary mb-1" src="../../_images/table4_5.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 611 </span><span class="caption-text">Performance Table</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Make-A-Video 의 경우 VideoLDM 보다 더 많은 데이터셋을 사용했고, text-to-video 에 전적으로(entirely) 초점을 맞춰 학습했다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<section id="personalized-text-to-video-with-dreambooth">
+<h4>4.2.1 Personalized Text-to-Video with Dreambooth<a class="headerlink" href="#personalized-text-to-video-with-dreambooth" title="Permalink to this heading">#</a></h4>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/figure8.png"><img alt="figure8" class="bg-primary mb-1" src="../../_images/figure8.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 612 </span><span class="caption-text">Text-to-Video with DreamBooth</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>위쪽의 VideoLDM 을 활용한 결과가 더 consistent 한 결과를 가져왔다.</p></li>
+</ul>
+</section>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Make_A_Video.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Make A Video</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="AnimateDiff.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">AnimateDiff</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-video-diffusion-models">3. Latent Video Diffusion Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#turning-latent-image-into-video-generators">3.1. Turning Latent Image into Video Generators</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-autoencoder-finetuning">3.1.1 Temporal Autoencoder Finetuning</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prediction-models-for-long-term-generation">3.2. Prediction Models for Long-Term Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-interpolation-for-high-frame-rates">3.3. Temporal Interpolation for High Frame Rates</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#temporal-fine-tuning-of-sr-models">3.4. Temporal Fine-tuning of SR Models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-driving-video-synthesis">4.1. High-Resolution Driving Video Synthesis</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-video-with-stable-diffusion">4.2. Text-to-Video with Stable Diffusion</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#personalized-text-to-video-with-dreambooth">4.2.1 Personalized Text-to-Video with Dreambooth</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html b/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html
old mode 100644
new mode 100755
index c3141ded..325e5b4e
--- a/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html
+++ b/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html
@@ -1,949 +1,969 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Your Diffusion Model is Secretly a Zero-Shot Classifier &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Progressive Distillation for Fast Sampling of Diffusion Models" href="progressive_distillation.html" />
-    <link rel="prev" title="BBDM" href="BBDM.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Your Diffusion Model is Secretly a Zero-Shot Classifier</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> {Your Diffusion Model is Secretly a Zero-Shot Classifier}, {ICCV 2023}</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/pdf/2303.16203.pdf">https://arxiv.org/pdf/2303.16203.pdf</a></p></li>
-<li><p>Github io: <a class="reference external" href="https://diffusion-classifier.github.io/">https://diffusion-classifier.github.io/</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/diffusion-classifier/diffusion-classifier">diffusion-classifier/diffusion-classifier</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
-<li><p><strong>Edited by:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
-<li><p><strong>Last updated on Nov. 09, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="your-diffusion-model-is-secretly-a-zero-shot-classifier">
-<h1>Your Diffusion Model is Secretly a Zero-Shot Classifier<a class="headerlink" href="#your-diffusion-model-is-secretly-a-zero-shot-classifier" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p><strong>핵심</strong></p>
-<ul>
-<li><p>학습된 <strong>Diffusion Models 에서 Classifier 를  추가 학습 없이 획득</strong>할 수 있다.</p>
-<ul>
-<li><p><strong>Stable Diffusion</strong> 같은 거대 모델로부터 <strong>Zero-shot classifier</strong> 를 얻을 수 있다.</p></li>
-<li><p><strong>Class-conditional Diffusion Models</strong> 에서는 <strong>일반적인 (non Zero-shot) classifier</strong> 를 얻을 수 있다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>결과 요약</strong></p>
-<ul>
-<li><p><strong>Classification 성능이 나쁘지 않았다.</strong></p></li>
-<li><p><strong>Zero-shot classifier 는 Multimodal Compositional reasoning ability 가 매우 훌륭</strong>했다.</p></li>
-<li><p>이렇게 Diffusion 모델에서 추출된 Classifiers 는 <strong>Distribution shift 에 대해 Robust</strong> 한 성능을 보여주었다.</p></li>
-</ul>
-</li>
-<li><p><strong>Classifier 구현 방법</strong></p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_002.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_002.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 388 </span><span class="caption-text">Diffusion Classifier 아키텍쳐</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>예시로 먼저 살펴보기.</strong></p></li>
-<li><p>예를 들어, 어떤 동물 이미지 X 를 Stable Diffusion 으로 Classification 하고 싶다면..<br>
-1. 일단 해당 동물의 클래스를 포함하고 있을 만한 데이터셋을 구한다.<br>
-37개의 동물 클래스가 존재하는 Pets 데이터셋을 사용한다고 치자.<br>
-2. text prompts 로 “호랑이” 가 주어진 Stable Diffusion 으로, <br>
-X 의 Noised Image 에서 Reverse process 를 진행한다. 그럼 Loss 를 획득할 수 있을 것이다.<br>
-3. 37개의 모든 Pets Classes 에 대해서 이를 수행해서, <br>
-가장 Loss 가 작은 Class 를 판별한다. <br>
-이 Class 가 바로 이미지 X 의 클래스이다.</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_012.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_012.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 389 </span><span class="caption-text">Algorithm 1 : Diffusion Classifier 학습 알고리즘</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p><code class="docutils literal notranslate"><span class="pre">n_samples</span></code> 에 지정된 수 만큼 t 와 noise 를 각각 샘플링해 벡터를 만든다.</p></li>
-<li><p>클래스 판별이 필요한 이미지 X 의 t-step Noised image 인 X_t 를 구한다.</p></li>
-<li><p>X_t 를 Diffusion Model 에 Input 으로 주어 Noise 를 출력한다.</p></li>
-<li><p><strong>loss</strong> 를 구한다.<br></p></li>
-</ol>
-<ul class="simple">
-<li><p>위 과정을, 여러 번 (<code class="docutils literal notranslate"><span class="pre">n_trials</span></code> 만큼) 시도해서 평균낼 수도 있다.</p></li>
-</ul>
-<ol class="arabic simple" start="5">
-<li><p>loss 가 가장 낮은 Class 를 찾을 때 까지, 가능한 모든 Class 에 대해 추론한다.</p></li>
-<li><p>최종 남은 Class 를 X 의 Class 라고 판정한다.</p></li>
-</ol>
-<ul class="simple">
-<li><p>Zero-shot classification 도 위와 동일한 과정으로 진행된다. <br>
-다만 추론할 Class list 가 필요하다.<br>
-- 예를 들어서, Stable Diffusion 의 Zero-shot classification 을 수행하기 위해서는, <br>
-(Stable Diffusion 이 학습하지는 않았지만) 37개의 클래스가 정의되어 있는 <br>
-Pets 와 같은 데이터셋으로 Classification 을 수행할 수 있다.</p></li>
-<li><p>하지만, Class 마다 n_samples 수 만큼 t 를 샘플링하고,<br>
-또 X_t 를 구하고,<br>
-Diffusion Model 로 노이즈를 추론하고,<br>
-loss 를 구하는 것은 Inference times 가 많이 소모됨.<br>
-따라서 다음의 방법을 활용해 inference times 을 줄인다.</p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_022.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_022.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 390 </span><span class="caption-text">Algorithm 2. Efficient Diffusion Classifier Algorithm</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p><strong>일단 작은 수의 n_samples 로 error 가 높은 class 들을 걸러낸다.</strong></p></li>
-<li><p><strong>소수의 class 만 남았다면, <br>
-이제는 정확한 추론을 위해서 더 큰 n_samples 를 설정해 추론한다. <br>
-(large n_samples 로 t 와 <span class="math notranslate nohighlight">\(\epsilon\)</span> 을 sampling 한다.)</strong></p></li>
-</ol>
-<ul class="simple">
-<li><p>c.f.</p></li>
-</ul>
-<div class="highlight-markdown notranslate"><div class="highlight"><pre><span></span><span class="gu">### Oxford-IIIT Pets</span>
-```bash
-python eval_prob_adaptive.py --dataset pets --split test --n_trials 1 \
-        --to_keep 5 1 --n_samples 25 250 --loss l1 \
-        --prompt_path prompts/pets_prompts.csv
-</pre></div>
-</div>
-<ul class="simple">
-<li><p>왜 이렇게까지 inference time 을 줄이려고 하지??<br>
-- 위의 스크립트 그대로 RTX 3090 에서 돌리면, <br>
-Pets 이미지 1장 Classification 하는데 18초 걸린다.<br>
-- ImageNet 은 Class 1,000 개 있는데, <br>
-512x512 이미지 1장 Classification 하려면 1,000 초 걸린다.</p></li>
-<li><p><strong>c.f. Loss 계산 코드 (eval_prob_adaptive.py)</strong></p></li>
-</ul>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">all_noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">((</span><span class="n">max_n_samples</span> <span class="o">*</span> <span class="n">args</span><span class="o">.</span><span class="n">n_trials</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="n">latent</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
-
-<span class="k">def</span> <span class="nf">eval_error</span><span class="p">(</span><span class="n">unet</span><span class="p">,</span> <span class="n">scheduler</span><span class="p">,</span> <span class="n">latent</span><span class="p">,</span> <span class="n">all_noise</span><span class="p">,</span> <span class="n">ts</span><span class="p">,</span> <span class="n">noise_idxs</span><span class="p">,</span>
-                <span class="n">text_embeds</span><span class="p">,</span> <span class="n">text_embed_idxs</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;float32&#39;</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;l2&#39;</span><span class="p">):</span>
-        <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">noise_idxs</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">text_embed_idxs</span><span class="p">)</span>
-        <span class="n">pred_errors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">&#39;cpu&#39;</span><span class="p">)</span>
-        <span class="n">idx</span> <span class="o">=</span> <span class="mi">0</span>
-        <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">inference_mode</span><span class="p">():</span>
-        <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="o">.</span><span class="n">trange</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">//</span> <span class="n">batch_size</span> <span class="o">+</span> <span class="nb">int</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">%</span> <span class="n">batch_size</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">),</span> <span class="n">leave</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
-                <span class="n">batch_ts</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">ts</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">])</span>
-                <span class="n">noise</span> <span class="o">=</span> <span class="n">all_noise</span><span class="p">[</span><span class="n">noise_idxs</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">]]</span>
-                <span class="n">noised_latent</span> <span class="o">=</span> <span class="n">latent</span> <span class="o">*</span> <span class="p">(</span><span class="n">scheduler</span><span class="o">.</span><span class="n">alphas_cumprod</span><span class="p">[</span><span class="n">batch_ts</span><span class="p">]</span>  <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span> <span class="o">+</span> \
-                                <span class="n">noise</span> <span class="o">*</span> <span class="p">((</span><span class="mi">1</span> <span class="o">-</span> <span class="n">scheduler</span><span class="o">.</span><span class="n">alphas_cumprod</span><span class="p">[</span><span class="n">batch_ts</span><span class="p">])</span>  <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
-                <span class="n">t_input</span> <span class="o">=</span> <span class="n">batch_ts</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span><span class="o">.</span><span class="n">half</span><span class="p">()</span> <span class="k">if</span> <span class="n">dtype</span> <span class="o">==</span> <span class="s1">&#39;float16&#39;</span> <span class="k">else</span> <span class="n">batch_ts</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
-                <span class="n">text_input</span> <span class="o">=</span> <span class="n">text_embeds</span><span class="p">[</span><span class="n">text_embed_idxs</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">]]</span>
-                <span class="n">noise_pred</span> <span class="o">=</span> <span class="n">unet</span><span class="p">(</span><span class="n">noised_latent</span><span class="p">,</span> <span class="n">t_input</span><span class="p">,</span> <span class="n">encoder_hidden_states</span><span class="o">=</span><span class="n">text_input</span><span class="p">)</span><span class="o">.</span><span class="n">sample</span>
-                <span class="k">if</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;l2&#39;</span><span class="p">:</span>
-                <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
-                <span class="k">elif</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;l1&#39;</span><span class="p">:</span>
-                <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">l1_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
-                <span class="k">elif</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;huber&#39;</span><span class="p">:</span>
-                <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">huber_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
-                <span class="k">else</span><span class="p">:</span>
-                <span class="k">raise</span> <span class="ne">NotImplementedError</span>
-                <span class="n">pred_errors</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch_ts</span><span class="p">)]</span> <span class="o">=</span> <span class="n">error</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
-                <span class="n">idx</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch_ts</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">pred_errors</span>
-</pre></div>
-</div>
-<ul>
-<li><p><strong>실험 결과</strong></p>
-<ul>
-<li><p><strong>Figure 2</strong></p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_032.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_032.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 391 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>특정한 이미지 x 의 모든 클래스에 대해서 loss 를 추론하게 될텐데, <br>
-<strong>모든 클래스에 대해서 <br>
-동일한 <span class="math notranslate nohighlight">\(\epsilon\)</span></strong> (즉 sampled noise) <strong>과 동일한 t</strong> (즉 sampled time steps) <strong>를 사용해야</strong> 한다. <br>
-<strong>이 두 변수에 따라 loss 가 크게 달라지기 때문.</strong></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p><strong>Figure 3 &amp; Figure 4</strong></p>
-<ul class="simple">
-<li><p><strong>Figure 3</strong></p>
-<ul>
-<li><p>t 에 따라서, Classification 성능이 달라졌다.</p></li>
-</ul>
-</li>
-<li><p><strong>Figure 4</strong></p>
-<ul>
-<li><p>Figure 3 의 결과에 따라서, <br>
-intermediate timesteps 를 더 많이 sampling 하면 성능이 올라가는지 실험해보았다.</p></li>
-<li><p>그렇지 않았다.<br>
-timesteps 를 Uniform 하게 sampling 했을 때 성능이 가장 좋았다.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_041.png"><img alt="img_04" class="bg-primary mb-1" src="../../_images/img_041.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 392 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_051.png"><img alt="img_05" class="bg-primary mb-1" src="../../_images/img_051.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 393 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Table 1</strong> (+ F. Additional Implementation Details 참고)</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_062.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_062.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 394 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>본 논문에서 제시한 Diffusion Classifier 가 Classification 능력이 나쁘지 않았다.</p></li>
-</ul>
-<ol class="arabic simple">
-<li><p>Diffusion 모델에서 knowledge 를 추출해내는 다른 방법들보다 성능이 뛰어났다.<br>
-- Diffusion Classifier 는 <strong>Zero-shot 성능</strong>이, <br>
-<strong>“Stable Diffusion 으로 생성된 영상을“ 학습한</strong> <strong>ResNet-50</strong> <strong>classifier</strong> 보다 뛰어났다.<br>
-- <strong>Synthetic SD data :</strong> <br>
-Class 마다 10,000 장의 이미지를 Stable Diffusion 2.0 으로 생성해 <br>
-데이터셋을 구축하고 (90% train / 10% validation), <br>
-해당 데이터셋으로 ResNet-50 classifier 를 학습시켜서 classification 수행한 결과<br>
-- Diffusion Classifier 는 <strong>Classification 성능</strong>이, <br>
-<strong>Stable Diffusion 의 intermediate U-Net layer 를 추출해 학습시킨 <br>
-ResNet-based 모델</strong>보다 뛰어났다.<br>
-- <strong>SD features :</strong> <br>
-Input 이미지에 따른 Stable Diffusion 의 Intermediate U-Net features 를 <br>
-ResNet 기반의 classifier 에 전달해서 추론. <br>
-이 때 classifier 는 모든 데이터셋을 직접 학습한다. 따라서 zero-shot 은 아니다.<br></p></li>
-<li><p><strong>CLIP ResNet-50 모델보다도 성능이 뛰어났다.</strong></p></li>
-<li><p><strong>OpenCLIP ViT-H/14 모델에 competitive</strong> 했다.</p></li>
-</ol>
-<ul class="simple">
-<li><p><strong>Table 2</strong></p></li>
-</ul>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_071.png"><img alt="img_07" class="bg-primary mb-1" src="../../_images/img_071.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 395 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><strong>Stable Diffusion 은</strong> <br>
-Resolution 이 높은지, Aesthetic 한지, Safe-for-work 한지에 따라서 <strong>filtered 된 <br>
-LAION-5B 데이터셋을 학습</strong>했다.</p></li>
-<li><p>이와 같은 기준으로 filtering 하면, <br>
-<strong>CIFAR10, Pets, Flowers, STL10, ImageNet 데이터셋의 test set 은 97~100% 가 filtered out</strong> 된다.</p></li>
-<li><p>따라서, <strong>이들 데이터셋은 Stable Diffusion 에게 완전한 out-of-distribution 데이터</strong>이다.</p></li>
-<li><p>따라서, <strong>필터링이 안된 데이터로 Stable Diffusion 을 추가 학습시키면<br>
-classification 성능도 올라갈 것</strong>이다.</p></li>
-<li><p><strong>Figure 5 &amp; Table 3</strong></p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_082.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_082.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 396 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_092.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_092.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 397 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>본 논문에서는 Winoground 데이터셋을 활용해 <br>
-visio-linguistic compositional reasoning abilities 를 측정했다.<br></p>
-<ul>
-<li><p>주어진 captions 를 적절한 이미지에 매치시키는 능력을 측정하는 것이다.<br></p></li>
-<li><p>Winoground 데이터셋<br></p>
-<ul>
-<li><p>Object 는 명사절끼리 뒤바뀐 경우<br></p></li>
-<li><p>Relation 은 동사끼리 or 형용사끼리 or 부사끼리 뒤바뀐 경우<br></p></li>
-<li><p>Both 는 다른 품사끼리 서로 뒤바뀐 경우<br></p></li>
-</ul>
-</li>
-</ul>
-</li>
-<li><p>Stable Diffusion 의 Diffusion Classifier 가 최고의 성능을 보여주었다.</p></li>
-<li><p>본 논문에서 제시한 method 를 통해서 <strong>추가 학습 없이,</strong> <br>
-여느 diffusion 모델처럼 sample generation 만을 학습했음에도, <br>
-<strong>Stable Diffusion 모델을 훌륭한 classifier 이자 reasoner 로 변모</strong>시킬 수 있었다.</p></li>
-<li><p><strong>Table 4</strong></p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_102.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_102.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 398 </span><span class="caption-text">Table 4</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>ImageNet 에 존재하는 <strong>1,000 개의 클래스를 활용해</strong> <br>
-Pretrained <strong>DiT</strong> (Diffusion Transformer) 를 활용한 <strong>Diffusion Classifier 의 성능</strong>을, <br>
-<strong>Discriminative Classifiers</strong> (ResNet-101 and ViT-B/16) <strong>와 비교</strong>했다.</p></li>
-<li><p><strong>ImageNet</strong> 에 대해서, <strong>79.1% 의 top-1 accuracy 를 기록하며 ViT-L/32 을 능가</strong>했다.</p></li>
-<li><p><strong>더 적은 augmentation 기법</strong>을 사용하였고, <br>
-<strong>regularization 은 사용하지 않았음에도</strong> Discriminative Classifiers 의 성능을 능가했다.</p></li>
-<li><p><strong>Figure 6</strong></p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_112.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_112.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 399 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>ImageNet 데이터셋에서, <br>
-ImageNet-A 와 겹치는 클래스에 대해서만 Classification 을 수행한다.</p></li>
-<li><p>일반적인 <strong>discriminative classifiers 는 신뢰구간 과 함께 파란 점</strong>으로 찍혀 있다.</p></li>
-<li><p><strong>Diffusion Classifiers 는 신뢰구간 과 함께 별 모양의 점</strong>으로 찍혀 있다.</p></li>
-<li><p>Diffusion Classifiers 는 In-distribution (ImageNet) 에서 획득한 Accuracy 에 따라 <br>
-기대되는 것보다, <br>
-훨씬 Out-of-distribution (ImageNet-A) 에서의 성능이 뛰어났다.<br>
-- 즉, OOD 에 훨씬 Robust 하다.</p></li>
-<li><p>결론</p>
-<ul>
-<li><p>Diffusion Models 에서 <strong>Diffusion Classifier 를 추출하는 방법을 제시</strong>함</p></li>
-<li><p>Stable Diffusion 에서 추출한 <strong>Diffusion Classifier 가 Zero-shot 능력이 우수함을 확인</strong></p></li>
-<li><p>DiT 에서 추출한 <strong>Diffusion Classifier 가 Standard Classification 능력이 우수함을 확인</strong></p></li>
-<li><p>Diffusion Classifiers 의 <strong>Compositional Reasoning 능력이 우수함을 확인</strong></p></li>
-<li><p>Diffusion Classifiers 가 <strong>OOD 에 매우 Robust 함</strong></p></li>
-<li><p><strong>Filtering  되지 않은 데이터도 학습시킬 수 있다면, <br>
-Stable Diffusion 의 Diffusion Classifier 성능은 더 개선될 것</strong>임.</p></li>
-<li><p>Imagen 의 경우 OpenCLIP 보다 훨씬 큰 거대 언어 모델인, T5-XXL 을 활용했음.<br>
-<strong>Imagen 의 Classification 능력은 Stable Diffusion 보다 뛰어날 것으로 예상</strong>됨.</p></li>
-</ul>
-</li>
-</ul>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="BBDM.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">BBDM</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="progressive_distillation.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Progressive Distillation for Fast Sampling of Diffusion Models</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Your Diffusion Model is Secretly a Zero-Shot Classifier &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Progressive Distillation for Fast Sampling of Diffusion Models" href="progressive_distillation.html" />
+    <link rel="prev" title="BBDM" href="BBDM.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Your Diffusion Model is Secretly a Zero-Shot Classifier</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Your Diffusion Model is Secretly a Zero-Shot Classifier, ICCV 2023</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/pdf/2303.16203.pdf">https://arxiv.org/pdf/2303.16203.pdf</a></p></li>
+<li><p>Github io: <a class="reference external" href="https://diffusion-classifier.github.io/">https://diffusion-classifier.github.io/</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/diffusion-classifier/diffusion-classifier">diffusion-classifier/diffusion-classifier</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
+<li><p><strong>Edited by:</strong> <a href="https://www.linkedin.com/in/seonhoonkim/">SeonHoon Kim</a></p></li>
+<li><p><strong>Last updated on Nov. 09, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="your-diffusion-model-is-secretly-a-zero-shot-classifier">
+<h1>Your Diffusion Model is Secretly a Zero-Shot Classifier<a class="headerlink" href="#your-diffusion-model-is-secretly-a-zero-shot-classifier" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p><strong>핵심</strong></p>
+<ul>
+<li><p>학습된 <strong>Diffusion Models 에서 Classifier 를  추가 학습 없이 획득</strong>할 수 있다.</p>
+<ul>
+<li><p><strong>Stable Diffusion</strong> 같은 거대 모델로부터 <strong>Zero-shot classifier</strong> 를 얻을 수 있다.</p></li>
+<li><p><strong>Class-conditional Diffusion Models</strong> 에서는 <strong>일반적인 (non Zero-shot) classifier</strong> 를 얻을 수 있다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>결과 요약</strong></p>
+<ul>
+<li><p><strong>Classification 성능이 나쁘지 않았다.</strong></p></li>
+<li><p><strong>Zero-shot classifier 는 Multimodal Compositional reasoning ability 가 매우 훌륭</strong>했다.</p></li>
+<li><p>이렇게 Diffusion 모델에서 추출된 Classifiers 는 <strong>Distribution shift 에 대해 Robust</strong> 한 성능을 보여주었다.</p></li>
+</ul>
+</li>
+<li><p><strong>Classifier 구현 방법</strong></p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_002.png"><img alt="img_00" class="bg-primary mb-1" src="../../_images/img_002.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 388 </span><span class="caption-text">Diffusion Classifier 아키텍쳐</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>예시로 먼저 살펴보기.</strong></p></li>
+<li><p>예를 들어, 어떤 동물 이미지 X 를 Stable Diffusion 으로 Classification 하고 싶다면..<br>
+1. 일단 해당 동물의 클래스를 포함하고 있을 만한 데이터셋을 구한다.<br>
+37개의 동물 클래스가 존재하는 Pets 데이터셋을 사용한다고 치자.<br>
+2. text prompts 로 “호랑이” 가 주어진 Stable Diffusion 으로, <br>
+X 의 Noised Image 에서 Reverse process 를 진행한다. 그럼 Loss 를 획득할 수 있을 것이다.<br>
+3. 37개의 모든 Pets Classes 에 대해서 이를 수행해서, <br>
+가장 Loss 가 작은 Class 를 판별한다. <br>
+이 Class 가 바로 이미지 X 의 클래스이다.</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_012.png"><img alt="img_01" class="bg-primary mb-1" src="../../_images/img_012.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 389 </span><span class="caption-text">Algorithm 1 : Diffusion Classifier 추론 알고리즘</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">n_samples</span></code> 에 지정된 수 만큼 t 와 noise 를 각각 샘플링해 벡터를 만든다.</p></li>
+<li><p>클래스 판별이 필요한 이미지 X 의 t-step Noised image 인 X_t 를 구한다.</p></li>
+<li><p>X_t 를 Diffusion Model 에 Input 으로 주어 Noise 를 출력한다.</p></li>
+<li><p><strong>loss</strong> 를 구한다.<br></p></li>
+</ol>
+<ul class="simple">
+<li><p>위 과정을, 여러 번 (<code class="docutils literal notranslate"><span class="pre">n_trials</span></code> 만큼) 시도해서 평균낼 수도 있다.</p></li>
+</ul>
+<ol class="arabic simple" start="5">
+<li><p>loss 가 가장 낮은 Class 를 찾을 때까지, 가능한 모든 Class 에 대해 추론한다.</p></li>
+<li><p>최종 남은 Class 를 X 의 Class 라고 판정한다.</p></li>
+</ol>
+<ul class="simple">
+<li><p>Zero-shot classification 도 위와 동일한 과정으로 진행된다. <br>
+다만 추론할 Class list 가 필요하다.<br>
+- 예를 들어서, Stable Diffusion 의 Zero-shot classification 을 수행하기 위해서는, <br>
+(Stable Diffusion 이 학습하지는 않았지만) 37개의 클래스가 정의되어 있는 <br>
+Pets 와 같은 데이터셋으로 Classification 을 수행할 수 있다.</p></li>
+<li><p>하지만, Class 마다 n_samples 수 만큼 t 를 샘플링하고,<br>
+또 X_t 를 구하고,<br>
+Diffusion Model 로 노이즈를 추론하고,<br>
+loss 를 구하는 것은 Inference times 가 많이 소모됨.<br>
+따라서 다음의 방법을 활용해 inference times 을 줄인다.</p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_022.png"><img alt="img_02" class="bg-primary mb-1" src="../../_images/img_022.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 390 </span><span class="caption-text">Algorithm 2. Efficient Diffusion Classifier Algorithm</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p><strong>일단 작은 수의 n_samples 로 error 가 높은 class 들을 걸러낸다.</strong></p></li>
+<li><p><strong>소수의 class 만 남았다면, <br>
+이제는 정확한 추론을 위해서 더 큰 n_samples 를 설정해 추론한다. <br>
+(large n_samples 로 t 와 <span class="math notranslate nohighlight">\(\epsilon\)</span> 을 sampling 한다.)</strong></p></li>
+</ol>
+<ul class="simple">
+<li><p>c.f.</p></li>
+</ul>
+<div class="highlight-markdown notranslate"><div class="highlight"><pre><span></span><span class="gu">### Oxford-IIIT Pets</span>
+```bash
+python eval_prob_adaptive.py --dataset pets --split test --n_trials 1 \
+        --to_keep 5 1 --n_samples 25 250 --loss l1 \
+        --prompt_path prompts/pets_prompts.csv
+</pre></div>
+</div>
+<ul class="simple">
+<li><p>왜 이렇게까지 inference time 을 줄이려고 하지??<br>
+- 위의 스크립트 그대로 RTX 3090 에서 돌리면, <br>
+Pets 이미지 1장 Classification 하는데 18초 걸린다.<br>
+- ImageNet 은 Class 1,000 개 있는데, <br>
+512x512 이미지 1장 Classification 하려면 1,000 초 걸린다.</p></li>
+<li><p><strong>c.f. Loss 계산 코드 (eval_prob_adaptive.py)</strong></p></li>
+</ul>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">all_noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">((</span><span class="n">max_n_samples</span> <span class="o">*</span> <span class="n">args</span><span class="o">.</span><span class="n">n_trials</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">,</span> <span class="n">latent_size</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="n">latent</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
+
+<span class="k">def</span><span class="w"> </span><span class="nf">eval_error</span><span class="p">(</span><span class="n">unet</span><span class="p">,</span> <span class="n">scheduler</span><span class="p">,</span> <span class="n">latent</span><span class="p">,</span> <span class="n">all_noise</span><span class="p">,</span> <span class="n">ts</span><span class="p">,</span> <span class="n">noise_idxs</span><span class="p">,</span>
+                <span class="n">text_embeds</span><span class="p">,</span> <span class="n">text_embed_idxs</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;float32&#39;</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;l2&#39;</span><span class="p">):</span>
+        <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">noise_idxs</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">text_embed_idxs</span><span class="p">)</span>
+        <span class="n">pred_errors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">),</span> <span class="n">device</span><span class="o">=</span><span class="s1">&#39;cpu&#39;</span><span class="p">)</span>
+        <span class="n">idx</span> <span class="o">=</span> <span class="mi">0</span>
+        <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">inference_mode</span><span class="p">():</span>
+            <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="o">.</span><span class="n">trange</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">//</span> <span class="n">batch_size</span> <span class="o">+</span> <span class="nb">int</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">ts</span><span class="p">)</span> <span class="o">%</span> <span class="n">batch_size</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">),</span> <span class="n">leave</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+                <span class="n">batch_ts</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">ts</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">])</span>
+                <span class="n">noise</span> <span class="o">=</span> <span class="n">all_noise</span><span class="p">[</span><span class="n">noise_idxs</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">]]</span>
+                <span class="n">noised_latent</span> <span class="o">=</span> <span class="n">latent</span> <span class="o">*</span> <span class="p">(</span><span class="n">scheduler</span><span class="o">.</span><span class="n">alphas_cumprod</span><span class="p">[</span><span class="n">batch_ts</span><span class="p">]</span> <span class="o">**</span> <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span> <span class="o">+</span> \
+                                <span class="n">noise</span> <span class="o">*</span> <span class="p">((</span><span class="mi">1</span> <span class="o">-</span> <span class="n">scheduler</span><span class="o">.</span><span class="n">alphas_cumprod</span><span class="p">[</span><span class="n">batch_ts</span><span class="p">])</span> <span class="o">**</span> <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
+                <span class="n">t_input</span> <span class="o">=</span> <span class="n">batch_ts</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span><span class="o">.</span><span class="n">half</span><span class="p">()</span> <span class="k">if</span> <span class="n">dtype</span> <span class="o">==</span> <span class="s1">&#39;float16&#39;</span> <span class="k">else</span> <span class="n">batch_ts</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
+                <span class="n">text_input</span> <span class="o">=</span> <span class="n">text_embeds</span><span class="p">[</span><span class="n">text_embed_idxs</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="n">batch_size</span><span class="p">]]</span>
+                <span class="n">noise_pred</span> <span class="o">=</span> <span class="n">unet</span><span class="p">(</span><span class="n">noised_latent</span><span class="p">,</span> <span class="n">t_input</span><span class="p">,</span> <span class="n">encoder_hidden_states</span><span class="o">=</span><span class="n">text_input</span><span class="p">)</span><span class="o">.</span><span class="n">sample</span>
+                <span class="k">if</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;l2&#39;</span><span class="p">:</span>
+                    <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
+                <span class="k">elif</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;l1&#39;</span><span class="p">:</span>
+                    <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">l1_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
+                <span class="k">elif</span> <span class="n">loss</span> <span class="o">==</span> <span class="s1">&#39;huber&#39;</span><span class="p">:</span>
+                    <span class="n">error</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">huber_loss</span><span class="p">(</span><span class="n">noise</span><span class="p">,</span> <span class="n">noise_pred</span><span class="p">,</span> <span class="n">reduction</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
+                <span class="k">else</span><span class="p">:</span>
+                    <span class="k">raise</span> <span class="ne">NotImplementedError</span>
+                <span class="n">pred_errors</span><span class="p">[</span><span class="n">idx</span><span class="p">:</span> <span class="n">idx</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch_ts</span><span class="p">)]</span> <span class="o">=</span> <span class="n">error</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
+                <span class="n">idx</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch_ts</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">pred_errors</span>
+</pre></div>
+</div>
+<ul>
+<li><p><strong>실험 결과</strong></p>
+<ul>
+<li><p><strong>Figure 2</strong></p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_032.png"><img alt="img_03" class="bg-primary mb-1" src="../../_images/img_032.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 391 </span><span class="caption-text">Figure 2</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>특정한 이미지 x 의 모든 클래스에 대해서 loss 를 추론하게 될텐데, <br>
+<strong>모든 클래스에 대해서 <br>
+동일한 <span class="math notranslate nohighlight">\(\epsilon\)</span></strong> (즉 sampled noise) <strong>과 동일한 t</strong> (즉 sampled time steps) <strong>를 사용해야</strong> 한다. <br>
+<strong>이 두 변수에 따라 loss 가 크게 달라지기 때문.</strong></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p><strong>Figure 3 &amp; Figure 4</strong></p>
+<ul class="simple">
+<li><p><strong>Figure 3</strong></p>
+<ul>
+<li><p>t 에 따라서, Classification 성능이 달라졌다.</p></li>
+</ul>
+</li>
+<li><p><strong>Figure 4</strong></p>
+<ul>
+<li><p>Figure 3 의 결과에 따라서, <br>
+intermediate timesteps 를 더 많이 sampling 하면 성능이 올라가는지 실험해보았다.</p></li>
+<li><p>그렇지 않았다.<br>
+timesteps 를 Uniform 하게 sampling 했을 때 성능이 가장 좋았다.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_041.png"><img alt="img_04" class="bg-primary mb-1" src="../../_images/img_041.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 392 </span><span class="caption-text">Figure 3</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_051.png"><img alt="img_05" class="bg-primary mb-1" src="../../_images/img_051.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 393 </span><span class="caption-text">Figure 4</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Table 1</strong> (+ F. Additional Implementation Details 참고)</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_062.png"><img alt="img_06" class="bg-primary mb-1" src="../../_images/img_062.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 394 </span><span class="caption-text">Table 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>본 논문에서 제시한 Diffusion Classifier 가 Classification 능력이 나쁘지 않았다.</p></li>
+</ul>
+<ol class="arabic simple">
+<li><p>Diffusion 모델에서 knowledge 를 추출해내는 다른 방법들보다 성능이 뛰어났다.<br>
+- Diffusion Classifier 는 <strong>Zero-shot 성능</strong>이, <br>
+<strong>“Stable Diffusion 으로 생성된 영상을” 학습한</strong> <strong>ResNet-50</strong> <strong>classifier</strong> 보다 뛰어났다.<br>
+- <strong>Synthetic SD data :</strong> <br>
+Class 마다 10,000 장의 이미지를 Stable Diffusion 2.0 으로 생성해 <br>
+데이터셋을 구축하고 (90% train / 10% validation), <br>
+해당 데이터셋으로 ResNet-50 classifier 를 학습시켜서 classification 수행한 결과<br>
+- Diffusion Classifier 는 <strong>Classification 성능</strong>이, <br>
+<strong>Stable Diffusion 의 intermediate U-Net layer 를 추출해 학습시킨 <br>
+ResNet-based 모델</strong>보다 뛰어났다.<br>
+- <strong>SD features :</strong> <br>
+Input 이미지에 따른 Stable Diffusion 의 Intermediate U-Net features 를 <br>
+ResNet 기반의 classifier 에 전달해서 추론. <br>
+이 때 classifier 는 모든 데이터셋을 직접 학습한다. 따라서 zero-shot 은 아니다.<br></p></li>
+<li><p><strong>CLIP ResNet-50 모델보다도 성능이 뛰어났다.</strong></p></li>
+<li><p><strong>OpenCLIP ViT-H/14 모델에 competitive</strong> 했다.</p></li>
+</ol>
+<ul class="simple">
+<li><p><strong>Table 2</strong></p></li>
+</ul>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_071.png"><img alt="img_07" class="bg-primary mb-1" src="../../_images/img_071.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 395 </span><span class="caption-text">Table 2</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><strong>Stable Diffusion 은</strong> <br>
+Resolution 이 높은지, Aesthetic 한지, Safe-for-work 한지에 따라서 <strong>filtered 된 <br>
+LAION-5B 데이터셋을 학습</strong>했다.</p></li>
+<li><p>이와 같은 기준으로 filtering 하면, <br>
+<strong>CIFAR10, Pets, Flowers, STL10, ImageNet 데이터셋의 test set 은 97~100% 가 filtered out</strong> 된다.</p></li>
+<li><p>따라서, <strong>이들 데이터셋은 Stable Diffusion 에게 완전한 out-of-distribution 데이터</strong>이다.</p></li>
+<li><p>따라서, <strong>필터링이 안된 데이터로 Stable Diffusion 을 추가 학습시키면<br>
+classification 성능도 올라갈 것</strong>이다.</p></li>
+<li><p><strong>Figure 5 &amp; Table 3</strong></p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_082.png"><img alt="img_08" class="bg-primary mb-1" src="../../_images/img_082.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 396 </span><span class="caption-text">Figure 5</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_092.png"><img alt="img_09" class="bg-primary mb-1" src="../../_images/img_092.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 397 </span><span class="caption-text">Table 3</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>본 논문에서는 Winoground 데이터셋을 활용해 <br>
+visio-linguistic compositional reasoning abilities 를 측정했다.<br></p>
+<ul>
+<li><p>주어진 captions 를 적절한 이미지에 매치시키는 능력을 측정하는 것이다.<br></p></li>
+<li><p>Winoground 데이터셋<br></p>
+<ul>
+<li><p>Object 는 명사구끼리 뒤바뀐 경우<br></p></li>
+<li><p>Relation 은 동사끼리 or 형용사끼리 or 부사끼리 뒤바뀐 경우<br></p></li>
+<li><p>Both 는 다른 품사끼리 서로 뒤바뀐 경우<br></p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>Stable Diffusion 의 Diffusion Classifier 가 최고의 성능을 보여주었다.</p></li>
+<li><p>본 논문에서 제시한 method 를 통해서 <strong>추가 학습 없이,</strong> <br>
+여느 diffusion 모델처럼 sample generation 만을 학습했음에도, <br>
+<strong>Stable Diffusion 모델을 훌륭한 classifier 이자 reasoner 로 변모</strong>시킬 수 있었다.</p></li>
+<li><p><strong>Table 4</strong></p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_102.png"><img alt="img_10" class="bg-primary mb-1" src="../../_images/img_102.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 398 </span><span class="caption-text">Table 4</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>ImageNet 에 존재하는 <strong>1,000 개의 클래스를 활용해</strong> <br>
+Pretrained <strong>DiT</strong> (Diffusion Transformer) 를 활용한 <strong>Diffusion Classifier 의 성능</strong>을, <br>
+<strong>Discriminative Classifiers</strong> (ResNet-101 and ViT-B/16) <strong>와 비교</strong>했다.</p></li>
+<li><p><strong>ImageNet</strong> 에 대해서, <strong>79.1% 의 top-1 accuracy 를 기록하며 ViT-L/32 을 능가</strong>했다.</p></li>
+<li><p><strong>더 적은 augmentation 기법</strong>을 사용하였고, <br>
+<strong>regularization 은 사용하지 않았음에도</strong> Discriminative Classifiers 의 성능을 능가했다.</p></li>
+<li><p><strong>Figure 6</strong></p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_112.png"><img alt="img_11" class="bg-primary mb-1" src="../../_images/img_112.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 399 </span><span class="caption-text">Figure 6</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>ImageNet 데이터셋에서, <br>
+ImageNet-A 와 겹치는 클래스에 대해서만 Classification 을 수행한다.</p></li>
+<li><p>일반적인 <strong>discriminative classifiers 는 신뢰구간과 함께 파란 점</strong>으로 찍혀 있다.</p></li>
+<li><p><strong>Diffusion Classifiers 는 신뢰구간과 함께 별 모양의 점</strong>으로 찍혀 있다.</p></li>
+<li><p>Diffusion Classifiers 는 In-distribution (ImageNet) 에서 획득한 Accuracy 에 따라 <br>
+기대되는 것보다, <br>
+Out-of-distribution (ImageNet-A) 에서의 성능이 훨씬 뛰어났다.<br>
+- 즉, OOD 에 훨씬 Robust 하다.</p></li>
+<li><p>결론</p>
+<ul>
+<li><p>Diffusion Models 에서 <strong>Diffusion Classifier 를 추출하는 방법을 제시</strong>함</p></li>
+<li><p>Stable Diffusion 에서 추출한 <strong>Diffusion Classifier 가 Zero-shot 능력이 우수함을 확인</strong></p></li>
+<li><p>DiT 에서 추출한 <strong>Diffusion Classifier 가 Standard Classification 능력이 우수함을 확인</strong></p></li>
+<li><p>Diffusion Classifiers 의 <strong>Compositional Reasoning 능력이 우수함을 확인</strong></p></li>
+<li><p>Diffusion Classifiers 가 <strong>OOD 에 매우 Robust 함</strong></p></li>
+<li><p><strong>Filtering  되지 않은 데이터도 학습시킬 수 있다면, <br>
+Stable Diffusion 의 Diffusion Classifier 성능은 더 개선될 것</strong>임.</p></li>
+<li><p>Imagen 의 경우 OpenCLIP 보다 훨씬 큰 거대 언어 모델인, T5-XXL 을 활용했음.<br>
+<strong>Imagen 의 Classification 능력은 Stable Diffusion 보다 뛰어날 것으로 예상</strong>됨.</p></li>
+</ul>
+</li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="BBDM.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">BBDM</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="progressive_distillation.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Progressive Distillation for Fast Sampling of Diffusion Models</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/consistency_models.html b/docs/review/consistency_models.html
old mode 100644
new mode 100755
index d721f1dd..e58dbafd
--- a/docs/review/consistency_models.html
+++ b/docs/review/consistency_models.html
@@ -1,975 +1,995 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Consistency Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/consistency_models';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Latent Consistency Models" href="latent_consistency_models.html" />
-    <link rel="prev" title="Scaling up GANs for Text-to-Image Synthesis" href="GIGAGAN.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/consistency_models.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/consistency_models.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Consistency Models</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2. Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3.  Consistency Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-via-distillation">4. Training Consistency Models via Distillation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-in-isolation">5. Training Consistency Models in Isolation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">6. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models">6.1. Training Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#few-step-image-generation">6.2. Few-Step Image Generation</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-shot-image-editing">6.3. Zero-Shot Image Editing</a></li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Consistency Models (ICML 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2303.01469">https://arxiv.org/pdf/2303.01469</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/openai/consistency_models">openai/consistency_models</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Apr. 26, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="consistency-models">
-<h1>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Diffusion Models 은 image generation, audio synthesis, video generation 등의 다양한 분야에서 연구가 진행되어 왔습니다. 하지만 single-step generative model 인 VAE, GAN, 그리고 normalizing flows 에 비해 추론 시 10-2000배 더 많은 연산작업을 요하여 생성 속도가 느리다는 치명적인 단점이 존재합니다.</p>
-<p>이러한 문제를 해결하기 위해 논문에서 Consistency Model 을 소개합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_01.png"><img alt="consistency_models_01" class="bg-primary mb-1" src="../../_images/consistency_models_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 480 </span><span class="caption-text">Overview of Consistency Models</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위의 사진처럼 주어진 PF ODE (Probability Flow Ordinary Differential Equation) 에 대해서 동일한 trajectory 에 있는 point 들이 동일한 시작점으로 매핑되도록 모델을 학습하는 방식을 제안하고, 이러한 self-consistency 특성을 만족시킬 수 있도록 2가지 학습 방식을 소개합니다.</p>
-<ol class="arabic simple">
-<li><p>첫번째 방식으로는 우선적으로 numerical ODE solver 와 사전 학습된 diffusion model을 사용하여 PF ODE trajectory 에서 인접한 point 쌍을 생성합니다. 그리고 이러한 쌍에 대한 모델 출력 간의 차이를 최소화하도록 모델을 학습함으로써 diffusion model 을 consistency model 로 효과적으로 knowledge distillation 을 적용할 수 있고, 단 한번의 step 만으로도 high quality sample 을 생성할 수 있게 됩니다.</p></li>
-<li><p>두번째 방식으로는 사전학습된 diffusion model 에 의존하지 않고 독립적으로 consistency model 을 학습하는 방식입니다.</p></li>
-</ol>
-<p>CIFAR-10, ImageNet 64x64, LSUN 256x256 데이터셋에 실험한 결과, 기존 distillation 기법을 적용한 모델 (i.e., progressive distillation) 보다 성능이 개선되고, 독립적인 모델로서도 사전학습된 diffusion model 없이 GAN 을 제외한 single-step generative model 보다 성능이 좋다고 합니다. 마지막으로 다양한 zero-shot data editing (image denoising, interpolation, inpainting, colorization, super-resolution, stroke-guided image editing) task 에도 consistency model 이 좋은 성능을 보여준다는 것을 확인하였습니다.</p>
-</section>
-<section id="diffusion-models">
-<h2>2. Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
-<p><strong>Diffusion Models in Continuous Time</strong></p>
-<p>Continuous time 에서의 diffusion model 을 다음과 같이 SDE 수식으로 표현할 수 있습니다. (<a class="reference external" href="https://arxiv.org/abs/2011.13456">Song et al., 2021</a>)</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_02.png"><img alt="consistency_models_02" class="bg-primary mb-1" src="../../_images/consistency_models_02.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 481 </span><span class="caption-text">Diffusion Models in Continuous Time</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(t \in [0,T], T &gt; 0\)</span></p></li>
-<li><p><span class="math notranslate nohighlight">\(\mu(\cdot,\cdot), \sigma(\cdot)\)</span> := drift, diffusion coefficients</p></li>
-<li><p><span class="math notranslate nohighlight">\(\{w_t\}_{t \in [0,T]}\)</span> := standard Brownian motion</p></li>
-</ul>
-<p>그리고 해당 SDE 는 아래 식과 같은 PF ODE 로 표현할 수 있다는 성질을 가지고 있습니다. 이때 <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 를 <em>score function</em> 이라고 하고, 시점 t 에 대한 solution trajectory 가 <span class="math notranslate nohighlight">\(p_t(x)\)</span> 에 따라 분포합니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_03.png"><img alt="consistency_models_03" class="bg-primary mb-1" src="../../_images/consistency_models_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 482 </span><span class="caption-text">Probability Flow (PF) ODE</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>일반적으로 위의 SDE 수식에서 <span class="math notranslate nohighlight">\(p_T(x)\)</span> 가 Gaussian distribution <span class="math notranslate nohighlight">\(\pi(x)\)</span> 를 따르도록 정의하고, 해당 논문에서 <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 와 동일하게 <span class="math notranslate nohighlight">\(\mu(x,t) = 0, \sigma(t) = \sqrt{2}t\)</span> 로 설정하였습니다. 그리고 sampling 시, score matching 을 통해 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t) \approx \nabla \log p_t(x)\)</span> 을 우선적으로 학습한 후, 위의 식에 대입하면 다음과 같은 empirical PF ODE 로 표현할 수 있습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_04.png"><img alt="consistency_models_04" class="bg-primary mb-1" src="../../_images/consistency_models_04.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 483 </span><span class="caption-text">Empirical PF ODE</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>다음 과정으로는 initial condition <span class="math notranslate nohighlight">\(x_T \sim N(0, T^2I)\)</span> 을 기반으로 Euler 나 Heun 등의 numerical ode solver 를 통해 역방향으로 풀어 solution trajectory <span class="math notranslate nohighlight">\(\{x_t\}_{t \in [0,T]}\)</span> 를 구할 수 있습니다. 이렇게 얻은 trajectory <span class="math notranslate nohighlight">\(\{x_t\}\)</span> 의 끝점 <span class="math notranslate nohighlight">\(\hat{x}_0\)</span> 은 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span> 로부터 나온 샘플에 대한 근사값이라고 할 수 있습니다.</p>
-<p>추가적으로 논문에서 numerical instability 를 방지하기 위해 고정된 작은 양수 <span class="math notranslate nohighlight">\(\epsilon\)</span> 에 대해 <span class="math notranslate nohighlight">\(t = \epsilon\)</span> 에서 solver 를 멈추고 <span class="math notranslate nohighlight">\(\hat{x}_{\epsilon}\)</span> 를 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span> 에 대한 근사값으로 간주합니다. 그리고 <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 와 동일하게 이미지 픽셀 값을 <span class="math notranslate nohighlight">\([-1,1]\)</span> 로 rescale 하고  <span class="math notranslate nohighlight">\(T = 80, \epsilon = 0.002\)</span> 로 설정합니다.</p>
-<p>앞서 소개드린 방식으로 diffusion model 을 통한 sampling 시, ode solver 를 사용하는데 score model <span class="math notranslate nohighlight">\(s_{\phi}(x_t,t)\)</span> 의 수많은 iterative evaluation 작업이 필요합니다. 빠른 sampling 작업을 위해, 더 빠른 numerical ode solver 에 대한 연구들이 진행되었지만, 이를 활용해도 최소 10번 이상의 evaluation step 을 거쳐야만 competitive 한 성능을 보여준다고 합니다. 여러 distillation 기법들에 대한 연구들도 진행되었지만, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a> 를 제외하고는 distillation 작업을 하기 위해서 사전에 diffusion model 로부터 대량의 데이터를 수집해야한다는 단점이 있습니다.</p>
-</section>
-<section id="id1">
-<h2>3.  Consistency Models<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>논문에서 single-step generation 이 가능한 consistency model 을 제안하고, 학습하는 방식으로 1) 사전학습된 diffusion model 로부터 knowledge distillation 진행하는 방식과 2) 독립적으로 학습하는 방식 을 소개합니다.</p>
-<p><strong>Definition</strong></p>
-<p>주어진 PF ODE 에 대한 trajectory <span class="math notranslate nohighlight">\(\{x_t\}_{t \in [0,T]}\)</span> 에 대해서 다음과 같은 <em>consistency function</em>            <span class="math notranslate nohighlight">\(f : (x_t, t) \mapsto x_{\epsilon}\)</span> 을 정의하고, 함수 <span class="math notranslate nohighlight">\(f\)</span> 는 동일한 trajectory 에 있는 임의의 두 <span class="math notranslate nohighlight">\(t, t' \in [\epsilon, T]\)</span> 에 대해 <span class="math notranslate nohighlight">\(f(x_t, t) = f(x_{t'}, t')\)</span>, 즉 <em>self-consistency</em> 성질을 만족합니다. 이러한 함수 <span class="math notranslate nohighlight">\(f\)</span> 를 예측하기 위해 데이터로부터 <span class="math notranslate nohighlight">\(f_{\theta}\)</span> 을 학습하는 것이 consistency model 의 objective 이며, 이는 <a class="reference external" href="https://arxiv.org/abs/2110.13040">Bilos et al., 2021</a> 에서 정의하는 neural flow 와 유사하지만, consistency model 은 invertible property 를 부여하지는 않습니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_05.png"><img alt="consistency_models_05" class="bg-primary mb-1" src="../../_images/consistency_models_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 484 </span><span class="caption-text">Self-Consistency</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Parameterization</strong></p>
-<p>앞서 정의한 부분에 의해, 모든 consistency function <span class="math notranslate nohighlight">\(f\)</span> 에 대해서 <span class="math notranslate nohighlight">\(f(x_{\epsilon}, \epsilon) = x_{\epsilon}\)</span> 를 만족해야 하는 <em>boundary condition</em> 이 존재하고, 이를 만족하기 위해 다음과 같이 두 가지 parameterization 방식을 정의하였습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_06.png"><img alt="consistency_models_06" class="bg-primary mb-1" src="../../_images/consistency_models_06.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 485 </span><span class="caption-text">Parameterization Method 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>여기서 <span class="math notranslate nohighlight">\(F_{\theta}(x,t)\)</span> 는 output 차원이 <span class="math notranslate nohighlight">\(x\)</span> 와 동일한 free-form deep neural network 입니다. 두번째 방식으로는, 다음과 같이 skip-connection 을 활용합니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_07.png"><img alt="consistency_models_07" class="bg-primary mb-1" src="../../_images/consistency_models_07.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 486 </span><span class="caption-text">Parameterization Method 2</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이때, <span class="math notranslate nohighlight">\(F_{\theta}(x,t), c_{skip}(t), c_{out}(t)\)</span> 는 <span class="math notranslate nohighlight">\(c_{skip}(\epsilon) = 1, c_{out}(\epsilon) = 0\)</span> 조건을 만족시키는 미분 가능한 함수로 정의합니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_08.png"><img alt="consistency_models_08" class="bg-primary mb-1" src="../../_images/consistency_models_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 487 </span><span class="caption-text">Parameterization Method 2 - <span class="math notranslate nohighlight">\(c_{skip}(t), c_{out}(t)\)</span></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>두번째 parameterization 기법이 diffusion model 형식과 매우 유사하여 이러한 diffusion model 아키텍쳐를 leverage 하기 위해 논문에서 두번째 방식으로 parameterization 을 하는 방식을 택합니다.</p>
-<p><strong>Sampling</strong></p>
-<p>학습된 consistency model <span class="math notranslate nohighlight">\(f_{\theta}(\cdot,\cdot)\)</span> 와 initial distribution <span class="math notranslate nohighlight">\(\hat{x}_T \sim N(0, T^2I)\)</span> 를 활용하여 단일 sampling step 만으로 <span class="math notranslate nohighlight">\(\hat{x}_{\epsilon} = f_{\theta}(\hat{x}_{T}, T)\)</span> 를 생성할 수 있습니다. 더불어 하단 pseudo code 에 보이듯이, denoising 과 noise injection 작업을 여러 번 거치는 <em>multistep</em> consistency sampling 을 통해 computing cost 와 sample quality 를 trade-off 할 수 있는 유연성도 제공합니다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_09.png"><img alt="consistency_models_09" class="bg-primary mb-1" src="../../_images/consistency_models_09.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 488 </span><span class="caption-text">Multistep Consistency Sampling</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Zero-Shot Data Editing</strong></p>
-<p>Diffusion model 과 유사하게 추가적인 학습 없이 zero-shot 형태로 image editing 그리고 manipulation task 가 가능합니다. 예를 들어, consistency model 은 Gaussian noise 로부터 one-to-one mapping 을 통해 <span class="math notranslate nohighlight">\(x_{\epsilon}\)</span> 을 생성하기 때문에 GAN, VAE, normalizing flow 와 유사하게 latent space 를 통해 sample 들 간의 interpolation 이 가능합니다. 또한, multistep consistency sampling 을 통해 zero-shot 의 한계점을 보완하면서 image editing, inpainting, colorization, super-resolution, stroke-guided image editing (<a class="reference external" href="https://arxiv.org/abs/2108.01073">Meng et al., 2021</a>) 등의 다양한 task 를 수행할 수 있습니다. 대표적으로 몇 가지 zero-shot image editing 결과 예시들을 공유합니다.</p>
-<ul>
-<li><p>Inpainting</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_10.png"><img alt="consistency_models_10" class="bg-primary mb-1" src="../../_images/consistency_models_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 489 </span><span class="caption-text">Consistency Models - Inpainting</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Interpolation</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_11.png"><img alt="consistency_models_11" class="bg-primary mb-1" src="../../_images/consistency_models_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 490 </span><span class="caption-text">Consistency Models - Interpolation</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p>Self-consistency</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_12.png"><img alt="consistency_models_12" class="bg-primary mb-1" src="../../_images/consistency_models_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 491 </span><span class="caption-text">Consistency Models - Self-consistency</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="training-consistency-models-via-distillation">
-<h2>4. Training Consistency Models via Distillation<a class="headerlink" href="#training-consistency-models-via-distillation" title="Permalink to this heading">#</a></h2>
-<p>앞서 소개한 두 가지 학습 방식 중, 첫번째로 사전학습된 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t)\)</span> 를 consistency model 에 knowledge distillation 하여 학습하는 방식을 소개합니다.</p>
-<p>논문에서 특히 continuous time interval <span class="math notranslate nohighlight">\([\epsilon, T]\)</span> 을 boundary <span class="math notranslate nohighlight">\(t_1 = \epsilon &lt; t_2 &lt; \cdots &lt; t_N=T\)</span> 를 기준으로 <span class="math notranslate nohighlight">\(N-1\)</span> 개의 interval 로 discretize 하는 상황을 가정하고, <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 의 설정과 동일하게 boundary 를 다음과 같이 정의합니다.</p>
-<div class="math notranslate nohighlight">
-\[
-t_i = (\epsilon^{1/\rho} + \frac{i-1}{N-1}(T^{1/\rho}-\epsilon^{1/\rho}))^{\rho}, \quad \rho = 7
-\]</div>
-<p>이때, <span class="math notranslate nohighlight">\(N\)</span> 이 충분히 크다면 다음과 같이 numerical ode solver 의 discretization step 을 한번 진행시켜 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span> 로부터 <span class="math notranslate nohighlight">\(x_{t_n}\)</span> 에 대한 정확한 예측값을 얻을 수 있다고 합니다.</p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_13.png"><img alt="consistency_models_13" class="bg-primary mb-1" src="../../_images/consistency_models_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 492 </span><span class="caption-text">One Discretization Step of a Numerical ODE Solver</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이때, <span class="math notranslate nohighlight">\(\Phi(\cdots; \phi)\)</span> 은 empirical PF ODE 에 적용되는 one step ODE solver 의 update function 입니다. Euler solver 같은 경우, <span class="math notranslate nohighlight">\(\Phi(x,t;\phi) = -ts_{\phi}(x,t)\)</span> 를 대입하면 다음과 같이 표현할 수 있게 됩니다.</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_14.png"><img alt="consistency_models_14" class="bg-primary mb-1" src="../../_images/consistency_models_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 493 </span><span class="caption-text">One Discretization Step of a Euler ODE Solver</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>위 수식을 활용하여 주어진 <span class="math notranslate nohighlight">\(x \sim p_{data}\)</span> 에 대해서 PF ODE trajectory 에 있는 인접한 두 지점 <span class="math notranslate nohighlight">\((\hat{x}^{\phi}_{t_n}, x_{t_{n+1}})\)</span> 을 샘플링할 수 있게 됩니다. 더 자세하게는, <span class="math notranslate nohighlight">\(x_{t_{n+1}} \sim N(x,t^2_{n+1}I)\)</span> 를 우선적으로 샘플링한 후, 위 수식을 기반으로 numerical ode solver 의 discretization step 을 한번 거쳐 <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span> 을 계산합니다. 최종적으로 <span class="math notranslate nohighlight">\((\hat{x}^{\phi}_{t_n}, x_{t_{n+1}})\)</span> 로부터의 모델 출력값 차이를 최소화하도록 consistency model 을 학습하게 되고, 학습 시 사용되는 <em>consistency distillation loss</em> 는 다음과 같이 정의합니다.</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_15.png"><img alt="consistency_models_15" class="bg-primary mb-1" src="../../_images/consistency_models_15.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 494 </span><span class="caption-text">Consistency Distillation Loss</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>여기서 metric function <span class="math notranslate nohighlight">\(d(\cdot,\cdot)\)</span> 로는 L1 distance, L2 distance, 그리고 LPIPS distance 를 사용하였고, 실험적으로 확인해본 결과 <span class="math notranslate nohighlight">\(\lambda(t_n) \equiv 1\)</span> 를 적용했을 때 모델 성능이 가장 좋은 부분을 확인할 수 있었다고 합니다. 파라미터 <span class="math notranslate nohighlight">\(\theta\)</span> 는 stochastic gradient descent 그리고 <span class="math notranslate nohighlight">\(\theta^-\)</span> 는 exponential moving average (EMA) 로 학습하였다고 합니다.</p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_16.png"><img alt="consistency_models_16" class="bg-primary mb-1" src="../../_images/consistency_models_16.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 495 </span><span class="caption-text">Exponential Moving Average (EMA)</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이처럼 EMA update 와 stopgrad operator 로 학습할 시, <span class="math notranslate nohighlight">\(\theta = \theta^{-}\)</span> 로 설정할때보다 모델을 더 안정적으로 학습할 수 있고 성능에도 개선이 있었다고 합니다. 전체적인 학습 절차는 하단의 Algorithm 2 처럼 정리할 수 있습니다.</p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_17.png"><img alt="consistency_models_17" class="bg-primary mb-1" src="../../_images/consistency_models_17.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 496 </span><span class="caption-text">Overview of Consistency Distillation (CD)</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\theta^{-}\)</span>  는 과거 <span class="math notranslate nohighlight">\(\theta\)</span> 에 대한 running average 이므로 Algorithm 2 가 수렴할 시 이 둘은 일치하게 됩니다. 즉, target network <span class="math notranslate nohighlight">\(f_{\theta^-}\)</span>와 online network <span class="math notranslate nohighlight">\(f_{\theta}\)</span>  가 일치하게 됩니다.</p>
-</section>
-<section id="training-consistency-models-in-isolation">
-<h2>5. Training Consistency Models in Isolation<a class="headerlink" href="#training-consistency-models-in-isolation" title="Permalink to this heading">#</a></h2>
-<p>Consistency Distillation 방식에서는 ground truth score function <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 을 근사하는 사전학습된 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t)\)</span> 에 의존했다면, 이에 의존하지 않고 다음과 같은  <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 에 대한 unbiased estimator 를 활용할 수도 있습니다. 다시 말해, <span class="math notranslate nohighlight">\(-(x_t - x)/t^2\)</span> 로 <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 를 근사할 수 있습니다.</p>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_18.png"><img alt="consistency_models_18" class="bg-primary mb-1" src="../../_images/consistency_models_18.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 497 </span><span class="caption-text">Unbiased Estimator of Score Function</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(x \sim p_{data}, x_t \sim N(x; t^2I)\)</span></p></li>
-</ul>
-<p>Consistency Training (CT) Loss 는 다음과 같이 정의하고, 이는 사전학습된 diffusion model 파라미터 <span class="math notranslate nohighlight">\(\phi\)</span> 와는 독립적이라는 사실을 확인할 수 있습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{CT}^{N}(\theta,\theta^{-}) = \mathbb{E}[\lambda(t_n)d(f_{\theta}(x + t_{n+1}z, t_{n+1}), f_{\theta^-}(x+t_nz, t_n))]
-\]</div>
-<ul>
-<li><p><span class="math notranslate nohighlight">\(x \sim p_{data}, n \sim \mathbb{U}[[1,N-1]], x_{t_{n+1}} \sim N(x;t^2_{n+1}I), z \sim N(0,I)\)</span></p></li>
-<li><p>Consistency Training Loss 를 위와 같이 정의하게 된 배경은 다음과 같습니다.</p>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_19.png"><img alt="consistency_models_19" class="bg-primary mb-1" src="../../_images/consistency_models_19.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 498 </span><span class="caption-text">Consistency Training Loss</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<figure class="align-default" id="id21">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_20.png"><img alt="consistency_models_20" class="bg-primary mb-1" src="../../_images/consistency_models_20.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 499 </span><span class="caption-text">Schedule Functions for Consistency Training</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>논문에서 추가적인 성능 개선을 위해 <span class="math notranslate nohighlight">\(N\)</span> 을 점진적으로 증가시키는 방식을 제안합니다. <span class="math notranslate nohighlight">\(N\)</span>  이 작을수록 (<span class="math notranslate nohighlight">\(i.e., \Delta t\)</span> 가 클수록) consistency training loss 가 consistency distillation loss 와 비교했을때 variance 는 감소하지만 bias 는 증가하게 되어, 초기에 더 빠른 convergence 에 이르는데 용이합니다. 반면에, <span class="math notranslate nohighlight">\(N\)</span> 이 클수록 (<span class="math notranslate nohighlight">\(i.e., \Delta t\)</span> 가 작을수록) variance 는 증가하고 bias 는 감소하게 되어, 학습의 마지막 단계에 적용하는 것이 바람직하다고 제안합니다. EMA decay rate schedule function <span class="math notranslate nohighlight">\(\mu(\cdot)\)</span> 도 <span class="math notranslate nohighlight">\(N\)</span>  에 따라 바뀌는 함수로 최종적으로 정의합니다. 전체적인 학습 절차는 하단의 Algorithm 3 처럼 정리할 수 있습니다.</p>
-<figure class="align-default" id="id22">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_21.png"><img alt="consistency_models_21" class="bg-primary mb-1" src="../../_images/consistency_models_21.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 500 </span><span class="caption-text">Overview of Consistency Training (CT)</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="experiments">
-<h2>6. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>논문에서 CIFAR-10, ImageNet 64x64, LSUN Bedroom 256x256, 그리고 LSUN Cat 256x256 데이터셋에 consistency distillation, consistency training 두 학습 방식을 모두 실험하였고, 모델 성능 지표는 FID, IS, Precision, 그리고 Recall 을 사용하였습니다. 모델 architecture 는 CIFAR-10 데이터셋에는 NCSN++, 그리고 그 외 데이터셋에는 ADM 모델을 사용하였습니다.</p>
-<section id="training-consistency-models">
-<h3>6.1. Training Consistency Models<a class="headerlink" href="#training-consistency-models" title="Permalink to this heading">#</a></h3>
-<p>CIFAR-10 데이터셋에 다음과 같은 hyperparameter tuning 작업을 진행하였습니다. (metric function <span class="math notranslate nohighlight">\(d(\cdot,\cdot)\)</span>, ODE solver, CD (Consistency Distillation) 에서의 number of discretization steps <span class="math notranslate nohighlight">\(N\)</span>, 그리고 CT (Consistency Training) 에서의 schedule functions <span class="math notranslate nohighlight">\(N(\cdot), \mu(\cdot)\)</span>)</p>
-<p><strong>Parameter Initialization</strong></p>
-<p>모델 초기 파라미터 값은 다음과 같이 설정하였습니다.</p>
-<ul class="simple">
-<li><p>Consistency Distillation - 사전학습된 diffusion model 파라미터 값</p></li>
-<li><p>Consistency Training - Random Initialization</p></li>
-</ul>
-<p><strong>Results</strong></p>
-<p>모델 실험 결과를 다음과 같이 정리할 수 있습니다.</p>
-<figure class="align-default" id="id23">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_22.png"><img alt="consistency_models_22" class="bg-primary mb-1" src="../../_images/consistency_models_22.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 501 </span><span class="caption-text">Experimental Results - Hyperparameters</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p>Consistency Distillation (CD)</p>
-<ul class="simple">
-<li><p>LPIPS 를 metric function 으로 사용했을때 모델 성능이 가장 좋았고, 이는 LPIPS metric 이 CIFAR-10 와 같은 natural image 들 간의 유사도를 측정하는데 특화되어 있기 때문이라고 합니다.</p></li>
-<li><p>Euler ODE solver 보다 Heun ODE solver 를 사용했을 때, 그리고 <span class="math notranslate nohighlight">\(N = 18\)</span> 로 설정했을때 모델 성능이 가장 좋았습니다. 또한, 동일한 <span class="math notranslate nohighlight">\(N\)</span> 에 대해서 Heun’s second-order ODE solver 를 사용했을때 Euler’s first-order ODE solver 를 사용했을 때보다 모델 성능이 우월한 부분을 확인할 수 있었다고 합니다.</p></li>
-</ul>
-<p>이외에도 다른 데이터셋에 hyperparameter tuning 작업을 별도로 진행하였습니다.</p>
-</li>
-<li><p>Consistency Training (CT)</p>
-<ul class="simple">
-<li><p>CD 와 동일하게 LPIPS metric function 을 사용하였고, ODE solver 는 사용하지 않았습니다.</p></li>
-<li><p><span class="math notranslate nohighlight">\(N\)</span> 이 작을수록, 모델이 더 빨리 수렴하지만 생성된 이미지 퀄리티는 좋지 않은 부분을 재차 확인할 수 있습니다. (and vice versa)</p></li>
-<li><p><span class="math notranslate nohighlight">\(N\)</span> 을 점차적으로 증가시키면서 <span class="math notranslate nohighlight">\(\mu\)</span> 도 변화시켰을때 성능이 가장 좋았습니다.</p></li>
-</ul>
-</li>
-</ul>
-<section id="few-step-image-generation">
-<h4>6.2. Few-Step Image Generation<a class="headerlink" href="#few-step-image-generation" title="Permalink to this heading">#</a></h4>
-<p><strong>Distillation</strong></p>
-<p>논문에서는 Consistency Distillation 모델의 성능을 synthetic data generation 을 필요로 하지 않는 knowledge distillation 기법 (PD, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a>) 과 다음과 같이 비교합니다.</p>
-<figure class="align-default" id="id24">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_23.png"><img alt="consistency_models_23" class="bg-primary mb-1" src="../../_images/consistency_models_23.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 502 </span><span class="caption-text">Experimental Results - Distillation</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Direct Generation</strong></p>
-<p>CIFAR-10 데이터셋 기준으로 VAE, normalizing flow 를 비롯한 타 single-step generative model 보다 CT 가 성능이 가장 좋았습니다. 또한, distillation 기법 없이도 Progressive Distillation (PD, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a>) 와 견줄만한 성능을 가진 부분을 확인할 수 있습니다. 마지막으로 동일한 noise 로부터 높은 structural similarity 를 가진 이미지들을 생성함으로써 self-consistency 성질도 확인할 수 있었다고 합니다.</p>
-<figure class="align-default" id="id25">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_24.png"><img alt="consistency_models_24" class="bg-primary mb-1" src="../../_images/consistency_models_24.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 503 </span><span class="caption-text">Experimental Results - Direct Generation</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="zero-shot-image-editing">
-<h3>6.3. Zero-Shot Image Editing<a class="headerlink" href="#zero-shot-image-editing" title="Permalink to this heading">#</a></h3>
-<p>Diffusion model 과 유사하게 consistency model 도 multistep sampling 알고리즘을 수정함으로써 zero shot image editing 이 가능합니다. 해당 사진은 LSUN Bedroom 데이터셋에 colorization, super-resolution, stroke-guided image editing task 를 적용한 결과입니다.</p>
-<figure class="align-default" id="id26">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_25.png"><img alt="consistency_models_25" class="bg-primary mb-1" src="../../_images/consistency_models_25.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 504 </span><span class="caption-text">Pseudocode for Zero-Shot Image Editing</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id27">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_26.png"><img alt="consistency_models_26" class="bg-primary mb-1" src="../../_images/consistency_models_26.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 505 </span><span class="caption-text">Zero-Shot Image Editing Results</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="GIGAGAN.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Scaling up GANs for Text-to-Image Synthesis</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="latent_consistency_models.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Latent Consistency Models</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2. Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3.  Consistency Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-via-distillation">4. Training Consistency Models via Distillation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-in-isolation">5. Training Consistency Models in Isolation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">6. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models">6.1. Training Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#few-step-image-generation">6.2. Few-Step Image Generation</a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-shot-image-editing">6.3. Zero-Shot Image Editing</a></li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Consistency Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/consistency_models';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Latent Consistency Models" href="latent_consistency_models.html" />
+    <link rel="prev" title="Scaling up GANs for Text-to-Image Synthesis" href="GIGAGAN.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/consistency_models.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/consistency_models.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Consistency Models</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2. Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3.  Consistency Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-via-distillation">4. Training Consistency Models via Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-in-isolation">5. Training Consistency Models in Isolation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">6. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models">6.1. Training Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#few-step-image-generation">6.2. Few-Step Image Generation</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-shot-image-editing">6.3. Zero-Shot Image Editing</a></li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Consistency Models (ICML 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2303.01469">https://arxiv.org/pdf/2303.01469</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/openai/consistency_models">openai/consistency_models</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Apr. 26, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="consistency-models">
+<h1>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Diffusion Models 은 image generation, audio synthesis, video generation 등의 다양한 분야에서 연구가 진행되어 왔습니다. 하지만 single-step generative model 인 VAE, GAN, 그리고 normalizing flows 에 비해 추론 시 10-2000배 더 많은 연산작업을 요하는 치명적인 단점이 존재합니다.</p>
+<p>이러한 문제를 해결하기 위해 논문에서 Consistency Model 을 소개합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_01.png"><img alt="consistency_models_01" class="bg-primary mb-1" src="../../_images/consistency_models_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 480 </span><span class="caption-text">Overview of Consistency Models</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위의 사진처럼 주어진 PF ODE (Probability Flow Ordinary Differential Equation) 에 대해서 동일한 trajectory 에 있는 point 들이 동일한 시작점으로 매핑되도록 모델을 학습하는 방식을 제안하고, 이러한 self-consistency 특성을 만족시킬 수 있도록 2가지 학습 방식을 소개합니다.</p>
+<ol class="arabic simple">
+<li><p>첫번째 방식으로는 우선적으로 numerical ODE solver 와 사전 학습된 diffusion model 을 사용하여 PF ODE trajectory 에서 인접한 point 쌍을 생성합니다. 그리고 이러한 쌍에 대한 모델 출력 간의 차이를 최소화하도록 모델을 학습함으로써 diffusion model 에서 consistency model 로 knowledge distillation 을 효과적으로 적용할 수 있고, 단 한번의 step 만으로도 high quality sample 을 생성할 수 있게 됩니다.</p></li>
+<li><p>두번째 방식으로는 사전학습된 diffusion model 에 의존하지 않고 독립적으로 consistency model 을 학습하는 방식입니다.</p></li>
+</ol>
+<p>CIFAR-10, ImageNet 64x64, LSUN 256x256 데이터셋에 실험한 결과, 기존 distillation 기법을 적용한 모델 (i.e., progressive distillation) 보다 성능이 개선되고, 독립적인 모델로서도 사전학습된 diffusion model 없이 GAN 을 제외한 single-step generative model 보다 성능이 좋다고 합니다. 마지막으로 다양한 zero-shot data editing (image denoising, interpolation, inpainting, colorization, super-resolution, stroke-guided image editing) task 에도 consistency model 이 좋은 성능을 보여준다는 것을 확인하였습니다.</p>
+</section>
+<section id="diffusion-models">
+<h2>2. Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h2>
+<p><strong>Diffusion Models in Continuous Time</strong></p>
+<p>Continuous time 에서의 diffusion model 을 다음과 같이 SDE 수식으로 표현할 수 있습니다. (<a class="reference external" href="https://arxiv.org/abs/2011.13456">Song et al., 2021</a>)</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_02.png"><img alt="consistency_models_02" class="bg-primary mb-1" src="../../_images/consistency_models_02.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 481 </span><span class="caption-text">Diffusion Models in Continuous Time</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(t \in [0,T], T &gt; 0\)</span></p></li>
+<li><p><span class="math notranslate nohighlight">\(\mu(\cdot,\cdot), \sigma(\cdot)\)</span> := drift, diffusion coefficients</p></li>
+<li><p><span class="math notranslate nohighlight">\(\{w_t\}_{t \in [0,T]}\)</span> := standard Brownian motion</p></li>
+</ul>
+<p>그리고 해당 SDE 는 아래 식과 같은 PF ODE 로 표현할 수 있다는 성질을 가지고 있습니다. 이때 <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 를 <em>score function</em> 이라고 하고, 시점 t 에 대한 solution trajectory 가 <span class="math notranslate nohighlight">\(p_t(x)\)</span> 에 따라 분포합니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_03.png"><img alt="consistency_models_03" class="bg-primary mb-1" src="../../_images/consistency_models_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 482 </span><span class="caption-text">Probability Flow (PF) ODE</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>일반적으로 위의 SDE 수식에서 <span class="math notranslate nohighlight">\(p_T(x)\)</span> 가 Gaussian distribution <span class="math notranslate nohighlight">\(\pi(x)\)</span> 를 따르도록 정의하고, 해당 논문에서 <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 와 동일하게 <span class="math notranslate nohighlight">\(\mu(x,t) = 0, \sigma(t) = \sqrt{2}t\)</span> 로 설정하였습니다. 그리고 sampling 시, score matching 을 통해 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t) \approx \nabla \log p_t(x)\)</span> 를 우선적으로 학습한 후, 위의 식에 대입하면 다음과 같은 empirical PF ODE 로 표현할 수 있습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_04.png"><img alt="consistency_models_04" class="bg-primary mb-1" src="../../_images/consistency_models_04.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 483 </span><span class="caption-text">Empirical PF ODE</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>다음 과정으로는 initial condition <span class="math notranslate nohighlight">\(x_T \sim N(0, T^2I)\)</span>  을 기반으로 Euler 나 Heun 등의 numerical ode solver 를 통해 역방향으로 풀어 solution trajectory <span class="math notranslate nohighlight">\(\{x_t\}_{t \in [0,T]}\)</span> 를 구합니다. 이렇게 얻은 <span class="math notranslate nohighlight">\(\hat{x}_0\)</span> 은 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span> 로부터 나온 샘플에 대한 근사값이라고 할 수 있습니다.</p>
+<p>추가적으로 논문에서 numerical instability 를 방지하기 위해 고정된 작은 양수 <span class="math notranslate nohighlight">\(\epsilon\)</span> 에 대해 <span class="math notranslate nohighlight">\(t = \epsilon\)</span> 에서 solver 를 멈추고 <span class="math notranslate nohighlight">\(\hat{x}_{\epsilon}\)</span> 를 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span> 에 대한 근사값으로 간주합니다. 그리고 <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 와 동일하게 이미지 픽셀 값을 <span class="math notranslate nohighlight">\([-1,1]\)</span> 로 rescale 하고  <span class="math notranslate nohighlight">\(T = 80, \epsilon = 0.002\)</span> 로 설정합니다.</p>
+<p>앞서 소개드린 방식으로 diffusion model 을 통한 sampling 시, ode solver 를 사용하는데 score model <span class="math notranslate nohighlight">\(s_{\phi}(x_t,t)\)</span> 의 수많은 iterative evaluation 작업이 필요합니다. 빠른 sampling 작업을 위해, 더 빠른 numerical ode solver 에 대한 연구들이 진행되었지만, 이를 활용해도 최소 10번 이상의 evaluation step 을 거쳐야만 competitive 한 성능을 보여준다고 합니다. 여러 distillation 기법들에 대한 연구들도 진행되었지만, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a> 를 제외하고는 distillation 작업을 하기 위해서 사전에 diffusion model 로부터 대량의 데이터를 수집해야한다는 단점이 있습니다.</p>
+</section>
+<section id="id1">
+<h2>3.  Consistency Models<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>논문에서 single-step generation 이 가능한 consistency model 을 제안하고, 학습하는 방식으로 1) 사전학습된 diffusion model 로부터 knowledge distillation 진행하는 방식과 2) 독립적으로 학습하는 방식 을 소개합니다.</p>
+<p><strong>Definition</strong></p>
+<p>주어진 PF ODE 에 대한 trajectory <span class="math notranslate nohighlight">\(\{x_t\}_{t \in [0,T]}\)</span> 에 대해서 다음과 같은 <em>consistency function</em>            <span class="math notranslate nohighlight">\(f : (x_t, t) \mapsto x_{\epsilon}\)</span> 을 정의하고, 함수 <span class="math notranslate nohighlight">\(f\)</span> 는 동일한 trajectory 에 있는 임의의 두 <span class="math notranslate nohighlight">\(t, t' \in [\epsilon, T]\)</span> 에 대해 <span class="math notranslate nohighlight">\(f(x_t, t) = f(x_{t'}, t')\)</span>, 즉 <em>self-consistency</em> 성질을 만족합니다. 이러한 함수 <span class="math notranslate nohighlight">\(f\)</span> 를 예측하기 위해 데이터로부터 <span class="math notranslate nohighlight">\(f_{\theta}\)</span> 을 학습하는 것이 consistency model 의 objective 이며, 이는 <a class="reference external" href="https://arxiv.org/abs/2110.13040">Bilos et al., 2021</a> 에서 정의하는 neural flow 와 유사하지만, consistency model 은 invertible property 를 부여하지는 않습니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_05.png"><img alt="consistency_models_05" class="bg-primary mb-1" src="../../_images/consistency_models_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 484 </span><span class="caption-text">Self-Consistency</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Parameterization</strong></p>
+<p>앞서 정의한 부분에 의해, 모든 consistency function <span class="math notranslate nohighlight">\(f\)</span> 에 대해서 <span class="math notranslate nohighlight">\(f(x_{\epsilon}, \epsilon) = x_{\epsilon}\)</span> 를 만족해야 하는 <em>boundary condition</em> 이 존재하고, 이를 만족하기 위해 다음과 같이 두 가지 방식으로 parameterization 방식을 정의하였습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_06.png"><img alt="consistency_models_06" class="bg-primary mb-1" src="../../_images/consistency_models_06.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 485 </span><span class="caption-text">Parameterization Method 1</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>여기서 <span class="math notranslate nohighlight">\(F_{\theta}(x,t)\)</span> 는 output 차원이 <span class="math notranslate nohighlight">\(x\)</span> 와 동일한 free-form deep neural network 입니다. 두번째 방식으로는, 다음과 같이 skip-connection 을 활용합니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_07.png"><img alt="consistency_models_07" class="bg-primary mb-1" src="../../_images/consistency_models_07.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 486 </span><span class="caption-text">Parameterization Method 2</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이때, <span class="math notranslate nohighlight">\(F_{\theta}(x,t), c_{skip}(t), c_{out}(t)\)</span> 는 <span class="math notranslate nohighlight">\(c_{skip}(\epsilon) = 1, c_{out}(\epsilon) = 0\)</span> 조건을 만족시키는 미분 가능한 함수로 정의합니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_08.png"><img alt="consistency_models_08" class="bg-primary mb-1" src="../../_images/consistency_models_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 487 </span><span class="caption-text">Parameterization Method 2 - <span class="math notranslate nohighlight">\(c_{skip}(t), c_{out}(t)\)</span></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>두번째 parameterization 기법이 diffusion model 형식과 매우 유사하여 이러한 diffusion model 아키텍쳐를 leverage 하기 위해 논문에서 두번째 방식으로 parameterization 을 하는 방식을 택합니다.</p>
+<p><strong>Sampling</strong></p>
+<p>학습된 consistency model <span class="math notranslate nohighlight">\(f_{\theta}(\cdot,\cdot)\)</span> 와 initial distribution <span class="math notranslate nohighlight">\(\hat{x}_T \sim N(0, T^2I)\)</span> 를 활용하여 단일 sampling step 만으로 <span class="math notranslate nohighlight">\(\hat{x}_{\epsilon} = f_{\theta}(\hat{x}_{T}, T)\)</span> 를 생성할 수 있습니다. 더불어 하단 pseudo code 에 보이듯이, denoising 과 noise injection 작업을 여러 번 거치는 <em>multistep</em> consistency sampling 을 통해 computing cost 와 sample quality 를 trade-off 할 수 있는 유연성도 제공합니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_09.png"><img alt="consistency_models_09" class="bg-primary mb-1" src="../../_images/consistency_models_09.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 488 </span><span class="caption-text">Multistep Consistency Sampling</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Zero-Shot Data Editing</strong></p>
+<p>Diffusion model 과 유사하게 추가적인 학습 없이 zero-shot 형태로 image editing 그리고 manipulation task 가 가능합니다. 예를 들어, consistency model 은 Gaussian noise 로부터 one-to-one mapping 을 통해 <span class="math notranslate nohighlight">\(x_{\epsilon}\)</span>을 생성하기 때문에 GAN, VAE, normalizing flow 와 유사하게 latent space 를 통해 sample 들간의 interpolation 이 가능합니다. 또한, multistep consistency sampling 을 통해 zero-shot 의 한계점을 보완하면서 image editing, inpainting, colorization, super-resolution, stroke-guided image editing (<a class="reference external" href="https://arxiv.org/abs/2108.01073">Meng et al., 2021</a>) 등의 다양한 task 를 수행할 수 있습니다. 대표적으로 몇 가지 zero-shot image editing 결과 예시들을 공유합니다.</p>
+<ul>
+<li><p>Inpainting</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_10.png"><img alt="consistency_models_10" class="bg-primary mb-1" src="../../_images/consistency_models_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 489 </span><span class="caption-text">Consistency Models - Inpainting</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Interpolation</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_11.png"><img alt="consistency_models_11" class="bg-primary mb-1" src="../../_images/consistency_models_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 490 </span><span class="caption-text">Consistency Models - Interpolation</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p>Self-consistency</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_12.png"><img alt="consistency_models_12" class="bg-primary mb-1" src="../../_images/consistency_models_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 491 </span><span class="caption-text">Consistency Models - Self-consistency</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="training-consistency-models-via-distillation">
+<h2>4. Training Consistency Models via Distillation<a class="headerlink" href="#training-consistency-models-via-distillation" title="Permalink to this heading">#</a></h2>
+<p>앞서 소개한 두 가지 학습 방식 중, 첫번째로 사전학습된 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t)\)</span> 를 consistency model 에 knowledge distillation 하여 학습하는 방식을 소개합니다.</p>
+<p>논문에서 특히 continuous time interval <span class="math notranslate nohighlight">\([\epsilon, T]\)</span> 을 boundary <span class="math notranslate nohighlight">\(t_1 = \epsilon &lt; t_2 &lt; \cdots &lt; t_N=T\)</span> 를 기준으로 <span class="math notranslate nohighlight">\(N-1\)</span> 개의 interval 로 discretize 하는 상황을 가정하고, <a class="reference external" href="https://arxiv.org/abs/2206.00364">Karras et al., 2022</a> 의 설정과 동일하게 boundary 를 다음과 같이 정의합니다.</p>
+<div class="math notranslate nohighlight">
+\[
+t_i = (\epsilon^{1/\rho} + \frac{i-1}{N-1}(T^{1/\rho}-\epsilon^{1/\rho}))^{\rho}, \quad \rho = 7
+\]</div>
+<p>이때, <span class="math notranslate nohighlight">\(N\)</span> 이 충분히 크다면 다음과 같이 numerical ode solver 의 discretization step 을 한번 진행시켜 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span> 로부터 <span class="math notranslate nohighlight">\(x_{t_n}\)</span> 에 대한 정확한 예측값을 얻을 수 있다고 합니다.</p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_13.png"><img alt="consistency_models_13" class="bg-primary mb-1" src="../../_images/consistency_models_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 492 </span><span class="caption-text">One Discretization Step of a Numerical ODE Solver</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이때, <span class="math notranslate nohighlight">\(\Phi(\cdots; \phi)\)</span> 은 empirical PF ODE 에 적용되는 one step ODE solver 의 update function 입니다. Euler solver 같은 경우, <span class="math notranslate nohighlight">\(\Phi(x,t;\phi) = -ts_{\phi}(x,t)\)</span> 를 대입하면 다음과 같이 표현할 수 있게 됩니다.</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_14.png"><img alt="consistency_models_14" class="bg-primary mb-1" src="../../_images/consistency_models_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 493 </span><span class="caption-text">One Discretization Step of a Euler ODE Solver</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위 수식을 활용하여 주어진 <span class="math notranslate nohighlight">\(x \sim p_{data}\)</span> 에 대해서 PF ODE trajectory 에 있는 인접한 두 지점 <span class="math notranslate nohighlight">\((\hat{x}^{\phi}_{t_n}, x_{t_{n+1}})\)</span> 을 샘플링할 수 있게 됩니다. 더 자세하게는, <span class="math notranslate nohighlight">\(x_{t_{n+1}} \sim N(x,t^2_{n+1}I)\)</span> 를 우선적으로 샘플링한 후, 위 수식을 기반으로 numerical ode solver 의 discretization step 을 한번 거쳐 <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span> 을 계산합니다. 최종적으로 <span class="math notranslate nohighlight">\((\hat{x}^{\phi}_{t_n}, x_{t_{n+1}})\)</span> 로부터의 모델 출력값 차이를 최소화하도록 consistency model 을 학습하게 되고, 학습 시 사용되는 <em>consistency distillation loss</em> 는 다음과 같이 정의합니다.</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_15.png"><img alt="consistency_models_15" class="bg-primary mb-1" src="../../_images/consistency_models_15.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 494 </span><span class="caption-text">Consistency Distillation Loss</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>여기서 metric function <span class="math notranslate nohighlight">\(d(\cdot,\cdot)\)</span> 로는 L1 distance, L2 distance, 그리고 LPIPS distance 를 사용하였고, 실험적으로 확인해본 결과 <span class="math notranslate nohighlight">\(\lambda(t_n) \equiv 1\)</span> 를 적용했을 때 모델 성능이 가장 좋은 부분을 확인할 수 있었다고 합니다. 파라미터 <span class="math notranslate nohighlight">\(\theta\)</span> 는 stochastic gradient descent 그리고 <span class="math notranslate nohighlight">\(\theta^-\)</span> 는 exponential moving average (EMA) 로 학습하였다고 합니다.</p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_16.png"><img alt="consistency_models_16" class="bg-primary mb-1" src="../../_images/consistency_models_16.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 495 </span><span class="caption-text">Exponential Moving Average (EMA)</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이처럼 EMA update 와 stopgrad operator 로 학습할 시, <span class="math notranslate nohighlight">\(\theta = \theta^{-}\)</span> 로 설정할때보다 모델을 더 안정적으로 학습할 수 있고 성능에도 개선이 있었다고 합니다. 전체적인 학습 절차는 하단의 Algorithm 2 처럼 정리할 수 있습니다.</p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_17.png"><img alt="consistency_models_17" class="bg-primary mb-1" src="../../_images/consistency_models_17.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 496 </span><span class="caption-text">Overview of Consistency Distillation (CD)</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\theta^{-}\)</span>  는 과거 <span class="math notranslate nohighlight">\(\theta\)</span> 에 대한 running average 이므로 Algorithm 2 가 수렴할 시 이 둘은 일치하게 됩니다. 즉, target network <span class="math notranslate nohighlight">\(f_{\theta^-}\)</span>와 online network <span class="math notranslate nohighlight">\(f_{\theta}\)</span>  가 일치하게 됩니다.</p>
+</section>
+<section id="training-consistency-models-in-isolation">
+<h2>5. Training Consistency Models in Isolation<a class="headerlink" href="#training-consistency-models-in-isolation" title="Permalink to this heading">#</a></h2>
+<p>Consistency Distillation 방식에서는 ground truth score function <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 을 근사하는 사전학습된 score model <span class="math notranslate nohighlight">\(s_{\phi}(x,t)\)</span> 에 의존했다면, 이에 의존하지 않고 다음과 같은  <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 에 대한 unbiased estimator 를 활용할 수도 있습니다. 다시 말해, <span class="math notranslate nohighlight">\(-(x_t - x)/t^2\)</span> 로 <span class="math notranslate nohighlight">\(\nabla \log p_t(x_t)\)</span> 를 근사할 수 있습니다.</p>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_18.png"><img alt="consistency_models_18" class="bg-primary mb-1" src="../../_images/consistency_models_18.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 497 </span><span class="caption-text">Unbiased Estimator of Score Function</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(x \sim p_{data}, x_t \sim N(x; t^2I)\)</span></p></li>
+</ul>
+<p>Consistency Training (CT) Loss 는 다음과 같이 정의하고, 이는 사전학습된 diffusion model 파라미터 <span class="math notranslate nohighlight">\(\phi\)</span>  와는 독립적인 사실을 확인할 수 있습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{CT}^{N}(\theta,\theta^{-}) = \mathbb{E}[\lambda(t_n)d(f_{\theta}(x + t_{n+1}z, t_{n+1}), f_{\theta^-}(x+t_nz, t_n))]
+\]</div>
+<ul>
+<li><p><span class="math notranslate nohighlight">\(x \sim p_{data}, n \sim \mathbb{U}[[1,N-1]], x_{t_{n+1}} \sim N(x;t^2_{n+1}I), z \sim N(0,I)\)</span></p></li>
+<li><p>Consistency Training Loss 를 위와 같이 정의하게 된 배경은 다음과 같습니다.</p>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_19.png"><img alt="consistency_models_19" class="bg-primary mb-1" src="../../_images/consistency_models_19.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 498 </span><span class="caption-text">Consistency Training Loss</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<figure class="align-default" id="id21">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_20.png"><img alt="consistency_models_20" class="bg-primary mb-1" src="../../_images/consistency_models_20.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 499 </span><span class="caption-text">Schedule Functions for Consistency Training</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>논문에서 추가적인 성능 개선을 위해 <span class="math notranslate nohighlight">\(N\)</span> 을 점진적으로 증가시키는 방식을 제안합니다. <span class="math notranslate nohighlight">\(N\)</span>  이 작을수록 (<span class="math notranslate nohighlight">\(i.e., \Delta t\)</span> 가 클수록) consistency training loss 가 consistency distillation loss 와 비교했을때 variance 는 감소하지만 bias 는 증가하게 되어, 초기에 더 빠른 convergence 에 이르는데 용이합니다. 반면에, <span class="math notranslate nohighlight">\(N\)</span> 이 클수록 (<span class="math notranslate nohighlight">\(i.e., \Delta t\)</span> 가 작을수록) variance 는 증가하고 bias 는 감소하게 되어, 학습의 마지막 단계에 적용하는 것이 바람직하다고 제안합니다. EMA decay rate schedule function <span class="math notranslate nohighlight">\(\mu(\cdot)\)</span> 도 <span class="math notranslate nohighlight">\(N\)</span>  에 따라 바뀌는 함수로 최종적으로 정의합니다. 전체적인 학습 절차는 하단의 Algorithm 3 처럼 정리할 수 있습니다.</p>
+<figure class="align-default" id="id22">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_21.png"><img alt="consistency_models_21" class="bg-primary mb-1" src="../../_images/consistency_models_21.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 500 </span><span class="caption-text">Overview of Consistency Training (CT)</span><a class="headerlink" href="#id22" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="experiments">
+<h2>6. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>논문에서 CIFAR-10, ImageNet 64x64, LSUN Bedroom 256x256, 그리고 LSUN Cat 256x256 데이터셋에 consistency distillation, consistency training 두 학습 방식을 모두 실험하였고, 모델 성능 지표는 FID, IS, Precision, 그리고 Recall 을 사용하였습니다. 모델 architecture 는 CIFAR-10 데이터셋에는 NCSN++, 그리고 그 외 데이터셋에는 ADM 모델을 사용하였습니다.</p>
+<section id="training-consistency-models">
+<h3>6.1. Training Consistency Models<a class="headerlink" href="#training-consistency-models" title="Permalink to this heading">#</a></h3>
+<p>CIFAR-10 데이터셋에 다음과 같은 hyperparameter tuning 작업을 진행하였습니다. (metric function <span class="math notranslate nohighlight">\(d(\cdot,\cdot)\)</span>, ODE solver,  CD (Consistency Distillation) 에서의 number of discretization steps <span class="math notranslate nohighlight">\(N\)</span>, 그리고 CT (Consistency Training) 에서의 schedule functions <span class="math notranslate nohighlight">\(N(\cdot), \mu(\cdot)\)</span>)</p>
+<p><strong>Parameter Initialization</strong></p>
+<p>모델 초기 파라미터 값은 다음과 같이 설정하였습니다.</p>
+<ul class="simple">
+<li><p>Consistency Distillation - 사전학습된 diffusion model 파라미터 값</p></li>
+<li><p>Consistency Training - Random Initialization</p></li>
+</ul>
+<p><strong>Results</strong></p>
+<p>모델 실험 결과를 다음과 같이 정리할 수 있습니다.</p>
+<figure class="align-default" id="id23">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_22.png"><img alt="consistency_models_22" class="bg-primary mb-1" src="../../_images/consistency_models_22.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 501 </span><span class="caption-text">Experimental Results - Hyperparameters</span><a class="headerlink" href="#id23" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>Consistency Distillation (CD)</p>
+<ul class="simple">
+<li><p>LPIPS 를 metric function 으로 사용했을때 모델 성능이 가장 좋았고, 이는 LPIPS metric 이 CIFAR-10 와 같은 natural image 들 간의 유사도를 측정하는데 특화되어 있기 때문이라고 합니다.</p></li>
+<li><p>Euler ODE solver 보다 Heun ODE solver 를 사용했을 때, 그리고 <span class="math notranslate nohighlight">\(N = 18\)</span> 로 설정했을때 모델 성능이 가장 좋았습니다. 또한, 동일한 <span class="math notranslate nohighlight">\(N\)</span> 에 대해서 Heun’s second-order ODE solver 를 사용했을때 Euler’s first-order ODE solver 를 사용했을 때보다 모델 성능이 우월한 부분을 확인할 수 있었다고 합니다.</p></li>
+</ul>
+<p>이외에도 다른 데이터셋에 hyperparameter tuning 작업을 별도로 진행하였습니다.</p>
+</li>
+<li><p>Consistency Training (CT)</p>
+<ul class="simple">
+<li><p>CD 와 동일하게 LPIPS metric function 사용하였고, ODE solver 는 사용하지 않았습니다.</p></li>
+<li><p><span class="math notranslate nohighlight">\(N\)</span> 이 작을수록, 모델이 더 빨리 수렴하지만 생성된 이미지 퀄리티는 좋지 않은 부분을 재차 확인할 수 있습니다. (and vice versa)</p></li>
+<li><p><span class="math notranslate nohighlight">\(N\)</span> 을 점차적으로 증가시키면서 <span class="math notranslate nohighlight">\(\mu\)</span> 도 변화시켰을때 성능이 가장 좋았습니다.</p></li>
+</ul>
+</li>
+</ul>
+<section id="few-step-image-generation">
+<h4>6.2. Few-Step Image Generation<a class="headerlink" href="#few-step-image-generation" title="Permalink to this heading">#</a></h4>
+<p><strong>Distillation</strong></p>
+<p>논문에서는 Consistency Distillation 모델의 성능을 synthetic data generation 을 필요로 하지 않는 knowledge distillation 기법 (PD, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a>) 과 다음과 같이 비교합니다.</p>
+<figure class="align-default" id="id24">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_23.png"><img alt="consistency_models_23" class="bg-primary mb-1" src="../../_images/consistency_models_23.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 502 </span><span class="caption-text">Experimental Results - Distillation</span><a class="headerlink" href="#id24" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Direct Generation</strong></p>
+<p>CIFAR-10 데이터셋 기준으로 VAE, normalizing flow 를 비롯한 타 single-step generative model 보다 CT 가 성능이 가장 좋았습니다. 또한, distillation 기법 없이도 Progressive Distillation (PD, <a class="reference external" href="https://arxiv.org/abs/2202.00512">Salimans &amp; Ho (2022)</a>) 와 견줄만한 성능을 가진 부분을 확인할 수 있습니다. 마지막으로 동일한 noise 로부터 높은 structural similarity 를 가진 이미지들을 생성함으로써 self-consistency 성질도 확인할 수 있었다고 합니다.</p>
+<figure class="align-default" id="id25">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_24.png"><img alt="consistency_models_24" class="bg-primary mb-1" src="../../_images/consistency_models_24.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 503 </span><span class="caption-text">Experimental Results - Direct Generation</span><a class="headerlink" href="#id25" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="zero-shot-image-editing">
+<h3>6.3. Zero-Shot Image Editing<a class="headerlink" href="#zero-shot-image-editing" title="Permalink to this heading">#</a></h3>
+<p>Diffusion model 과 유사하게 consistency model 도 multistep sampling 알고리즘을 수정함으로써 zero shot image editing 이 가능합니다. 해당 사진은 LSUN Bedroom 데이터셋에 colorization, super-resolution, stroke-guided image editing task 를 적용한 결과입니다.</p>
+<figure class="align-default" id="id26">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_25.png"><img alt="consistency_models_25" class="bg-primary mb-1" src="../../_images/consistency_models_25.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 504 </span><span class="caption-text">Pseudocode for Zero-Shot Image Editing</span><a class="headerlink" href="#id26" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id27">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/consistency_models_26.png"><img alt="consistency_models_26" class="bg-primary mb-1" src="../../_images/consistency_models_26.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 505 </span><span class="caption-text">Zero-Shot Image Editing Results</span><a class="headerlink" href="#id27" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="GIGAGAN.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Scaling up GANs for Text-to-Image Synthesis</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="latent_consistency_models.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Latent Consistency Models</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">2. Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3.  Consistency Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-via-distillation">4. Training Consistency Models via Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models-in-isolation">5. Training Consistency Models in Isolation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">6. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-consistency-models">6.1. Training Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#few-step-image-generation">6.2. Few-Step Image Generation</a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#zero-shot-image-editing">6.3. Zero-Shot Image Editing</a></li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/cycleGAN.html b/docs/review/cycleGAN.html
old mode 100644
new mode 100755
index 19419c90..4e83b711
--- a/docs/review/cycleGAN.html
+++ b/docs/review/cycleGAN.html
@@ -1,1018 +1,1038 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>CycleGAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/cycleGAN';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="StyleGAN" href="StyleGAN.html" />
-    <link rel="prev" title="A Study on the Evaluation of Generative Models" href="A_Study_on_the_Evaluation_of_Generative_Models.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/cycleGAN.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/cycleGAN.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>CycleGAN</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-translation">Image-to-Image Translation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mode-collapse">Mode Collapse</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adversarial-loss">Adversarial Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#cycle-consistency-loss">Cycle Consistency Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#full-objective">Full Objective</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">Network Architecture</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#least-square-loss">(참고) least-square loss 추가 설명</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-against-baselines">Comparison against baselines</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study-analysis-of-the-loss-function">Ablation Study - Analysis of the loss function</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-reconstruction-quality">Image reconstruction quality</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-results-on-paired-datasets">Additional results on paired datasets</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-discusssion">Limitations and Discussion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks (ICCV 2017)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1703.10593">https://arxiv.org/abs/1703.10593</a></p></li>
-<li><p>Code: <a class="reference external" href="https://www.tensorflow.org/tutorials/generative/cyclegan?hl=ko">TensorFlow CycleGAN tutorial</a></p></li>
-<li><p><a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">[논문리뷰] Cycle GAN: Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks</a>
-<a class="reference external" href="https://comlini8-8.tistory.com/9">CycleGAN을 만든 사람이 한국인이라고? CycleGAN 논문 뜯어보기</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> KwangSu Mun</p></li>
-<li><p><strong>Author:</strong> ChangHwan Lee</p></li>
-<li><p><strong>Last updated on May. 16, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="cyclegan">
-<h1>CycleGAN<a class="headerlink" href="#cyclegan" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Image-to-image translation 은 한 이미지 도메인을 다른 이미지 도메인으로 변환시키는 computer vision 의 한 task 입니다.</p></li>
-<li><p>Image-to-image translation 은 보통 input과 output이 짝이 지어진 상태에서 학습하지만 짝이 지어진 학습 데이터를 얻는 것이 어렵습니다. 따라서 CycleGAN 논문에서는 짝지어진 예시 없이 <span class="math notranslate nohighlight">\(X\)</span> 라는 domain 으로부터 얻은 이미지를 target domain <span class="math notranslate nohighlight">\(Y\)</span> 로 바꾸는 방법을 제안합니다. 이 연구는 Adversarial loss 를 활용해, <span class="math notranslate nohighlight">\(G(x)\)</span> 로부터 생성된 이미지 데이터의 분포와 <span class="math notranslate nohighlight">\(Y\)</span> 로부터의 이미지 데이터의 분포가 구분이 불가능하도록 함수 <span class="math notranslate nohighlight">\(G: X -&gt; Y\)</span> 를 학습시키는 것을 목표로 합니다. 더불어, <span class="math notranslate nohighlight">\(X -&gt; Y\)</span> 로의 mapping 에 제약을 가해서 원하는 이미지를 강제하기 위해 <span class="math notranslate nohighlight">\(F: Y -&gt; X\)</span> 와 같은 역방향 매핑을 함께 진행합니다. 즉, <span class="math notranslate nohighlight">\(F(G(x))\)</span> 가 <span class="math notranslate nohighlight">\(X\)</span> 와 유사해지도록 강제하는 cycle consistency loss 를 도입했습니다.</p></li>
-<li><p>결과적으로 collection style transfer, object transfiguration, season transfer, photo enhancement 등의 task 에서 이미지 pair 가 존재하지 않는 상태에서 우수한 결과를 보여줬다고 합니다.</p></li>
-</ul>
-</section>
-<section id="related-work">
-<h2>Related work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GAN : adversarial loss 를 사용하여 이미지를 생성하는 방법입니다.</p></li>
-<li><p>Image-to-Image Translation : 특정 image 를 input 로 넣으면 그에 맞는 image 가 output 로 나오는 방법입니다. pix2pix 같은 방법이 있으며 상세한 설명은 아래의 Background section 을 참조하면 됩니다.</p></li>
-<li><p>Unpaired Image-to-Image Translation : 위의 Image-to-Image Translation 에서 pair 가 아닌 데이터로 학습해서 Image-to-Image Translation 과 같은 input, output 결과가 나오도록 만드는 방법입니다.</p></li>
-<li><p>Cycle Consistency : 한 이미지를 다른 도메인으로 변환하고 다시 원래 도메인으로 변환할 때 처음의 원본으로 되도록 하여 일종의 순환(사이클)을 만드는 방법으로 학습 프로세스가 더 안정적이게 되고, 이미지 간의 일관성을 보다 잘 유지할 수 있도록 만듭니다. (ex) <span class="math notranslate nohighlight">\(X\)</span> 를 모델 <span class="math notranslate nohighlight">\(A\)</span> 에 거쳐 <span class="math notranslate nohighlight">\(Y\)</span> 로 만든 뒤 다시 모델 <span class="math notranslate nohighlight">\(B\)</span> 를 거쳐 <span class="math notranslate nohighlight">\(X\)</span> 로 복구)</p></li>
-<li><p>Neural Style Transfer : pre-trained 된 deep features 의 Gram matrix statistics 일치를 기반으로 이미지 content 를 다른 image 의 스타일과 결합하여 새로운 이미지를 합성하는 방법입니다.</p></li>
-</ul>
-</section>
-<section id="background">
-<h2>Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<section id="image-to-image-translation">
-<h3>Image-to-Image Translation<a class="headerlink" href="#image-to-image-translation" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://phillipi.github.io/pix2pix/images/teaser_v3.png"><img alt="https://phillipi.github.io/pix2pix/images/teaser_v3.png" class="bg-primary mb-1" src="https://phillipi.github.io/pix2pix/images/teaser_v3.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 45 </span><span class="caption-text">image-to-image translation</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Image-to-image translation 은 input image 를 다른 스타일, 속성, 구조 등을 가진 output image 로 변환하는 task 입니다. 예를 들어 사진을 그림으로 변환한다거나, 낮에 찍은 사진을 밤에 찍은 것 처럼 변환하는 것을 말합니다. 흔히 translation 은 input 과 output 로 짝이 지어진 데이터를 바탕으로 학습이 이루어져 있었는데요. 짝이 지어진 사진 데이터를 얻는 것은 어렵고 값이 비싼 일이 됩니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 46 </span><span class="caption-text">paired and unpaired data</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이 논문에서는 input image와 output image가 일대일로 짝지어지지 않은 상태에서 하나의 image 모음의 특성을 캡쳐하고, 이러한 특성을 다른 image 모음으로 변환할 수 있는 방법을 제시합니다.
-GAN은 domain <span class="math notranslate nohighlight">\(X\)</span> 에 이미지 한 세트, domain <span class="math notranslate nohighlight">\(Y\)</span> 에 이미지 한 세트가 제공되고, model 의 output 과 <span class="math notranslate nohighlight">\(Y\)</span> 가 discriminator 에 의해 구별할 수 없도록 모델 <span class="math notranslate nohighlight">\(G: X -&gt; Y\)</span> 를 학습합니다. 하지만, 이것이 개별 입력 <span class="math notranslate nohighlight">\(x\)</span> 와 출력 <span class="math notranslate nohighlight">\(y\)</span> 가 무조건 유의미하게 쌍을 이룬다는 것을 뜻하지는 않습니다. <span class="math notranslate nohighlight">\(G\)</span> 가 생성할 수 있는 image 에는 무한한 경우의 수가 있기 때문에 종종 mode collapse 현상이 일어나기도 합니다.</p>
-</section>
-<section id="mode-collapse">
-<h3>Mode Collapse<a class="headerlink" href="#mode-collapse" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png"><img alt="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png" class="bg-primary mb-1" src="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 47 </span><span class="caption-text">mode collapsing 출처: <a class="reference external" href="http://dl-ai.blogspot.com/2017/08/gan-problems.html">http://dl-ai.blogspot.com/2017/08/gan-problems.html</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>어떤 input image 든 모두 같은 output image 로 매핑하면서 최적화에 실패하는 현상입니다. 이 현상은 generator 입장에서, discriminator 가 이 사진이 진짜 <span class="math notranslate nohighlight">\(Y\)</span>인지 가짜인 <span class="math notranslate nohighlight">\(\hat{Y}\)</span>인지 구별하는 것을 ‘<strong>속이기만</strong>’ 하면 되기 때문에 우리의 목적과 전혀 상관이 없는 데이터를 generator 가 만들더라도 문제가 생기지 않아서 발생합니다.</p>
-<ul class="simple">
-<li><p>참고: <a class="reference external" href="http://dl-ai.blogspot.com/2017/08/gan-problems.html">http://dl-ai.blogspot.com/2017/08/gan-problems.html</a></p></li>
-</ul>
-<p>이러한 이슈로 인해 추가 objective function 이 필요해졌습니다. 따라서 translation task 는 영어 -&gt; 프랑스어 -&gt; 영어로 번역했을 때 원래 문장에 다시 도달하는 것처럼, <span class="math notranslate nohighlight">\(X \rightarrow Y \rightarrow X'\)</span> 로 돌아가는 과정에서 <span class="math notranslate nohighlight">\(X\)</span> 와 <span class="math notranslate nohighlight">\(X'\)</span> 이 최대한 같아야 한다는 의미의 cycle consistency 라는 속성을 이용합니다. 필요한 목적식을 간단하게 정리하면 다음과 같습니다.</p>
-<ul class="simple">
-<li><p>정방향, 역방향 adversarial loss: <span class="math notranslate nohighlight">\(X \rightarrow Y\)</span> &amp; <span class="math notranslate nohighlight">\(Y \rightarrow X\)</span></p></li>
-<li><p>Cycle consistency loss: <span class="math notranslate nohighlight">\(x \approx F(G(x))\)</span></p></li>
-</ul>
-</section>
-</section>
-<section id="method">
-<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<!---->
-<!--Overview 에서 전체적인 구성과 학습과정을 설명하며, 아래 "Adversarial Loss", "Cycle consistency Loss"는 모델의 핵심 요소임. 이를 기반으로 "full objective"가 나옴-->
-<section id="overview">
-<h3>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig2.png"><img alt="../../_images/fig2.png" class="bg-primary mb-1" src="../../_images/fig2.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 48 </span><span class="caption-text">CycleGAN 도식화 자료</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>목표: <span class="math notranslate nohighlight">\(X\)</span>, <span class="math notranslate nohighlight">\(Y\)</span> 를 mapping 하는 function 을 학습하는 것</p></li>
-<li><p>용어 정리</p>
-<ul>
-<li><p>data 분포를 <span class="math notranslate nohighlight">\(x \sim p_{data}(x)\)</span>, <span class="math notranslate nohighlight">\(y \sim p_{data}(y)\)</span> 로 표기</p></li>
-<li><p><span class="math notranslate nohighlight">\(G : X -&gt; Y\)</span>, <span class="math notranslate nohighlight">\(F: Y -&gt; X\)</span> 는 generator</p></li>
-<li><p><span class="math notranslate nohighlight">\(D_X\)</span>, <span class="math notranslate nohighlight">\(D_Y\)</span> 는 discriminator</p></li>
-<li><p><span class="math notranslate nohighlight">\(D_X\)</span> 는 <span class="math notranslate nohighlight">\(X\)</span> 와 <span class="math notranslate nohighlight">\(F(y)\)</span> 그리고 <span class="math notranslate nohighlight">\(D_Y\)</span> 는 <span class="math notranslate nohighlight">\(y\)</span> 와 <span class="math notranslate nohighlight">\(G(x)\)</span> 를 구분하고, 다음과 같이 두 개의 목적식으로 학습합니다.</p>
-<ul>
-<li><p>adversarial loss: 생성된 이미지의 분포를 대상 domain 의 data distribution 과 일치시키기 위한 것.</p></li>
-<li><p>cycle consistency loss: 학습된 mapping <span class="math notranslate nohighlight">\(G\)</span> 와 <span class="math notranslate nohighlight">\(F\)</span> 가 서로 모순되는 것을 방지하기 위한 것.</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="adversarial-loss">
-<h3>Adversarial Loss<a class="headerlink" href="#adversarial-loss" title="Permalink to this heading">#</a></h3>
-<p><span class="math notranslate nohighlight">\(G: X -&gt; Y\)</span> 와 <span class="math notranslate nohighlight">\(D_Y\)</span> 에 대한 목적식은 다음과 같습니다.</p>
-<figure class="align-default" id="mathcal-l-gan-loss-function">
-<img alt="L_GAN Loss function" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnvzuE%2Fbtr725OfuJy%2FI1IgwK5PIzXpzINWnJxysK%2Fimg.png" />
-<figcaption>
-<p><span class="caption-number">Fig. 49 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\mathcal{L}_{GAN}\)</span> Loss function (source: <a class="reference external" href="https://arxiv.org/abs/1703.10593">https://arxiv.org/abs/1703.10593</a>)</span><a class="headerlink" href="#mathcal-l-gan-loss-function" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이는 GAN 에서 쓰이는 loss function 을 사용하지만, 차이점이 있다면 <span class="math notranslate nohighlight">\(X -&gt; Y\)</span> 로 갈 때와 <span class="math notranslate nohighlight">\(Y -&gt; X\)</span> 로 갈 때 총 두 개의 수식이 나옵니다. 다시 말해, <span class="math notranslate nohighlight">\(F: Y -&gt; X\)</span> 와 <span class="math notranslate nohighlight">\(D_X\)</span> 에 대해서도 <span class="math notranslate nohighlight">\(F\)</span>, <span class="math notranslate nohighlight">\(D_X\)</span> 를 넣은 동일한 수식을 사용합니다.</p>
-</section>
-<section id="cycle-consistency-loss">
-<h3>Cycle Consistency Loss<a class="headerlink" href="#cycle-consistency-loss" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 50 </span><span class="caption-text">cycle consistency loss function</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>앞서 말했듯이, mapping distribution 에 제한을 두어 최대한 우리가 원하는 이미지를 생성하기 위해 사용되는 loss function 입니다.</p></li>
-<li><p>예비 실험에서 L1 norm 을 adversarial loss 로 대체해봤는데, 성능 향상을 관찰할 수 없었다고 합니다.</p></li>
-<li><p>cycle consistency loss 를 통해 유도된 결과는 아래 그림에서 볼 수 있습니다.</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 51 </span><span class="caption-text">cycle consistency loss result</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="full-objective">
-<h3>Full Objective<a class="headerlink" href="#full-objective" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 52 </span><span class="caption-text">full objective function</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이때 consistency loss 앞에 붙은 가중치 <span class="math notranslate nohighlight">\(\lambda\)</span> 는 GAN Loss 와의 상대적 중요도에 따라 결정됩니다.</p>
-</section>
-</section>
-<section id="implementation">
-<h2>Implementation<a class="headerlink" href="#implementation" title="Permalink to this heading">#</a></h2>
-<section id="network-architecture">
-<h3>Network Architecture<a class="headerlink" href="#network-architecture" title="Permalink to this heading">#</a></h3>
-<p>Baseline architecture 로서 neural style transfer 와 super-resolution 에 인상적인 결과를 보여준 논문(<a class="reference external" href="https://arxiv.org/abs/1603.08155">https://arxiv.org/abs/1603.08155</a>) 에서 사용된 구조를 채택합니다.</p>
-<ul class="simple">
-<li><p>3 개의 convolutions and several residual blocks,</p></li>
-<li><p>fractionally-strided convolution with stride 1/2,</p></li>
-<li><p>feature 를 RGB 로 매핑하는 one convolution layer.</p></li>
-<li><p>6 blocks for 128 x 128 image // 9 blocks for 256 x 256 및 고해상도 학습 image.</p></li>
-<li><p>instance normalization</p></li>
-</ul>
-</section>
-<section id="training-details">
-<h3>Training details<a class="headerlink" href="#training-details" title="Permalink to this heading">#</a></h3>
-<p>모델 학습을 안정화시키기 위해 아래와 같은 테크닉을 추가로 적용합니다.</p>
-<ul class="simple">
-<li><p>Loss function <span class="math notranslate nohighlight">\(\mathcal{L}_{GAN}\)</span> 에서 nll loss 를 least-squared loss 로 변경</p></li>
-<li><p>생성된 이미지 중 가장 최근의 50개를 따로 저장해 discriminator 가 이를 한꺼번에 분류(모델 진동을 최소화하기 위함)</p></li>
-</ul>
-</section>
-<section id="least-square-loss">
-<h3>(참고) least-square loss 추가 설명<a class="headerlink" href="#least-square-loss" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></p></li>
-<li><p><a class="reference external" href="https://ysbsb.github.io/gan/2022/02/23/LSGAN.html">https://ysbsb.github.io/gan/2022/02/23/LSGAN.html</a></p></li>
-</ul>
-<p>LSGAN 을 참고했으며, 논문에서는 generator 업데이트시 더 안정적인 학습과 quality 높은 결과를 생성한다고 합니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 53 </span><span class="caption-text">출처: <a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>(원래 Discriminator 는 이보다 더 고차원이지만) 간략히 2차원을 표방하면 결정경계를 위와 같이 나타낼 수 있습니다. 윗 쪽이 가짜 영역, 아래 쪽이 진짜 영역입니다. 이 때, 아래에 보면 진짜 데이터 샘플과 거리가 먼 가짜 데이터 샘플이 존재합니다. 즉, NLL Loss 를 사용한다면, Generator 의 입장에서는 이미 Discriminator 를 잘 속이고 있기 때문에 학습할 필요가 없게 됩니다. 즉, Vanishing Gradient 현상이 일어나기 때문에, Discriminator 를 잘 속인다는 이유만으로, 안 좋은 샘플을 생성하는 것에 대해 패널티를 줄 수가 없게 됩니다. 이 때, LSGAN 을 사용한다면 실제 데이터 분포와 가짜 데이터 샘플이 거리가 먼 것에 대해서도 패널티를 주게 됩니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 54 </span><span class="caption-text">출처: <a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그리고 모든 실험에 대해서 <span class="math notranslate nohighlight">\(\lambda\)</span> 를 10 으로 설정하고, batch size = 1, 그리고 Adam solver 를 사용했습니다. 첫 100 epoch 동안에는 learning rate 를 0.0002 로 설정했고, 다음 100 epoch 동안 0 으로 조금씩 수렴하게 scheduling 하였습니다.</p>
-</section>
-</section>
-<section id="evaluation">
-<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
-<p>모델 성능 평가를 위해 아래와 같은 세 개의 지표를 기반으로 기존의 CoGAN, SimGAN, pix2pix baseline 모델과 비교했습니다. 그 외 loss function 에 대한 ablation study 도 수행했습니다.</p>
-<ol class="arabic simple">
-<li><p>AMT perceptual studies: 참가자들은 실제 사진이미지 vs 가짜 이미지, 또는 지도 이미지 vs 가짜이미지에 노출된 후 진짜라고 생각되는 이미지를 선택하게 합니다.</p></li>
-<li><p>FCN Score: 1번 study 가 테스트에 있어 매우 좋은 기준임에도 불구하고, 이번에는 사람을 대상으로 한 실험이 아닌 양적인 기준을 사용합니다. 우선적으로 FCN 모델을 통해 생성된 사진에 대한 레이블 맵을 예측합니다. 이 레이블 맵은 아래에서 설명하는 standard semantic segmentation metric 을 사용하여 input ground truth label 과 비교할 수 있습니다. “도로 상의 자동차”라는 label 에서 사진 이미지를 생성하면, 생성된 이미지에 적용된 FCN 이 “도로 상의 자동차”를 감지하면 성공한 것입니다.</p></li>
-<li><p>Semantic segmentation metric: pixel 당 정확도, class 당 정확도, 그리고 IoU(Intersection-Over-Union) 를 포함하는 cityscapes benchmark 의 표준 metric 을 사용합니다.</p></li>
-</ol>
-<section id="comparison-against-baselines">
-<h3>Comparison against baselines<a class="headerlink" href="#comparison-against-baselines" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 55 </span><span class="caption-text">Comparison against baselines</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>타 baseline 모델보다 성능이 좋을 뿐만 아니라, fully supervised 모델인 pix2pix 와 비슷한 품질의 translation 성능을 보여줍니다.</p>
-<ul class="simple">
-<li><p>AMT Score *</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 56 </span><span class="caption-text">AMT score</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Table 1 은 AMT perceptual realism task 에 대한 성능을 나타냅니다. CycleGAN 의 지도에서 항공 사진, 그리고 항공 사진에서 지도 translation 결과에서 약 1/4의 참가자를 속일 수 있었던 반면에 그 외 모든 baseline 모델은 참가자를 거의 속일 수 없었습니다.</p>
-<ul class="simple">
-<li><p>FCN Score *</p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 57 </span><span class="caption-text">FCN scores</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Table 2, Table 3 는 각각 도시 풍경에 대한 label -&gt; photo, 그리고 photo -&gt; label translation task 의 성능을 보여줍니다. 두 경우 모두 CycleGAN 이 baseline 들의 성능을 능가합니다.</p>
-</section>
-<section id="ablation-study-analysis-of-the-loss-function">
-<h3>Ablation Study - Analysis of the loss function<a class="headerlink" href="#ablation-study-analysis-of-the-loss-function" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 58 </span><span class="caption-text">Analysis of loss function</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>GAN 과 cycle consistency loss 의 중요성을 보여주는 ablation study 입니다. GAN loss 그리고 cycle consistency loss 를 각각 제거하면 성능이 크게 저하되는 부분을 확인할 수 있습니다. 또한 한쪽 방향에 대해서만 GAN + forward cycle 만 돌렸을 때와 GAN + backward cycle 만 돌렸을 때 학습의 불안정성을 보이고, mode collapse 를 유발하는 것을 확인할 수 있었다고 합니다.</p>
-</section>
-<section id="image-reconstruction-quality">
-<h3>Image reconstruction quality<a class="headerlink" href="#image-reconstruction-quality" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 59 </span><span class="caption-text">Results on Cycle Consistency</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Reconstructed 된 이미지 예시들입니다. 지도 -&gt; 항공 사진과 같이 하나의 도메인이 훨씬 더 다양한 정보를 나타내는 경우에도 재구성된 이미지가 훈련 및 테스트 시 모두 원래 입력 <span class="math notranslate nohighlight">\(x\)</span> 에 가깝게 복원되는 경우가 많았습니다.</p>
-</section>
-<section id="additional-results-on-paired-datasets">
-<h3>Additional results on paired datasets<a class="headerlink" href="#additional-results-on-paired-datasets" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 60 </span><span class="caption-text">Additional results on paired datasets</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Figure 8 은 CMP Facade Database 의 건축 레이블 &lt;-&gt; 사진, 그리고 UT Zappos50K dataset 의 edge &lt;-&gt; 신발 을 비롯하여 pix2pix 에 사용된 paired dataset 에 대한 몇 가지 예시 결과를 보여줍니다. CycleGAN 이 생성한 이미지 품질이 fully supervised 된 pix2pix 에 대응하는 성능을 보여주는 것을 확인할 수 있습니다.</p>
-<!---->
-<!--## Applications-->
-<!---->
-<!--Paired data 가 없는 상태에서 CycleGAN 의 application 예시 결과들입니다. -->
-<!---->
-<!--### Collection style transfer-->
-<!---->
-<!--Neural Style Transfer 에 대한 최근 작업과 달리, CycleGAN 은 선택한 단일 예술 작품의 스타일을 전달하는 대신 전체 예술 작품 컬렉션의 스타일을 모방하는 방법을 학습합니다. 그래서 '별이 빛나는 밤에'처럼 그리는 것 보다 '반 고흐'를 따라하는 느낌을 따라한다.-->
-<!---->
-<!--### Object transfiguration-->
-<!---->
-<!---->
-<!--Turmukhambetov et al. \[50\] 하나의 객체를 동일한 범주의 다른 객체로 변환하는 부분 공간 모델을 제안하는 반면, 우리의 방법은 시각적으로 유사한 두 범주 사이의 객체 변형에 중점을 둡니다.  -->
-<!--Turning a horse video into a zebra video (by CycleGAN)-->
-<!---->
-<!--### Season transfer-->
-<!---->
-<!---->
-<!--### Photo generation from paintings-->
-<!---->
-<!---->
-<!--그림을 사진으로 바꿀 때, 입력과 출력 간 색 구성을 보존하기 위해 추가적인 loss를 도입하는 것이 유용하다는 것을 발견할 수 있습니다. 특히, Taigman et al. \[49\]의 기술을 채택하여 제너레이터가 대상 도메인의 실제 샘플을 입력으로 제공받을 때 identity mapping 근처에 있도록 정규화합니다. 즉, **Lidentity(G,F) = Ey\_pdata(y)\[∥G(y) − y∥1\] + Ex∼pdata (x) \[∥F (x) − x∥1 \]**입니다.-->
-<!---->
-<!--Lidentity가 없으면, 생성자 G와 F는 굳이 필요하지 않을 때 입력 이미지의 색조를 자유롭게 변경할 수 있습니다. 예를 들어, Monet의 그림과 Flickr 사진 간의 매핑을 학습할 때, 생성자는 종종 낮에 그린 그림을 일몰 시간에 찍은 사진에 매핑합니다. 왜냐하면 적대적 손실과 사이클 일관성 손실 아래에서 이러한 매핑이 동등하게 유효할 수 있기 때문입니다. 이러한 identity mapping 손실의 효과는 그림 9에서 보여집니다. figure 12, figure 9는 학습 데이터셋에 포함되어 있는 그림, 하지만 다른 set은 오직 test set으로부터 그려진 그림. training set이 paired datqa를 포함하고 있지 않아서, 학습 세트 그림에 대한 타당한 translation을 찾는 것은 쉬운 일이 아니다. 실제로, Monet이 새 그림을 그릴 수 없기 때문에, 보지 않은 test set 그림에 대한 generalization은 not pressing problem-->
-<!---->
-<!--### Photo enhancement-->
-<!---->
-<!--우리는 우리의 방법이 얕은 깊이의 초점을 가진 사진을 생성하는 데 사용될 수 있음을 보여줍니다. 우리는 Flickr에서 다운로드한 꽃 사진을 기반으로 모델을 훈련합니다. 소스 도메인은 스마트폰으로 찍힌 꽃 사진으로 구성되어 있으며, 보통 작은 조리개로 인해 깊은 DoF(초점 깊이)를 가지고 있습니다. 대상은 조리개가 큰 DSLR로 촬영된 사진을 포함합니다. 우리 모델은 스마트폰으로 촬영된 사진으로부터 더 얕은 깊이의 초점을 가진 사진을 성공적으로 생성합니다.-->
-<!---->
-<!--> : shallow depth of field: 얕은 초점. 초점이 맞은 대상과 배경이 흐릿하게 보이는 효과. 인물 사진 / 작품 사진에 활용. 구목하고자 하는 대상을 강조하기 위해 활용.  -->
-<!--> 따라서 source domain은 스마트폰의 **작은 조리개로 깊은 초점** \--> target은 **조리개가 커서 얕은 초점**.-->
-<!---->
-<!--### Comparison with Gatys-->
-</section>
-</section>
-<section id="limitations-and-discusssion">
-<h2>Limitations and Discussion<a class="headerlink" href="#limitations-and-discusssion" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png" style="width: 800px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 61 </span><span class="caption-text">Limitations and Discussion</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이 방법은 많은 경우에 흥미로운 결과를 얻을 수 있지만, 결과가 균일하게 좋은 것은 아니었습니다.</p>
-<ol class="arabic simple">
-<li><p>개 &lt;-&gt; 고양이 translation task 와 같은 경우는 input image 에서 최소한의 변화만 주어, 사람이 보았을 때 실제로 변화가 안되는 경우도 있었고, 형체가 애매해진 경우도 있었습니다. 이를 보았을 때, geometry 가 반영되는 눈, 코, 입 등의 세부적인 구조를 정확히 구현하는 데 한계가 있어 보입니다.</p></li>
-<li><p>말 &lt;-&gt; 얼룩말 translation 예제의 경우, 말은 사람이 타는 모습이 많았는데 얼룩말의 경우는 사람이 타는 사진이 없다보니, 사람 뿐만 아니라 배경도 얼룩 그림을 그리거나 단순히 얼룩말에서 노랗게 칠한 경우가 존재합니다.</p></li>
-<li><p>때때로 photo -&gt; label translation task 에서 나무와 건물의 label 을 바꾸는 경우도 있었습니다.<br />
-이러한 모호성을 해결하려면 weak semantic supervision 이 필요할 수도 있을 것 같습니다.</p></li>
-</ol>
-<p>그럼에도 불구하고 해당 논문은 완전히 paired 되지 않은 “unsupervised” setting 에서도 image translation task 의 한계를 늘리는데 기여합니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="A_Study_on_the_Evaluation_of_Generative_Models.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">A Study on the Evaluation of Generative Models</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="StyleGAN.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">StyleGAN</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-translation">Image-to-Image Translation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mode-collapse">Mode Collapse</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adversarial-loss">Adversarial Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#cycle-consistency-loss">Cycle Consistency Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#full-objective">Full Objective</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">Network Architecture</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#least-square-loss">(참고) least-square loss 추가 설명</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-against-baselines">Comparison against baselines</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study-analysis-of-the-loss-function">Ablation Study - Analysis of the loss function</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-reconstruction-quality">Image reconstruction quality</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-results-on-paired-datasets">Additional results on paired datasets</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-discusssion">Limitations and Discussion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>CycleGAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/cycleGAN';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="StyleGAN" href="StyleGAN.html" />
+    <link rel="prev" title="A Study on the Evaluation of Generative Models" href="A_Study_on_the_Evaluation_of_Generative_Models.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/cycleGAN.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/cycleGAN.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>CycleGAN</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-translation">Image-to-Image Translation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mode-collapse">Mode Collapse</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adversarial-loss">Adversarial Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#cycle-consistency-loss">Cycle Consistency Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#full-objective">Full Objective</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">Network Architecture</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#least-square-loss">(참고) least-square loss 추가 설명</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-against-baselines">Comparison against baselines</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study-analysis-of-the-loss-function">Ablation Study - Analysis of the loss function</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-reconstruction-quality">Image reconstruction quality</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-results-on-paired-datasets">Additional results on paired datasets</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-discusssion">Limitations and Discussion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks (ICCV 2017)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1703.10593">https://arxiv.org/abs/1703.10593</a></p></li>
+<li><p>Code: <a class="reference external" href="https://www.tensorflow.org/tutorials/generative/cyclegan?hl=ko">TensorFlow CycleGAN tutorial</a></p></li>
+<li><p><a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">[논문리뷰] Cycle GAN: Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks</a>
+<a class="reference external" href="https://comlini8-8.tistory.com/9">CycleGAN을 만든 사람이 한국인이라고? CycleGAN 논문 뜯어보기</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> KwangSu Mun</p></li>
+<li><p><strong>Author:</strong> ChangHwan Lee</p></li>
+<li><p><strong>Last updated on May. 16, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="cyclegan">
+<h1>CycleGAN<a class="headerlink" href="#cyclegan" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Image-to-image translation 은 한 이미지 도메인을 다른 이미지 도메인으로 변환시키는 computer vision 의 한 task 입니다.</p></li>
+<li><p>Image-to-image translation 은 보통 input과 output이 짝이 지어진 상태에서 학습하지만 짝이 지어진 학습 데이터를 얻는 것이 어렵습니다. 따라서 CycleGAN 논문에서는 짝지어진 예시 없이 <span class="math notranslate nohighlight">\(X\)</span> 라는 domain 으로부터 얻은 이미지를 target domain <span class="math notranslate nohighlight">\(Y\)</span> 로 바꾸는 방법을 제안합니다. 이 연구는 Adversarial loss 를 활용해, <span class="math notranslate nohighlight">\(G(x)\)</span> 로부터 생성된 이미지 데이터의 분포와 <span class="math notranslate nohighlight">\(Y\)</span> 로부터의 이미지 데이터의 분포가 구분이 불가능하도록 함수 <span class="math notranslate nohighlight">\(G: X \rightarrow Y\)</span> 를 학습시키는 것을 목표로 합니다. 더불어, <span class="math notranslate nohighlight">\(X \rightarrow Y\)</span> 로의 mapping 에 제약을 가해서 원하는 이미지를 강제하기 위해 <span class="math notranslate nohighlight">\(F: Y \rightarrow X\)</span> 와 같은 역방향 매핑을 함께 진행합니다. 즉, <span class="math notranslate nohighlight">\(F(G(x))\)</span> 가 <span class="math notranslate nohighlight">\(X\)</span> 와 유사해지도록 강제하는 cycle consistency loss 를 도입했습니다.</p></li>
+<li><p>결과적으로 collection style transfer, object transfiguration, season transfer, photo enhancement 등의 task 에서 이미지 pair 가 존재하지 않는 상태에서 우수한 결과를 보여줬다고 합니다.</p></li>
+</ul>
+</section>
+<section id="related-work">
+<h2>Related work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GAN : adversarial loss 를 사용하여 이미지를 생성하는 방법입니다.</p></li>
+<li><p>Image-to-Image Translation : 특정 image 를 input 으로 넣으면 그에 맞는 image 가 output 으로 나오는 방법입니다. pix2pix 같은 방법이 있으며 상세한 설명은 아래의 Background section 을 참조하면 됩니다.</p></li>
+<li><p>Unpaired Image-to-Image Translation : 위의 Image-to-Image Translation 에서 pair 가 아닌 데이터로 학습해서 Image-to-Image Translation 과 같은 input, output 결과가 나오도록 만드는 방법입니다.</p></li>
+<li><p>Cycle Consistency : 한 이미지를 다른 도메인으로 변환하고 다시 원래 도메인으로 변환할 때 처음의 원본으로 되돌아오도록 하여 일종의 순환(사이클)을 만드는 방법으로, 학습 프로세스가 더 안정적이게 되고 이미지 간의 일관성을 보다 잘 유지할 수 있도록 만듭니다. (ex) <span class="math notranslate nohighlight">\(X\)</span> 를 모델 <span class="math notranslate nohighlight">\(A\)</span> 를 거쳐 <span class="math notranslate nohighlight">\(Y\)</span> 로 만든 뒤 다시 모델 <span class="math notranslate nohighlight">\(B\)</span> 를 거쳐 <span class="math notranslate nohighlight">\(X\)</span> 로 복구)</p></li>
+<li><p>Neural Style Transfer : pre-trained 된 deep features 의 Gram matrix statistics 일치를 기반으로 이미지 content 를 다른 image 의 스타일과 결합하여 새로운 이미지를 합성하는 방법입니다.</p></li>
+</ul>
+</section>
+<section id="background">
+<h2>Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<section id="image-to-image-translation">
+<h3>Image-to-Image Translation<a class="headerlink" href="#image-to-image-translation" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://phillipi.github.io/pix2pix/images/teaser_v3.png"><img alt="https://phillipi.github.io/pix2pix/images/teaser_v3.png" class="bg-primary mb-1" src="https://phillipi.github.io/pix2pix/images/teaser_v3.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 45 </span><span class="caption-text">image-to-image translation</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Image-to-image translation 은 input image 를 다른 스타일, 속성, 구조 등을 가진 output image 로 변환하는 task 입니다. 예를 들어 사진을 그림으로 변환한다거나, 낮에 찍은 사진을 밤에 찍은 것처럼 변환하는 것을 말합니다. 흔히 translation 은 input 과 output 으로 짝이 지어진 데이터를 바탕으로 학습이 이루어져 왔는데요. 짝이 지어진 사진 데이터를 얻는 것은 어렵고 비용이 많이 드는 일입니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbhMGUZ%2Fbtr7HimHXN5%2FHvjTh02iCzP5Sgk8UYkKO0%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 46 </span><span class="caption-text">paired and unpaired data</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이 논문에서는 input image와 output image가 일대일로 짝지어지지 않은 상태에서 하나의 image 모음의 특성을 캡쳐하고, 이러한 특성을 다른 image 모음으로 변환할 수 있는 방법을 제시합니다.
+GAN은 domain <span class="math notranslate nohighlight">\(X\)</span> 에 이미지 한 세트, domain <span class="math notranslate nohighlight">\(Y\)</span> 에 이미지 한 세트가 제공되고, model 의 output 과 <span class="math notranslate nohighlight">\(Y\)</span> 가 discriminator 에 의해 구별되지 않도록 모델 <span class="math notranslate nohighlight">\(G: X \rightarrow Y\)</span> 를 학습합니다. 하지만, 이것이 개별 입력 <span class="math notranslate nohighlight">\(x\)</span> 와 출력 <span class="math notranslate nohighlight">\(y\)</span> 가 무조건 유의미하게 쌍을 이룬다는 것을 뜻하지는 않습니다. <span class="math notranslate nohighlight">\(G\)</span> 가 생성할 수 있는 image 에는 무한한 경우의 수가 있기 때문에 종종 mode collapse 현상이 일어나기도 합니다.</p>
+</section>
+<section id="mode-collapse">
+<h3>Mode Collapse<a class="headerlink" href="#mode-collapse" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png"><img alt="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png" class="bg-primary mb-1" src="https://1.bp.blogspot.com/-oDCR5UnEIl4/WZkIId-rYCI/AAAAAAAAAJk/PoLvou4JLNIxn5U-OmPFZ_heyxVQGbMNQCEwYBhgL/s1600/14.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 47 </span><span class="caption-text">mode collapsing 출처: <a class="reference external" href="http://dl-ai.blogspot.com/2017/08/gan-problems.html">http://dl-ai.blogspot.com/2017/08/gan-problems.html</a></span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>어떤 input image 든 모두 같은 output image 로 매핑하면서 최적화에 실패하는 현상입니다. 이 현상은 generator 입장에서, discriminator 가 이 사진이 진짜 <span class="math notranslate nohighlight">\(Y\)</span>인지 가짜인 <span class="math notranslate nohighlight">\(\hat{Y}\)</span>인지 구별하는 것을 ‘<strong>속이기만</strong>’ 하면 되기 때문에 우리의 목적과 전혀 상관이 없는 데이터를 generator 가 만들더라도 문제가 생기지 않아서 발생합니다.</p>
+<ul class="simple">
+<li><p>참고: <a class="reference external" href="http://dl-ai.blogspot.com/2017/08/gan-problems.html">http://dl-ai.blogspot.com/2017/08/gan-problems.html</a></p></li>
+</ul>
+<p>이러한 이슈로 인해 추가 objective function 이 필요해졌습니다. 따라서 translation task 는 영어 → 프랑스어 → 영어로 번역했을 때 원래 문장에 다시 도달하는 것처럼, <span class="math notranslate nohighlight">\(X \rightarrow Y \rightarrow X'\)</span> 로 돌아가는 과정에서 <span class="math notranslate nohighlight">\(X\)</span> 와 <span class="math notranslate nohighlight">\(X'\)</span> 이 최대한 같아야 한다는 의미의 cycle consistency 라는 속성을 이용합니다. 필요한 목적식을 간단하게 정리하면 다음과 같습니다.</p>
+<ul class="simple">
+<li><p>정방향, 역방향 adversarial loss: <span class="math notranslate nohighlight">\(X \rightarrow Y\)</span> &amp; <span class="math notranslate nohighlight">\(Y \rightarrow X\)</span></p></li>
+<li><p>Cycle consistency loss: <span class="math notranslate nohighlight">\(X \approx F(G(x))\)</span></p></li>
+</ul>
+</section>
+</section>
+<section id="method">
+<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<!---->
+<!--Overview 에서 전체적인 구성과 학습과정을 설명하며, 아래 "Adversarial Loss", "Cycle consistency Loss"는 모델의 핵심 요소임. 이를 기반으로 "full objective"가 나옴-->
+<section id="overview">
+<h3>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig2.png"><img alt="../../_images/fig2.png" class="bg-primary mb-1" src="../../_images/fig2.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 48 </span><span class="caption-text">CycleGAN 도식화 자료</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>목표: <span class="math notranslate nohighlight">\(X\)</span>, <span class="math notranslate nohighlight">\(Y\)</span> 를 mapping 하는 function 을 학습하는 것</p></li>
+<li><p>용어 정리</p>
+<ul>
+<li><p>data 분포를 <span class="math notranslate nohighlight">\(x \sim p_{data}(x)\)</span>, <span class="math notranslate nohighlight">\(y \sim p_{data}(y)\)</span> 로 표기</p></li>
+<li><p><span class="math notranslate nohighlight">\(G : X -&gt; Y\)</span>, <span class="math notranslate nohighlight">\(F: Y -&gt; X\)</span> 는 generator</p></li>
+<li><p><span class="math notranslate nohighlight">\(D_X\)</span>, <span class="math notranslate nohighlight">\(D_Y\)</span> 는 discriminator</p></li>
+<li><p><span class="math notranslate nohighlight">\(D_X\)</span> 는 <span class="math notranslate nohighlight">\(X\)</span> 와 <span class="math notranslate nohighlight">\(F(y)\)</span> 그리고 <span class="math notranslate nohighlight">\(D_Y\)</span> 는 <span class="math notranslate nohighlight">\(y\)</span> 와 <span class="math notranslate nohighlight">\(G(x)\)</span> 를 구분하고, 다음과 같이 두 개의 목적식으로 학습합니다.</p>
+<ul>
+<li><p>adversarial loss: 생성된 이미지의 분포를 대상 domain 의 data distribution 과 일치시키기 위한 것.</p></li>
+<li><p>cycle consistency loss: 학습된 mapping <span class="math notranslate nohighlight">\(G\)</span> 와 <span class="math notranslate nohighlight">\(F\)</span> 가 서로 모순되는 것을 방지하기 위한 것.</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="adversarial-loss">
+<h3>Adversarial Loss<a class="headerlink" href="#adversarial-loss" title="Permalink to this heading">#</a></h3>
+<p><span class="math notranslate nohighlight">\(G: X -&gt; Y\)</span> 와 <span class="math notranslate nohighlight">\(D_Y\)</span> 에 대한 목적식은 다음과 같습니다.</p>
+<figure class="align-default" id="mathcal-l-gan-loss-function">
+<img alt="L_GAN Loss function" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnvzuE%2Fbtr725OfuJy%2FI1IgwK5PIzXpzINWnJxysK%2Fimg.png" />
+<figcaption>
+<p><span class="caption-number">Fig. 49 </span><span class="caption-text"><span class="math notranslate nohighlight">\(\mathcal{L}_{GAN}\)</span> Loss function (source: <a class="reference external" href="https://arxiv.org/abs/1703.10593">https://arxiv.org/abs/1703.10593</a>)</span><a class="headerlink" href="#mathcal-l-gan-loss-function" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이는 GAN 에서 쓰이는 loss function 을 사용하지만, 차이점이 있다면 <span class="math notranslate nohighlight">\(X -&gt; Y\)</span> 로 갈 때와 <span class="math notranslate nohighlight">\(Y -&gt; X\)</span> 로 갈 때 총 두 개의 수식이 나온다는 점입니다. 다시 말해, <span class="math notranslate nohighlight">\(F: Y -&gt; X\)</span> 와 <span class="math notranslate nohighlight">\(D_X\)</span> 에 대해서도 <span class="math notranslate nohighlight">\(F\)</span>, <span class="math notranslate nohighlight">\(D_X\)</span> 를 넣은 동일한 수식을 사용합니다.</p>
+</section>
+<section id="cycle-consistency-loss">
+<h3>Cycle Consistency Loss<a class="headerlink" href="#cycle-consistency-loss" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fmq8pC%2Fbtr724Pl3Q2%2FUSK4TDRaUK860iIdvG0vV0%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 50 </span><span class="caption-text">cycle consistency loss function</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>앞서 말했듯이, mapping distribution 에 제한을 두어 최대한 우리가 원하는 이미지를 생성하기 위해 사용되는 loss function 입니다.</p></li>
+<li><p>예비 실험에서 L1 norm 을 adversarial loss 로 대체해봤는데, 성능 향상을 관찰할 수 없었다고 합니다.</p></li>
+<li><p>cycle consistency loss 를 통해 유도된 결과는 아래 그림에서 볼 수 있습니다.</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzsgD6%2Fbtr8ay8PEBE%2F3mAKd1YSAiCK4ZXeIg84s1%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 51 </span><span class="caption-text">cycle consistency loss result</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="full-objective">
+<h3>Full Objective<a class="headerlink" href="#full-objective" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FUyaOu%2Fbtr724Pl3Rj%2FigjKaeukv5m8Cbdzulp5jK%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 52 </span><span class="caption-text">full objective function</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이때 consistency loss 앞에 붙은 가중치 <span class="math notranslate nohighlight">\(\lambda\)</span> 는 GAN Loss 와의 상대적 중요도에 따라 결정됩니다.</p>
+</section>
+</section>
+<section id="implementation">
+<h2>Implementation<a class="headerlink" href="#implementation" title="Permalink to this heading">#</a></h2>
+<section id="network-architecture">
+<h3>Network Architecture<a class="headerlink" href="#network-architecture" title="Permalink to this heading">#</a></h3>
+<p>Baseline architecture 로서 neural style transfer 와 super-resolution 에 인상적인 결과를 보여준 논문(<a class="reference external" href="https://arxiv.org/abs/1603.08155">https://arxiv.org/abs/1603.08155</a>) 에서 사용된 구조를 채택합니다.</p>
+<ul class="simple">
+<li><p>3 개의 convolutions and several residual blocks,</p></li>
+<li><p>fractionally-strided convolution with stride 1/2,</p></li>
+<li><p>feature 를 RGB 로 매핑하는 one convolution layer.</p></li>
+<li><p>6 blocks for 128 x 128 image // 9 blocks for 256 x 256 및 고해상도 학습 image.</p></li>
+<li><p>instance normalization</p></li>
+</ul>
+</section>
+<section id="training-details">
+<h3>Training details<a class="headerlink" href="#training-details" title="Permalink to this heading">#</a></h3>
+<p>모델 학습을 안정화시키기 위해 아래와 같은 테크닉을 추가로 적용합니다.</p>
+<ul class="simple">
+<li><p>Loss function <span class="math notranslate nohighlight">\(\mathcal{L}_{GAN}\)</span> 에서 nll loss 를 least-squared loss 로 변경</p></li>
+<li><p>생성된 이미지 중 가장 최근의 50개를 따로 저장해 discriminator 가 이를 한꺼번에 분류(모델 진동을 최소화하기 위함)</p></li>
+</ul>
+</section>
+<section id="least-square-loss">
+<h3>(참고) least-square loss 추가 설명<a class="headerlink" href="#least-square-loss" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></p></li>
+<li><p><a class="reference external" href="https://ysbsb.github.io/gan/2022/02/23/LSGAN.html">https://ysbsb.github.io/gan/2022/02/23/LSGAN.html</a></p></li>
+</ul>
+<p>LSGAN 을 참고했으며, 논문에서는 generator 업데이트시 더 안정적인 학습과 quality 높은 결과를 생성한다고 합니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F6JIT8%2Fbtr73nVyIqs%2FKfcPK33U3OY0AjKhjFlUh1%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 53 </span><span class="caption-text">출처: <a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>(원래 Discriminator 는 이보다 더 고차원이지만) 간략히 2차원으로 표현하면 결정경계를 위와 같이 나타낼 수 있습니다. 윗 쪽이 가짜 영역, 아래 쪽이 진짜 영역입니다. 이 때, 아래에 보면 진짜 데이터 샘플과 거리가 먼 가짜 데이터 샘플이 존재합니다. 즉, NLL Loss 를 사용한다면, Generator 의 입장에서는 이미 Discriminator 를 잘 속이고 있기 때문에 학습할 필요가 없게 됩니다. 즉, Vanishing Gradient 현상이 일어나기 때문에, Discriminator 를 잘 속인다는 이유만으로, 안 좋은 샘플을 생성하는 것에 대해 패널티를 줄 수가 없게 됩니다. 이 때, LSGAN 을 사용한다면 실제 데이터 분포와 가짜 데이터 샘플이 거리가 먼 것에 대해서도 패널티를 주게 됩니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHsUiX%2Fbtr77PQw99h%2F0Er06IYIGYlBGw2rVufXc0%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 54 </span><span class="caption-text">출처: <a class="reference external" href="https://velog.io/&#64;sjinu/CycleGAN">https://velog.io/&#64;sjinu/CycleGAN</a></span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그리고 모든 실험에 대해서 <span class="math notranslate nohighlight">\(\lambda\)</span> 를 10 으로 설정하고, batch size = 1, 그리고 Adam solver 를 사용했습니다. 첫 100 epoch 동안에는 learning rate 를 0.0002 로 설정했고, 다음 100 epoch 동안 0 으로 조금씩 수렴하게 scheduling 하였습니다.</p>
+</section>
+</section>
+<section id="evaluation">
+<h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">#</a></h2>
+<p>모델 성능 평가를 위해 아래와 같은 세 개의 지표를 기반으로 기존의 CoGAN, SimGAN, pix2pix baseline 모델과 비교했습니다. 그 외 loss function 에 대한 ablation study 도 수행했습니다.</p>
+<ol class="arabic simple">
+<li><p>AMT perceptual studies: 참가자들은 실제 사진이미지 vs 가짜 이미지, 또는 지도 이미지 vs 가짜이미지에 노출된 후 진짜라고 생각되는 이미지를 선택하게 합니다.</p></li>
+<li><p>FCN Score: 1번 study 가 테스트에 있어 매우 좋은 기준임에도 불구하고, 이번에는 사람을 대상으로 한 실험이 아닌 양적인 기준을 사용합니다. 우선적으로 FCN 모델을 통해 생성된 사진에 대한 레이블 맵을 예측합니다. 이 레이블 맵은 아래에서 설명하는 standard semantic segmentation metric 을 사용하여 input ground truth label 과 비교할 수 있습니다. “도로 상의 자동차”라는 label 에서 사진 이미지를 생성하면, 생성된 이미지에 적용된 FCN 이 “도로 상의 자동차”를 감지하면 성공한 것입니다.</p></li>
+<li><p>Semantic segmentation metric: pixel 당 정확도, class 당 정확도, 그리고 IoU(Intersection-Over-Union) 를 포함하는 cityscapes benchmark 의 표준 metric 을 사용합니다.</p></li>
+</ol>
+<section id="comparison-against-baselines">
+<h3>Comparison against baselines<a class="headerlink" href="#comparison-against-baselines" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcZUe4E%2Fbtr8eXUQ6ou%2FikWglP8dEglGUny4dRkMjK%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 55 </span><span class="caption-text">Comparison against baselines</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>타 baseline 모델보다 성능이 좋을 뿐만 아니라, fully supervised 모델인 pix2pix 와 비슷한 품질의 translation 성능을 보여줍니다.</p>
+<ul class="simple">
+<li><p>AMT Score *</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb1Zhnx%2Fbtr8eWhk9ID%2FtauuT1N0W2qxRekj3IAnc1%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 56 </span><span class="caption-text">AMT score</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Table 1 은 AMT perceptual realism task 에 대한 성능을 나타냅니다. CycleGAN 의 지도에서 항공 사진, 그리고 항공 사진에서 지도 translation 결과에서 약 1/4의 참가자를 속일 수 있었던 반면에 그 외 모든 baseline 모델은 참가자를 거의 속일 수 없었습니다.</p>
+<ul class="simple">
+<li><p>FCN Score *</p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqzYO1%2Fbtr728xs5iD%2FN5NDNYwUYLnEZfnOVYONM0%2Fimg.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 57 </span><span class="caption-text">FCN scores</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Table 2, Table 3 는 각각 도시 풍경에 대한 label -&gt; photo, 그리고 photo -&gt; label translation task 의 성능을 보여줍니다. 두 경우 모두 CycleGAN 이 baseline 들의 성능을 능가합니다.</p>
+</section>
+<section id="ablation-study-analysis-of-the-loss-function">
+<h3>Ablation Study - Analysis of the loss function<a class="headerlink" href="#ablation-study-analysis-of-the-loss-function" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcjQ9QQ%2Fbtr79farEX8%2FkQ6SWARw9QK9jqRqHlZoi1%2Fimg.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 58 </span><span class="caption-text">Analysis of loss function</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>GAN 과 cycle consistency loss 의 중요성을 보여주는 ablation study 입니다. GAN loss 그리고 cycle consistency loss 를 각각 제거하면 성능이 크게 저하되는 부분을 확인할 수 있습니다. 또한 한쪽 방향에 대해서만 GAN + forward cycle 만 돌렸을 때와 GAN + backward cycle 만 돌렸을 때 학습의 불안정성을 보이고, mode collapse 를 유발하는 것을 확인할 수 있었다고 합니다.</p>
+</section>
+<section id="image-reconstruction-quality">
+<h3>Image reconstruction quality<a class="headerlink" href="#image-reconstruction-quality" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyy7lt%2Fbtr73PdbuJp%2F5bmDtKSlQJJnd5yKvPgfB1%2Fimg.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 59 </span><span class="caption-text">Results on Cycle Consistency</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Reconstructed 된 이미지 예시들입니다. 지도 -&gt; 항공 사진과 같이 하나의 도메인이 훨씬 더 다양한 정보를 나타내는 경우에도 재구성된 이미지가 훈련 및 테스트 시 모두 원래 입력 <span class="math notranslate nohighlight">\(x\)</span> 에 가깝게 복원되는 경우가 많았습니다.</p>
+</section>
+<section id="additional-results-on-paired-datasets">
+<h3>Additional results on paired datasets<a class="headerlink" href="#additional-results-on-paired-datasets" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbqNrhb%2Fbtr72YaInQa%2Fk8b4K99KrAsD9C0SHINtt1%2Fimg.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 60 </span><span class="caption-text">Additional results on paired datasets</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Figure 8 은 CMP Facade Database 의 건축 레이블 &lt;-&gt; 사진, 그리고 UT Zappos50K dataset 의 edge &lt;-&gt; 신발 을 비롯하여 pix2pix 에 사용된 paired dataset 에 대한 몇 가지 예시 결과를 보여줍니다. CycleGAN 이 생성한 이미지 품질이 fully supervised 된 pix2pix 에 대응하는 성능을 보여주는 것을 확인할 수 있습니다.</p>
+<!---->
+<!--## Applications-->
+<!---->
+<!--Paired data 가 없는 상태에서 CycleGAN 의 application 예시 결과들입니다. -->
+<!---->
+<!--### Collection style transfer-->
+<!---->
+<!--Neural Style Transfer 에 대한 최근 작업과 달리, CycleGAN 은 선택한 단일 예술 작품의 스타일을 전달하는 대신 전체 예술 작품 컬렉션의 스타일을 모방하는 방법을 학습합니다. 그래서 '별이 빛나는 밤에'처럼 그리는 것 보다 '반 고흐'를 따라하는 느낌을 따라한다.-->
+<!---->
+<!--### Object transfiguration-->
+<!---->
+<!---->
+<!--Turmukhambetov et al. \[50\] 하나의 객체를 동일한 범주의 다른 객체로 변환하는 부분 공간 모델을 제안하는 반면, 우리의 방법은 시각적으로 유사한 두 범주 사이의 객체 변형에 중점을 둡니다.  -->
+<!--Turning a horse video into a zebra video (by CycleGAN)-->
+<!---->
+<!--### Season transfer-->
+<!---->
+<!---->
+<!--### Photo generation from paintings-->
+<!---->
+<!---->
+<!--그림을 사진으로 바꿀 때, 입력과 출력 간 색 구성을 보존하기 위해 추가적인 loss를 도입하는 것이 유용하다는 것을 발견할 수 있습니다. 특히, Taigman et al. \[49\]의 기술을 채택하여 제너레이터가 대상 도메인의 실제 샘플을 입력으로 제공받을 때 identity mapping 근처에 있도록 정규화합니다. 즉, **Lidentity(G,F) = Ey\_pdata(y)\[∥G(y) − y∥1\] + Ex∼pdata (x) \[∥F (x) − x∥1 \]**입니다.-->
+<!---->
+<!--Lidentity가 없으면, 생성자 G와 F는 굳이 필요하지 않을 때 입력 이미지의 색조를 자유롭게 변경할 수 있습니다. 예를 들어, Monet의 그림과 Flickr 사진 간의 매핑을 학습할 때, 생성자는 종종 낮에 그린 그림을 일몰 시간에 찍은 사진에 매핑합니다. 왜냐하면 적대적 손실과 사이클 일관성 손실 아래에서 이러한 매핑이 동등하게 유효할 수 있기 때문입니다. 이러한 identity mapping 손실의 효과는 그림 9에서 보여집니다. figure 12, figure 9는 학습 데이터셋에 포함되어 있는 그림, 하지만 다른 set은 오직 test set으로부터 그려진 그림. training set이 paired datqa를 포함하고 있지 않아서, 학습 세트 그림에 대한 타당한 translation을 찾는 것은 쉬운 일이 아니다. 실제로, Monet이 새 그림을 그릴 수 없기 때문에, 보지 않은 test set 그림에 대한 generalization은 not pressing problem-->
+<!---->
+<!--### Photo enhancement-->
+<!---->
+<!--우리는 우리의 방법이 얕은 깊이의 초점을 가진 사진을 생성하는 데 사용될 수 있음을 보여줍니다. 우리는 Flickr에서 다운로드한 꽃 사진을 기반으로 모델을 훈련합니다. 소스 도메인은 스마트폰으로 찍힌 꽃 사진으로 구성되어 있으며, 보통 작은 조리개로 인해 깊은 DoF(초점 깊이)를 가지고 있습니다. 대상은 조리개가 큰 DSLR로 촬영된 사진을 포함합니다. 우리 모델은 스마트폰으로 촬영된 사진으로부터 더 얕은 깊이의 초점을 가진 사진을 성공적으로 생성합니다.-->
+<!---->
+<!--> : shallow depth of field: 얕은 초점. 초점이 맞은 대상과 배경이 흐릿하게 보이는 효과. 인물 사진 / 작품 사진에 활용. 구목하고자 하는 대상을 강조하기 위해 활용.  -->
+<!--> 따라서 source domain은 스마트폰의 **작은 조리개로 깊은 초점** \--> target은 **조리개가 커서 얕은 초점**.-->
+<!---->
+<!--### Comparison with Gatys-->
+</section>
+</section>
+<section id="limitations-and-discusssion">
+<h2>Limitations and Discussion<a class="headerlink" href="#limitations-and-discusssion" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png"><img alt="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png" class="bg-primary mb-1" src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdJc1k5%2Fbtr76zUPUWj%2F27Mk0oQ5VanEHANWWmaseK%2Fimg.png" style="width: 800px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 61 </span><span class="caption-text">Limitations and Discussion</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이 방법은 많은 경우에 흥미로운 결과를 얻을 수 있지만, 결과가 균일하게 좋은 것은 아니었습니다.</p>
+<ol class="arabic simple">
+<li><p>개 &lt;-&gt; 고양이 translation task 와 같은 경우는 input image 에서 최소한의 변화만 주어, 사람이 보았을 때 실제로 변화가 안되는 경우도 있었고, 형체가 애매해진 경우도 있었습니다. 이를 보았을 때, geometry 가 반영되는 눈, 코, 입 등의 세부적인 구조를 정확히 구현하는 데 한계가 있어 보입니다.</p></li>
+<li><p>말 &lt;-&gt; 얼룩말 translation 예제의 경우, 말은 사람이 타는 모습이 많았는데 얼룩말의 경우는 사람이 타는 사진이 없다보니, 사람 뿐만 아니라 배경도 얼룩 그림을 그리거나 단순히 얼룩말에서 노랗게 칠한 경우가 존재합니다.</p></li>
+<li><p>때때로 photo -&gt; label translation task 에서 나무와 건물의 label 을 바꾸는 경우도 있었습니다.<br />
+이러한 모호성을 해결하려면 weak semantic supervision 이 필요할 수도 있을 것 같습니다.</p></li>
+</ol>
+<p>그럼에도 불구하고 해당 논문은 완전히 paired 되지 않은 “unsupervised” setting 에서도 image translation task 의 한계를 늘리는데 기여합니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="A_Study_on_the_Evaluation_of_Generative_Models.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">A Study on the Evaluation of Generative Models</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="StyleGAN.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">StyleGAN</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">Related work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-image-translation">Image-to-Image Translation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mode-collapse">Mode Collapse</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adversarial-loss">Adversarial Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#cycle-consistency-loss">Cycle Consistency Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#full-objective">Full Objective</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation">Implementation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#network-architecture">Network Architecture</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#training-details">Training details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#least-square-loss">(참고) least-square loss 추가 설명</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-against-baselines">Comparison against baselines</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study-analysis-of-the-loss-function">Ablation Study - Analysis of the loss function</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-reconstruction-quality">Image reconstruction quality</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-results-on-paired-datasets">Additional results on paired datasets</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations-and-discusssion">Limitations and Discussion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/dalle.html b/docs/review/dalle.html
old mode 100644
new mode 100755
index eb671466..16566b07
--- a/docs/review/dalle.html
+++ b/docs/review/dalle.html
@@ -1,957 +1,977 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DALL-E &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/dalle';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DALL-E 2" href="DALLE2.html" />
-    <link rel="prev" title="Diffusion Models Beat GANs on Image Synthesis" href="diffusion_beats_GANs.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/dalle.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/dalle.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DALL-E</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gpt-3">GPT-3</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#vq-vae">VQ-VAE</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">3. Methodology</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-of-previous-works">Limitation of Previous Works</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-overview">DALL-E Overview</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-training-vq-vae">Stage 1: Training VQ-VAE</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-training-an-autoregressive-transformer">Stage 2: Training an Autoregressive Transformer</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-pipeline">DALL-E Pipeline 예시</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology-details">Methodology Details</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-equations">DALL-E Equations</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-1-learning-the-visual-codebook">DALL-E 학습과정 Stage 1: Learning the VIsual Codebook</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-2-learning-the-prior">DALL-E 학습과정 Stage 2: Learning the Prior</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Zero-shot text-to-image generation (ICML 2021)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2102.12092">https://arxiv.org/abs/2102.12092</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/DALLE-pytorch">Unofficial-PyTorch</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/openai/DALL-E">Official</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Donggeun “Sean” Ko</p></li>
-<li><p><strong>Last updated on June 22 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dall-e">
-<h1>DALL-E<a class="headerlink" href="#dall-e" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GPT-3 기반 모델이며 120억개 parameter 수와 2.5억 데이터 (text,image) set으로 학습</p></li>
-<li><p>Autoregressive 한 모델링을 통하여 image와 text를 이용하여 text-to-image generation task를 수행</p></li>
-<li><p>2021년 기준 zero-shot SOTA performance 달성</p></li>
-<li><p>아래 그림과 같이 text input에 따라 diverse한 이미지 생성</p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig1.png"><img alt="fig1" class="bg-primary mb-1" src="../../_images/fig1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 86 </span><span class="caption-text">Images generated using DALL-E</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig21.png"><img alt="fig2" class="bg-primary mb-1" src="../../_images/fig21.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 87 </span><span class="caption-text">Images generated using DALL-E</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="background">
-<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GPT-3와 VQ-VAE를 활용하여 나온 논문.</p></li>
-<li><p>VQ-VAE를 먼저 학습하고, Autoregressive Transformer을 순차적으로 학습하여 zero-shot architecture을 구축.</p></li>
-</ul>
-<section id="gpt-3">
-<h3>GPT-3<a class="headerlink" href="#gpt-3" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Autoregressive Language Model며 few-shot learning을 통해 fine-tuning 없이 높은 성능을 냄 *(fine-tuning 을 할 수는 있지만 본 논문에서는 task-agnostic performance 에 중점을 맞춰 Few shot을 함)</p></li>
-<li><p>GPT-3 는 transformer에서 decoder 부분만 사용 (GPT-2 와 유사한 구조를 가지고 있음 )</p></li>
-<li><p>약 1750억 parameter 개수의 모델</p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig3.png"><img alt="fig3" class="bg-primary mb-1" src="../../_images/fig3.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 88 </span><span class="caption-text">Transformer 아키텍쳐 \ (source: <a class="reference external" href="https://arxiv.org/pdf/2005.14165.pdf">https://arxiv.org/pdf/2005.14165.pdf</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id4">
-<img alt="GPT-3 GIF" src="../../_images/fig4.gif" />
-<figcaption>
-<p><span class="caption-number">Fig. 89 </span><span class="caption-text">GPT 3 Animation \ (source: <a class="reference external" href="https://jalammar.github.io/how-gpt3-works-visualizations-animations/">https://jalammar.github.io/how-gpt3-works-visualizations-animations/</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="vq-vae">
-<h3>VQ-VAE<a class="headerlink" href="#vq-vae" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Encoder에서 나온 output은 discrete 하며 posterior 과 prior 이 categorical distribution을 갖는다고 가정함.</p></li>
-<li><p>CNN (encoder) 을 거친 각 D차원의 위치에 <span class="math notranslate nohighlight">\(H \times W\)</span> 그리드로 이미지를 나누고 embedding space (Codebook) 에서 <span class="math notranslate nohighlight">\(𝑒_1\)</span>부터 <span class="math notranslate nohighlight">\(𝑒_𝑘\)</span> 중에서 가까운 1개 embedding code로 변환.</p></li>
-<li><p>Quantization: Encoding output <span class="math notranslate nohighlight">\(z_{e}(x)\)</span> representation 과 유사한 codebook embedding <span class="math notranslate nohighlight">\(e_j\)</span> 를 찾아서 <span class="math notranslate nohighlight">\(k\)</span> 값을 부여함.</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig51.png"><img alt="fig5" class="bg-primary mb-1" src="../../_images/fig51.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 90 </span><span class="caption-text">VQ-VAE 아키텍쳐, Loss 함수 \ (source: <a class="reference external" href="https://velog.io/&#64;p2yeong/Understanding-VQ-VAE-DALL-E-Explained-Pt.-1">https://velog.io/&#64;p2yeong/Understanding-VQ-VAE-DALL-E-Explained-Pt.-1</a>)</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig61.png"><img alt="fig6" class="bg-primary mb-1" src="../../_images/fig61.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 91 </span><span class="caption-text">Quantization of VQ-VAE</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="methodology">
-<h2>3. Methodology<a class="headerlink" href="#methodology" title="Permalink to this heading">#</a></h2>
-</section>
-<section id="limitation-of-previous-works">
-<h2>Limitation of Previous Works<a class="headerlink" href="#limitation-of-previous-works" title="Permalink to this heading">#</a></h2>
-<ol class="arabic simple">
-<li><p>Memory/Bottleneck Issue</p></li>
-</ol>
-<ul class="simple">
-<li><p>각 Image에서 나오는 pixel을 직접적으로 image token을 사용하면 고화질 이미지일수록 너무 많은 메모리량이 필요해서 “비효율적”</p></li>
-</ul>
-<ol class="arabic simple" start="2">
-<li><p>Short-range dependence modeling between pixels</p></li>
-</ol>
-<ul class="simple">
-<li><p>Model들 중 Likelihood function을 objective function으로 사용하면 short-range dependency를 우선적으로 볼 것이며 low-frequency 보다 high-frequency detail에 더욱 집중하게 됨.</p></li>
-<li><p>Low frequency 는 visually recognizable해서 시각적으로 더 도움이 되는 부분</p></li>
-</ul>
-<p>이 2가지 문제점을 극복하고자 Two-stage training process 제안</p>
-</section>
-<section id="dall-e-overview">
-<h2>DALL-E Overview<a class="headerlink" href="#dall-e-overview" title="Permalink to this heading">#</a></h2>
-<section id="stage-1-training-vq-vae">
-<h3>Stage 1: Training VQ-VAE<a class="headerlink" href="#stage-1-training-vq-vae" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><strong>Discrete VAE</strong>를 이용하여 <span class="math notranslate nohighlight">\(256 \times 256\)</span> RGB image \rightarrow  <span class="math notranslate nohighlight">\(32 \times 32\)</span> 이미지 토큰으로 압축</p></li>
-<li><p>각 이미지 토큰은 8,192개의 code 값 중에 하나 배정</p></li>
-<li><p>이미지의 <strong>quality 손실 없이</strong> <span class="math notranslate nohighlight">\(8 \times 8 \times 3\)</span> 배 만큼 context size를 적게 만들 수 있음.</p></li>
-</ul>
-</section>
-<section id="stage-2-training-an-autoregressive-transformer">
-<h3>Stage 2: Training an Autoregressive Transformer<a class="headerlink" href="#stage-2-training-an-autoregressive-transformer" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><strong>최대 256 BPE-Encoded text tokens</strong>들과 1024 image tokens (<span class="math notranslate nohighlight">\(32 \times 32\)</span>) 를 연속적으로 입력함 (concatenate)</p></li>
-<li><p>Text token과 Image Tokens 들의 joint distribution (결합 분포)를 모델링하여 autoregressive transformer을 학습</p></li>
-</ul>
-</section>
-</section>
-<section id="dall-e-pipeline">
-<h2>DALL-E Pipeline 예시<a class="headerlink" href="#dall-e-pipeline" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig71.png"><img alt="fig7" class="bg-primary mb-1" src="../../_images/fig71.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 92 </span><span class="caption-text">DALL-E 시각화 \ (source:<a class="reference external" href="https://jiho-ml.com/weekly-nlp-40/">https://jiho-ml.com/weekly-nlp-40/</a>)</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig81.png"><img alt="fig8" class="bg-primary mb-1" src="../../_images/fig81.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 93 </span><span class="caption-text">DALL-E 파이프라인 (source: <a class="reference external" href="https://www.youtube.com/watch?v=CQoM0r2kMvI&amp;t=1729s">https://www.youtube.com/watch?v=CQoM0r2kMvI&amp;t=1729s</a>)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="methodology-details">
-<h2>Methodology Details<a class="headerlink" href="#methodology-details" title="Permalink to this heading">#</a></h2>
-<section id="dall-e-equations">
-<h3>DALL-E Equations<a class="headerlink" href="#dall-e-equations" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig91.png"><img alt="fig9" class="bg-primary mb-1" src="../../_images/fig91.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 94 </span><span class="caption-text">equation 1</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig10.png"><img alt="fig10" class="bg-primary mb-1" src="../../_images/fig10.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 95 </span><span class="caption-text">equation 2: Maximizing ELBO</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>x: images, y: captions , z: encoded RGB image tokens</p>
-<p><strong><span style="color: red;">𝑞<sub>Φ (red)</sub></span></strong> : input image에서 dVAE encoder에서 생성한 32 x 32 image token를 예측</p>
-<p><strong><span style="color: blue;">𝑝<sub>𝜃 (blue)</sub></span></strong>: image token에서 dVAE decoder에서 생성한 RGB image를 예측</p>
-<p><strong><span style="color: purple;">𝑝<sub>ψ (purple)</sub></span></strong>: transformer 모델로 모델링한 text와 image token들의 결합 분포 (joint distribution)</p>
-</section>
-<section id="dall-e-stage-1-learning-the-visual-codebook">
-<h3>DALL-E 학습과정 Stage 1: Learning the Visual Codebook<a class="headerlink" href="#dall-e-stage-1-learning-the-visual-codebook" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Transformer을 고정하고 dVAE encoder &amp; decoder (𝑞_Φ , 𝑝_𝜃) 을 학습함</p>
-<ul>
-<li><p>즉, ELB (Evidence Lower Bound)를 maximize 함</p></li>
-<li><p>K = 8,192 codebook (embedding space)로 설정</p></li>
-</ul>
-</li>
-<li><p><strong>ELB를 optimize</strong> 하기 위해서는 discrete distribution을 continuous로 바꿔야 함</p>
-<ul>
-<li><p>학습 시 argmax를 사용해서 codebook vector 인덱스를 선택하면 미분이 불가능하여 Reparameterization gradient를 계산할 수 없음</p></li>
-<li><p>argmax 대신 <strong>gumbel softmax</strong>를 사용하여 해결</p></li>
-<li><p>평가를 진행할 때에는 <span class="math notranslate nohighlight">\(z = codebook[\underset{i}{argmax}[g_i+log(q(e_i|x))]]\)</span></p></li>
-</ul>
-</li>
-<li><p>Gumbel Softmax Relaxation를 사용하여 해결! <span class="math notranslate nohighlight">\(q_\phi \rightarrow q_{\phi}^{\tau}\)</span>, temperature <span class="math notranslate nohighlight">\(\tau \rightarrow 0\)</span>, relaxation을 tight하게 잡아줌.</p></li>
-</ul>
-</section>
-<section id="dall-e-stage-2-learning-the-prior">
-<h3>DALL-E 학습과정 Stage 2: Learning the Prior<a class="headerlink" href="#dall-e-stage-2-learning-the-prior" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>dVAE encoder &amp; decoder (<span class="math notranslate nohighlight">\(q_{\phi}\)</span> , <span class="math notranslate nohighlight">\(p_{\theta}\)</span>)를 고정하고 transformer의 prior distribution <span class="math notranslate nohighlight">\(p_{\psi}\)</span>를 학습함.</p></li>
-<li><p>이때, <span class="math notranslate nohighlight">\(p_{\psi}\)</span>의 ELB를 maximize 하며 120억개의 parameter를 가진 sparse transformer 구조를 사용함</p></li>
-<li><p>Image token은 dVAE Encoder logit에서 Argmax sampling을 통해 생성</p></li>
-<li><p>Text token은 소문자화 후 16,384 개의 vocabulary를 BPE-encoding 통해 한번에 최대 256 token을 활용</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig11.png"><img alt="fig11" class="bg-primary mb-1" src="../../_images/fig11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 96 </span><span class="caption-text">Text-to-text attention: causal attention mask
-Image-to-image attention: row/column/convolutional attention mask 적용</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="results">
-<h2>Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>추론 시에는 text에 대하여 N개의 이미지를 생성.</p></li>
-<li><p>Best of N개는 <strong>N개 생성 후 best</strong>를 골라서 선택 함.</p></li>
-<li><p>우수한 이미지를 고르기 위해 CLIP (Contrastive Language-Image Pretraining, 2021) 논문에서 제시한 text 와 k 번째로 similarity 점수가 높은 이미지를 선택함 (k=1)</p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig12.png"><img alt="fig12" class="bg-primary mb-1" src="../../_images/fig12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 97 </span><span class="caption-text">DALL-E 결과물. Best를 고를때 N 수가 증가할수록 주어진 text prompt랑 더 유사한 결과물이 나옴.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>생성한 512개 이미지 중 CLIP 알고리즘을 통해 similarity score이 제일 높은 이미지를 뽑음.</p></li>
-<li><p>Ours (DALL-E) vs 다른 baseline method 와 비교 시 text에 더욱 알맞은 이미지를 생성한 것을 확인 할 수 있음.</p></li>
-</ul>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig13.png"><img alt="fig13" class="bg-primary mb-1" src="../../_images/fig13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 98 </span><span class="caption-text">선택하는 이미지 개수에 따른 성능 향상</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>DF-GAN 이랑 비교해서 MS-COCO dataset에 대하여 정성적 평가를 진행.</p></li>
-<li><p>Best-of-Five votes 중에 DF-GAN보다 매번 압도적인 차이로 투표 수를 받았음.</p></li>
-</ul>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig14.png"><img alt="fig14" class="bg-primary mb-1" src="../../_images/fig14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 99 </span><span class="caption-text">DF-GAN 이랑 Qualitative Results 비교</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>FID (Frechet Inception Distance)는 값이 낮을수록 좋으며 / IS (Inception Score)는 높을수록 좋음</p></li>
-<li><p>MS-COCO 랑 CUB (새 특화 데이터셋) 기준, DALL-E는 MS-COCO에서는 뛰어난 성능을 보여줬음.</p></li>
-<li><p>CUB에서는 SOTA를 찍지 못하였고 Inception score에서는 낮은 점수를 기록함.</p></li>
-<li><p>저자들은 Fine-tuning 으로 CUB에 성능 개선을 할 수 있다고 생각함.</p></li>
-</ul>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig15.png"><img alt="fig15" class="bg-primary mb-1" src="../../_images/fig15.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 100 </span><span class="caption-text">MS-COCO 와 CUB dataset에서 FID/IS 결과값 비교</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="conclusion">
-<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>GPT-3의 확장 모델로 120억개의 parameter과 autoregressive Transformer (Decoder only) 기반 모델링을 통해 text-to-image generation task를 뛰어나게 해결함.</p></li>
-<li><p>Zero-shot learning에서 다른 모델보다 훌륭한 일반화 성능을 보임</p></li>
-<li><p>정량적 / 정성적 평가에서 준수한 성능을 보이고 있으며 다양한 이미지 생성이 가능함.</p></li>
-</ul>
-<p><strong>Limitations:</strong></p>
-<ul class="simple">
-<li><p>생성하고 싶은 이미지에 다양한 객체가 포함되면 어려움을 겪음</p></li>
-<li><p>(b)에 보면 고슴도치가 2마리거나 강아지와 고슴도치 둘다 크리스마스 스웨터를 입고 있음.</p></li>
-<li><p>CUB dataset 처럼 다소 아쉬운 성능을 보인 데이터셋이 있지만 fine-tuning으로 해결</p></li>
-</ul>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig16.png"><img alt="fig16" class="bg-primary mb-1" src="../../_images/fig16.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 101 </span><span class="caption-text">Limitation을 보여주는 결과물.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="diffusion_beats_GANs.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Diffusion Models Beat GANs on Image Synthesis</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DALLE2.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DALL-E 2</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gpt-3">GPT-3</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#vq-vae">VQ-VAE</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">3. Methodology</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-of-previous-works">Limitation of Previous Works</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-overview">DALL-E Overview</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-training-vq-vae">Stage 1: Training VQ-VAE</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-training-an-autoregressive-transformer">Stage 2: Training an Autoregressive Transformer</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-pipeline">DALL-E Pipeline 예시</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology-details">Methodology Details</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-equations">DALL-E Equations</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-1-learning-the-visual-codebook">DALL-E 학습과정 Stage 1: Learning the Visual Codebook</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-2-learning-the-prior">DALL-E 학습과정 Stage 2: Learning the Prior</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DALL-E &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/dalle';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DALL-E 2" href="DALLE2.html" />
+    <link rel="prev" title="Diffusion Models Beat GANs on Image Synthesis" href="diffusion_beats_GANs.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/dalle.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/dalle.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DALL-E</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gpt-3">GPT-3</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#vq-vae">VQ-VAE</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">3. Methodology</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-of-previous-works">Limitation of Previous Works</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-overview">DALL-E Overview</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-training-vq-vae">Stage 1: Training VQ-VAE</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-training-an-autoregressive-transformer">Stage 2: Training an Autoregressive Transformer</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-pipeline">DALL-E Pipeline 예시</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology-details">Methodology Details</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-equations">DALL-E Equations</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-1-learning-the-visual-codebook">DALL-E 학습과정 Stage 1: Learning the Visual Codebook</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-2-learning-the-prior">DALL-E 학습과정 Stage 2: Learning the Prior</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Zero-shot text-to-image generation (ICML 2021)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2102.12092">https://arxiv.org/abs/2102.12092</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/lucidrains/DALLE-pytorch">Unofficial-PyTorch</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/openai/DALL-E">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donggeun “Sean” Ko</p></li>
+<li><p><strong>Last updated on June 22 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dall-e">
+<h1>DALL-E<a class="headerlink" href="#dall-e" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GPT-3 기반 모델이며 120억개 parameter 수와 2.5억 데이터 (text,image) set으로 학습</p></li>
+<li><p>Autoregressive 한 모델링을 통하여 image와 text를 이용하여 text-to-image generation task를 수행</p></li>
+<li><p>2021년 기준 zero-shot SOTA performance 달성</p></li>
+<li><p>아래 그림과 같이 text input에 따라 diverse한 이미지 생성</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig1.png"><img alt="fig1" class="bg-primary mb-1" src="../../_images/fig1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 86 </span><span class="caption-text">Images generated using DALL-E</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig21.png"><img alt="fig2" class="bg-primary mb-1" src="../../_images/fig21.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 87 </span><span class="caption-text">Images generated using DALL-E</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="background">
+<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GPT-3와 VQ-VAE를 활용하여 나온 논문.</p></li>
+<li><p>VQ-VAE를 먼저 학습하고, Autoregressive Transformer을 순차적으로 학습하여 zero-shot architecture을 구축.</p></li>
+</ul>
+<section id="gpt-3">
+<h3>GPT-3<a class="headerlink" href="#gpt-3" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Autoregressive Language Model이며 few-shot learning을 통해 fine-tuning 없이 높은 성능을 냄 *(fine-tuning 을 할 수는 있지만 본 논문에서는 task-agnostic performance 에 중점을 맞춰 Few shot을 함)</p></li>
+<li><p>GPT-3 는 transformer에서 decoder 부분만 사용 (GPT-2 와 유사한 구조를 가지고 있음 )</p></li>
+<li><p>약 1750억 parameter 개수의 모델</p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig3.png"><img alt="fig3" class="bg-primary mb-1" src="../../_images/fig3.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 88 </span><span class="caption-text">Transformer 아키텍쳐 (source: <a class="reference external" href="https://arxiv.org/pdf/2005.14165.pdf">https://arxiv.org/pdf/2005.14165.pdf</a>)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id4">
+<img alt="GPT-3 GIF" src="../../_images/fig4.gif" />
+<figcaption>
+<p><span class="caption-number">Fig. 89 </span><span class="caption-text">GPT 3 Animation (source: <a class="reference external" href="https://jalammar.github.io/how-gpt3-works-visualizations-animations/">https://jalammar.github.io/how-gpt3-works-visualizations-animations/</a>)</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="vq-vae">
+<h3>VQ-VAE<a class="headerlink" href="#vq-vae" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Encoder에서 나온 output은 discrete 하며 posterior 과 prior 이 categorical distribution을 갖는다고 가정함.</p></li>
+<li><p>CNN (encoder) 을 거친 각 D차원의 위치에 <span class="math notranslate nohighlight">\(H \times W\)</span> 그리드로 이미지를 나누고 embedding space (Codebook) 에서 <span class="math notranslate nohighlight">\(𝑒_1\)</span>부터 <span class="math notranslate nohighlight">\(𝑒_𝑘\)</span> 중에서 가까운 1개 embedding code로 변환.</p></li>
+<li><p>Quantization: Encoding output <span class="math notranslate nohighlight">\(z_{e}(x)\)</span> representation 과 유사한 codebook embedding <span class="math notranslate nohighlight">\(e_j\)</span> 를 찾아서 <span class="math notranslate nohighlight">\(k\)</span> 값을 부여함.</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig51.png"><img alt="fig5" class="bg-primary mb-1" src="../../_images/fig51.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 90 </span><span class="caption-text">VQ-VAE 아키텍쳐, Loss 함수 (source: <a class="reference external" href="https://velog.io/&#64;p2yeong/Understanding-VQ-VAE-DALL-E-Explained-Pt.-1">https://velog.io/&#64;p2yeong/Understanding-VQ-VAE-DALL-E-Explained-Pt.-1</a>)</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig61.png"><img alt="fig6" class="bg-primary mb-1" src="../../_images/fig61.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 91 </span><span class="caption-text">Quantization of VQ-VAE</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="methodology">
+<h2>3. Methodology<a class="headerlink" href="#methodology" title="Permalink to this heading">#</a></h2>
+</section>
+<section id="limitation-of-previous-works">
+<h2>Limitation of Previous Works<a class="headerlink" href="#limitation-of-previous-works" title="Permalink to this heading">#</a></h2>
+<ol class="arabic simple">
+<li><p>Memory/Bottleneck Issue</p></li>
+</ol>
+<ul class="simple">
+<li><p>각 Image에서 나오는 pixel을 직접적으로 image token을 사용하면 고화질 이미지일수록 너무 많은 메모리량이 필요해서 “비효율적”</p></li>
+</ul>
+<ol class="arabic simple" start="2">
+<li><p>Short-range dependence modeling between pixels</p></li>
+</ol>
+<ul class="simple">
+<li><p>Model들 중 Likelihood function을 objective function으로 사용하면 short-range dependency를 우선적으로 볼 것이며 low-frequency 보다 high-frequency detail에 더욱 집중하게 됨.</p></li>
+<li><p>Low frequency 는 visually recognizable해서 시각적으로 더 도움이 되는 부분</p></li>
+</ul>
+<p>이 2가지 문제점을 극복하고자 Two-stage training process 제안</p>
+</section>
+<section id="dall-e-overview">
+<h2>DALL-E Overview<a class="headerlink" href="#dall-e-overview" title="Permalink to this heading">#</a></h2>
+<section id="stage-1-training-vq-vae">
+<h3>Stage 1: Training VQ-VAE<a class="headerlink" href="#stage-1-training-vq-vae" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>Discrete VAE</strong>를 이용하여 <span class="math notranslate nohighlight">\(256 \times 256\)</span> RGB image <span class="math notranslate nohighlight">\(\rightarrow\)</span> <span class="math notranslate nohighlight">\(32 \times 32\)</span> 이미지 토큰으로 압축</p></li>
+<li><p>각 이미지 토큰은 8,192개의 code 값 중에 하나 배정</p></li>
+<li><p>이미지의 <strong>quality 손실 없이</strong> <span class="math notranslate nohighlight">\(8 \times 8 \times 3\)</span> 배 만큼 context size를 적게 만들 수 있음.</p></li>
+</ul>
+</section>
+<section id="stage-2-training-an-autoregressive-transformer">
+<h3>Stage 2: Training an Autoregressive Transformer<a class="headerlink" href="#stage-2-training-an-autoregressive-transformer" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><strong>최대 256 BPE-Encoded text tokens</strong>들과 1024 image tokens (<span class="math notranslate nohighlight">\(32 \times 32\)</span>) 를 연속적으로 입력함 (concatenate)</p></li>
+<li><p>Text token과 Image Tokens 들의 joint distribution (결합 분포)를 모델링하여 autoregressive transformer을 학습</p></li>
+</ul>
+</section>
+</section>
+<section id="dall-e-pipeline">
+<h2>DALL-E Pipeline 예시<a class="headerlink" href="#dall-e-pipeline" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig71.png"><img alt="fig7" class="bg-primary mb-1" src="../../_images/fig71.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 92 </span><span class="caption-text">DALL-E 시각화 (source: <a class="reference external" href="https://jiho-ml.com/weekly-nlp-40/">https://jiho-ml.com/weekly-nlp-40/</a>)</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig81.png"><img alt="fig8" class="bg-primary mb-1" src="../../_images/fig81.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 93 </span><span class="caption-text">DALL-E 파이프라인 (source: <a class="reference external" href="https://www.youtube.com/watch?v=CQoM0r2kMvI&amp;t=1729s">https://www.youtube.com/watch?v=CQoM0r2kMvI&amp;t=1729s</a>)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="methodology-details">
+<h2>Methodology Details<a class="headerlink" href="#methodology-details" title="Permalink to this heading">#</a></h2>
+<section id="dall-e-equations">
+<h3>DALL-E Equations<a class="headerlink" href="#dall-e-equations" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig91.png"><img alt="fig9" class="bg-primary mb-1" src="../../_images/fig91.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 94 </span><span class="caption-text">equation 1</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig10.png"><img alt="fig10" class="bg-primary mb-1" src="../../_images/fig10.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 95 </span><span class="caption-text">equation 2: Maximizing ELBO</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>x: images, y: captions , z: encoded RGB image tokens</p>
+<p><strong><span style="color: red;">𝑞<sub>Φ (red)</sub></span></strong> : input image에서 dVAE encoder에서 생성한 32 x 32 image token를 예측</p>
+<p><strong><span style="color: blue;">𝑝<sub>𝜃 (blue)</sub></span></strong>: image token에서 dVAE decoder에서 생성한 RGB image를 예측</p>
+<p><strong><span style="color: purple;">𝑝<sub>ψ (purple)</sub></span></strong>: transformer 모델로 모델링한 text와 image token들의 결합 분포 (joint distribution)</p>
+</section>
+<section id="dall-e-stage-1-learning-the-visual-codebook">
+<h3>DALL-E 학습과정 Stage 1: Learning the Visual Codebook<a class="headerlink" href="#dall-e-stage-1-learning-the-visual-codebook" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Transformer을 고정하고 dVAE encoder &amp; decoder (𝑞_Φ , 𝑝_𝜃) 을 학습함</p>
+<ul>
+<li><p>즉, ELB (Evidence Lower Bound)를 maximize 함</p></li>
+<li><p>K = 8,192 codebook (embedding space)로 설정</p></li>
+</ul>
+</li>
+<li><p><strong>ELB를 optimize</strong> 하기 위해서는 discrete distribution을 continuous로 바꿔야 함</p>
+<ul>
+<li><p>학습시에는 결국, argmax를 사용해서 codebook vector 인덱스를 선택하여 계산하면 Reparameterization gradient를 연산 X</p></li>
+<li><p>argmax 대신 <strong>gumbel softmax</strong>를 사용하여 해결</p></li>
+<li><p>평가를 진행할 때에는 <span class="math notranslate nohighlight">\(z = codebook[\underset{i}{argmax}[g_i+log(q(e_i|x))]]\)</span></p></li>
+</ul>
+</li>
+<li><p>Gumbel Softmax Relaxation를 사용하여 해결! <span class="math notranslate nohighlight">\(q_\phi \rightarrow q_{\phi}^{\tau}\)</span>, temperature <span class="math notranslate nohighlight">\(\tau \rightarrow 0\)</span>, relaxation을 tight하게 잡아줌.</p></li>
+</ul>
+</section>
+<section id="dall-e-stage-2-learning-the-prior">
+<h3>DALL-E 학습과정 Stage 2: Learning the Prior<a class="headerlink" href="#dall-e-stage-2-learning-the-prior" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>dVAE encoder &amp; decoder (<span class="math notranslate nohighlight">\(q_{\phi}\)</span> , <span class="math notranslate nohighlight">\(p_{\theta}\)</span>)를 고정하고 transformer의 prior distribution <span class="math notranslate nohighlight">\(p_{\psi}\)</span>를 학습함.</p></li>
+<li><p>이때, <span class="math notranslate nohighlight">\(p_{\psi}\)</span>의 ELB를 maximize 하며 120억개의 parameter를 가진 sparse transformer 구조를 사용함</p></li>
+<li><p>Image token은 dVAE Encoder logit에서 Argmax sampling을 통해 생성</p></li>
+<li><p>Text token은 소문자화 후 16,384 개의 vocabulary를 BPE-encoding 통해 한번에 최대 256 token을 활용</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig11.png"><img alt="fig11" class="bg-primary mb-1" src="../../_images/fig11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 96 </span><span class="caption-text">Text-to-text attention: causal attention mask
+Image-to-image attention: row/column/convolutional attention mask 적용</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="results">
+<h2>Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>추론 시에는 text에 대하여 N개의 이미지를 생성.</p></li>
+<li><p>Best of N개는 <strong>N개 생성 후 best</strong>를 골라서 선택 함.</p></li>
+<li><p>우수한 이미지를 고르기 위해 CLIP (Contrastive Language-Image Pretraining, 2021) 논문에서 제시한 text 와 k 번째로 similarity 점수가 높은 이미지를 선택함 (k=1)</p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig12.png"><img alt="fig12" class="bg-primary mb-1" src="../../_images/fig12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 97 </span><span class="caption-text">DALL-E 결과물. Best를 고를때 N 수가 증가할수록 주어진 text prompt랑 더 유사한 결과물이 나옴.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>생성한 512개 이미지 중 CLIP 알고리즘을 통해 similarity score이 제일 높은 이미지를 뽑음.</p></li>
+<li><p>Ours (DALL-E) vs 다른 baseline method 와 비교 시 text에 더욱 알맞은 이미지를 생성한 것을 확인 할 수 있음.</p></li>
+</ul>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig13.png"><img alt="fig13" class="bg-primary mb-1" src="../../_images/fig13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 98 </span><span class="caption-text">선택하는 이미지 개수에 따른 성능 향상</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>DF-GAN 이랑 비교해서 MS-COCO dataset에 대하여 정성적 평가를 진행.</p></li>
+<li><p>Best-of-Five votes 중에 DF-GAN보다 매번 압도적인 차이로 투표 수를 받았음.</p></li>
+</ul>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig14.png"><img alt="fig14" class="bg-primary mb-1" src="../../_images/fig14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 99 </span><span class="caption-text">DF-GAN 이랑 Qualitative Results 비교</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>FID (Frechet Inception Distance)는 값이 낮을수록 좋으며 / IS (Inception Score)는 높을수록 좋음</p></li>
+<li><p>MS-COCO 랑 CUB (새 특화 데이터셋) 기준, DALL-E는 MS-COCO에서는 뛰어난 성능을 보여줬음.</p></li>
+<li><p>CUB에서는 SOTA를 찍지 못하였고 Inception score에서는 낮은 점수를 기록함.</p></li>
+<li><p>저자들은 Fine-tuning 으로 CUB에 성능 개선을 할 수 있다고 생각함.</p></li>
+</ul>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig15.png"><img alt="fig15" class="bg-primary mb-1" src="../../_images/fig15.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 100 </span><span class="caption-text">MS-COCO 와 CUB dataset에서 FID/IS 결과값 비교</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="conclusion">
+<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>GPT-3의 확장 모델로 120억개의 parameter과 autoregressive Transformer (Decoder only) 기반 모델링을 통해 text-to-image generation task를 뛰어나게 해결함.</p></li>
+<li><p>Zero-shot learning에서 다른 모델보다 훌륭한 일반화 성능을 보임</p></li>
+<li><p>정량적 / 정성적 평가에서 준수한 성능을 보이고 있으며 다양한 이미지 생성이 가능함.</p></li>
+</ul>
+<p><strong>Limitations:</strong></p>
+<ul class="simple">
+<li><p>생성하고 싶은 이미지에 다양한 객체가 포함되면 어려움을 겪음</p></li>
+<li><p>(b)에 보면 고슴도치가 2마리거나 강아지와 고슴도치 둘다 크리스마스 스웨터를 입고 있음.</p></li>
+<li><p>CUB dataset 처럼 다소 아쉬운 성능을 보인 데이터셋이 있지만 fine-tuning으로 해결</p></li>
+</ul>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fig16.png"><img alt="fig16" class="bg-primary mb-1" src="../../_images/fig16.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 101 </span><span class="caption-text">Limitation을 보여주는 결과물.</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="diffusion_beats_GANs.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Diffusion Models Beat GANs on Image Synthesis</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DALLE2.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DALL-E 2</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#gpt-3">GPT-3</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#vq-vae">VQ-VAE</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">3. Methodology</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-of-previous-works">Limitation of Previous Works</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-overview">DALL-E Overview</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-1-training-vq-vae">Stage 1: Training VQ-VAE</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#stage-2-training-an-autoregressive-transformer">Stage 2: Training an Autoregressive Transformer</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-pipeline">DALL-E Pipeline 예시</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology-details">Methodology Details</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-equations">DALL-E Equations</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-1-learning-the-visual-codebook">DALL-E 학습과정 Stage 1: Learning the Visual Codebook</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dall-e-stage-2-learning-the-prior">DALL-E 학습과정 Stage 2: Learning the Prior</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/diffusion_beats_GANs.html b/docs/review/diffusion_beats_GANs.html
old mode 100644
new mode 100755
index 5b80ec78..2224d24d
--- a/docs/review/diffusion_beats_GANs.html
+++ b/docs/review/diffusion_beats_GANs.html
@@ -1,965 +1,985 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Diffusion Models Beat GANs on Image Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/diffusion_beats_GANs';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DALL-E" href="dalle.html" />
-    <link rel="prev" title="StyleGAN" href="StyleGAN.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/diffusion_beats_GANs.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/diffusion_beats_GANs.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Diffusion Models Beat GANs on Image Synthesis</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-ddpm">Improved DDPM</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddim">DDIM</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architectural-improvements">3. Architectural Improvements</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-group-normalization">4. Adaptive Group Normalization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-guidance">5. Classifier Guidance</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#algorithm">6. Algorithm</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#impact-of-parameter-s-in-classifier-guidance">7. Impact of parameter s in classifier guidance</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">8. Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-synthesis-results">8-2. Image Synthesis Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-and-future-work">9. Limitation and Future Work</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Diffusion Models Beat GANs on Image Synthesis (NeurIPS 2021)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2105.05233">https://arxiv.org/abs/2105.05233</a></p></li>
-<li><p>Code: <a class="reference external" href="https://github.com/openai/guided-diffusion">Official</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Donggeun Sean Ko</p></li>
-<li><p><strong>Last updated on May. 17, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-beat-gans-on-image-synthesis">
-<h1>Diffusion Models Beat GANs on Image Synthesis<a class="headerlink" href="#diffusion-models-beat-gans-on-image-synthesis" title="Permalink to this heading">#</a></h1>
-<section id="abstract">
-<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Diffusion 모델들은 기존 unconditional 이미지 생성 모델들의 SOTA를 뛰어넘음.</p></li>
-<li><p>Conditional image synthesis 부분에서도 classifier guidance를 활용해 diffusion model을 활용하여 좋은 성능을 보여준다고 주장함.</p></li>
-<li><p>Classifier guidance를 활용해 diversity와 fidelity의 trade-off에 대해서도 분석</p></li>
-</ul>
-</section>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Diffusion 모델들은 likelihood-based model들이며 고화질 이미지를 생성해내는데에 성공 했음.</p></li>
-<li><p>하지만, FID 기준 성능은 BigGAN-deep에 비해 낮으며 (즉, FID 수치가 더 높으며), 개선이 필요함.</p></li>
-<li><p>두가지 contribution을 통해 Diffusion Model들의 성능을 끌어올리며 FID 결과 수치를 낮추겠다고 주장.</p>
-<ul>
-<li><p>모델 아키텍쳐 개선</p></li>
-<li><p>Classifier Guidance</p></li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="background">
-<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>DDPM, DDIM, Improved DDPM은 이전에 설명되어 있으므로, 각 background 논문들의 핵심 부분만 설명하겠습니다.</p></li>
-<li></li>
-</ul>
-<section id="ddpm">
-<h3>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>은 <span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>의 근사값이라고 가정하며 계산한다.
-- <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>를 학습하여 <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t) \approx\)</span> <span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>를 만든다.</p></li>
-<li><p><span class="math notranslate nohighlight">\(\epsilon_\theta(x_t,t)\)</span> 을 모델링하여  <strong>noise</strong>를 예측한다.</p></li>
-<li><p>공분산 <span class="math notranslate nohighlight">\(\Sigma_\theta(X_t,t)\)</span>은 학습 불가능한 매개변수로 설정되며 constant 값을 가진다.</p></li>
-<li><p>아래와 같이 <span class="math notranslate nohighlight">\(L_{simple}\)</span> 을 새로운 Loss function으로 제안한다.</p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ddpm_pipeline.png"><img alt="ddpm_pipeline" class="bg-primary mb-1" src="../../_images/ddpm_pipeline.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 70 </span><span class="caption-text">DDPM Pipeline</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DDPM_eq.png"><img alt="ddpm_eq" class="bg-primary mb-1" src="../../_images/DDPM_eq.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 71 </span><span class="caption-text">DDPM Equation</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="improved-ddpm">
-<h3>Improved DDPM<a class="headerlink" href="#improved-ddpm" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/improved_ddpm_pic.png"><img alt="improved_ddpm_pic" class="bg-primary mb-1" src="../../_images/improved_ddpm_pic.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 72 </span><span class="caption-text">Improved DDPM scheduling comparison with DDPM (Linear vs Cosine)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>더 적은 diffusion step으로 샘플링 함.</p></li>
-<li><p>Competitive log-likelihood 지표 성능 개선 (전 DDPM에선 log-likelihood 지표가 상대적으로 GAN 모델에 비해 낮았다)</p></li>
-<li><p>전 DDPM 논문에서는 linear scheduling을 사용했지만, 본 논문에서는 cosine scheduling을 사용해서 성능 향상을 했다고 주장했다.</p></li>
-<li><p>분산  <span class="math notranslate nohighlight">\(\Sigma_\theta(X_t,t)\)</span>을 학습에도 활용</p></li>
-<li><p><span class="math notranslate nohighlight">\(L_{hybrid}\)</span>라는 새로운 loss 함수 제시</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/improved_ddpm_eq.png"><img alt="improved_ddpm_eq" class="bg-primary mb-1" src="../../_images/improved_ddpm_eq.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 73 </span><span class="caption-text">Improved DDPM Equation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ddim">
-<h3>DDIM<a class="headerlink" href="#ddim" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ddim_pipe.png"><img alt="ddim_pipe" class="bg-primary mb-1" src="../../_images/ddim_pipe.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 74 </span><span class="caption-text">DDIM Pipeline</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Markovian Chain Process를 끊고 Non-Markovian 형태로 Deterministic 하게 수식을 바꿈</p></li>
-<li><p>DDPM 보다 더 적은 iteration으로 image synthesis 가능</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DDIM_pic.png"><img alt="ddim_pic" class="bg-primary mb-1" src="../../_images/DDIM_pic.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 75 </span><span class="caption-text">DDIM Sampling Equation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="architectural-improvements">
-<h2>3. Architectural Improvements<a class="headerlink" href="#architectural-improvements" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>DDPM에서 사용한 architecture을 그대로 채택했지만, 다양한 ablation 및 parameter을 변경하여 제일 높은 성능이 나오는 architecture을 설명 및 채택함</p></li>
-<li><p>모델 크기를 일정하게 가져가면서 Depth vs Width 증가 보기</p></li>
-<li><p>Attention head 수 증가 시켜보기</p></li>
-<li><p>각 Attention head에 resolution 을 8x8, 16x16, 32x32 로 실험 해보기</p></li>
-<li><p>일반 ResNet Residual Block이 아닌 BigGAN의 residual block을 채택하여 upsampling / downsampling 사용 해보기</p></li>
-<li><p>Residual Connection을 1/√2 로 rescaling 해보기</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_1.png"><img alt="architect_1" class="bg-primary mb-1" src="../../_images/architect_1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 76 </span><span class="caption-text">Table 1: Ablation of various architecture changes</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_2.png"><img alt="architect_2" class="bg-primary mb-1" src="../../_images/architect_2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 77 </span><span class="caption-text">Table 2: Ablation of various attention configurations. Attention head 가 32일때 FID 값이 제일 낮다 (좋다)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>3-1. Best Architecture</strong></p>
-<ul class="simple">
-<li><p>Channel 수 160</p></li>
-<li><p>Depth 2</p></li>
-<li><p>number of Attention Head = 4</p></li>
-<li><p>Attention Resolution을 32, 16, 8 로 block마다 줄이기</p></li>
-<li><p>BigGAN residual block 채택</p></li>
-<li><p>Rescaling X</p></li>
-<li><p>위와 같은 parameter를 통해 제일 좋은 FID 결과가 나옴</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_3.png"><img alt="architect_3" class="bg-primary mb-1" src="../../_images/architect_3.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 78 </span><span class="caption-text">Table 3: 다양한 parameter 튜닝을 통한 제일 좋은 FID 성능 테이블</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="adaptive-group-normalization">
-<h2>4. Adaptive Group Normalization<a class="headerlink" href="#adaptive-group-normalization" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>본 저자들은 AdaIN이랑 비슷한 방식으로 연산하는 AdaGN 이라는 것을 소개했다. (원래 있는 방법론인지는 모르겠다…)</p></li>
-<li><p>Group Normalization을 adaptive하게 하는 방법으로 Group Normalization 후에 residual block에 time step embedding과 class embedding을 AdaIN 방식으로 곱하고 더함</p></li>
-</ul>
-<p>Equation</p>
-<div class="math notranslate nohighlight">
-\[AdaIN(x,y) = \sigma(y)(\frac{x-\mu(x)}{\sigma(x)})+\mu(y)\]</div>
-<div class="math notranslate nohighlight">
-\[AdaGN(h,y) = y_s \cdot GroupNorm(h) + y_b\]</div>
-<p>where <span class="math notranslate nohighlight">\(h\)</span> is the activation of the residual block and <span class="math notranslate nohighlight">\(y = [y_s,y_b]\)</span> is obtained from a linear projection of the time-step embedding and the class embedding</p>
-<p><strong>4-1 AdaGN의 성능</strong></p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/adagn_table.png"><img alt="adagn_table" class="bg-primary mb-1" src="../../_images/adagn_table.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 79 </span><span class="caption-text">AdaGN과 Addition+GroupNorm 비교 테이블. DDPM에서 사용한 normalization보다 더 좋은 성능을 보여주고 있음.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>기존 DDPM은 Addition + GroupNorm layer을 사용했는데, AdaGN 을 사용하는 것이 FID가 더 낮게 (즉 더 좋은 성능) 나온 것을 볼 수 있다</p></li>
-</ul>
-</section>
-<section id="classifier-guidance">
-<h2>5. Classifier Guidance<a class="headerlink" href="#classifier-guidance" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>본 논문의 주 contribution 중 하나가 classifier guidance를 사용했다는 점이다.</p></li>
-<li><p>unconditional de-noising process에서 label y를 condition으로 줌으로써 conditional de-noising process로 진행</p></li>
-</ul>
-<p>Equation
-<span class="math notranslate nohighlight">\(p_{\theta, \phi }(x_t|x_{t+1},y) = Zp_\theta(x_t|x_{t+1})p_\phi(y|x_t)\)</span></p>
-<ul class="simple">
-<li><p>Z 는 normalizing을 위한 상수 이다</p></li>
-</ul>
-<p><strong>5-1 Classifier Guidance 유도</strong></p>
-<p><span class="math notranslate nohighlight">\(\log p_\phi(y|x_t)\)</span>가 <span class="math notranslate nohighlight">\(\Sigma^{-1}\)</span> 에 비해 곡률이 낮다고 가정한다. diffusion step이 무한으로 갈 시 <span class="math notranslate nohighlight">\(\|\Sigma\| \rightarrow 0\)</span> 이므로 이 가정은 타당하며, 이때 <span class="math notranslate nohighlight">\(\log p_\phi(y|x_t)\)</span>를 <span class="math notranslate nohighlight">\(x_t = \mu\)</span> 근방에서 테일러 급수로 전개하여 근사할 수 있다.</p>
-<ul class="simple">
-<li><p>classifier의 gradient를 활용해서 학습을 같이 해준다.</p></li>
-<li><p>식 유도는 아래와 같다. 본문의 (3) ~ (10) 번식이므로 본 논문을 참고하면 좋다.</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/class_eq1.png"><img alt="class_eq1" class="bg-primary mb-1" src="../../_images/class_eq1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 80 </span><span class="caption-text">Classifier Guidance 유도 식 1,2</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/class_eq2.png"><img alt="classifier_2" class="bg-primary mb-1" src="../../_images/class_eq2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 81 </span><span class="caption-text">Classifier Guidance 유도 식 3~7</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="algorithm">
-<h2>6. Algorithm<a class="headerlink" href="#algorithm" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/algorithm.png"><img alt="algorithm" class="bg-primary mb-1" src="../../_images/algorithm.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 82 </span><span class="caption-text">Algorithm 1 &amp; 2 sampling method. Algorithm 1은 일반적인 DDPM 기준, Algorithm 2는 DDIM 기준 guidance 한 sampling 방법</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Algorithm 1 은 일반 DDPM에서 샘플링 하는 방법이다. 똑같이 Gaussian distribution에서 샘플링 할 시, classifier의 gradient를 활용하여 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>를 sample한다.</p></li>
-<li><p>Algorithm 2 는 DDIM에서 샘플링 하는 방법이다. <span class="math notranslate nohighlight">\(\epsilon\)</span> 모델에서 나오는 output과 classifier의 gradient의 joint distribution 값을 빼 score을 구한다.</p></li>
-<li><p>DDIM은 Deterministic하기때문에 모든 시점의 값을 모두 계산할 필요 없이 subset의 시점만으로 sampling이 가능하다.</p></li>
-<li><p>이 Accelerating method는 약간의 quality 저하가 있지만 Computational efficiency를 충분히 증가시킬 수 있다.</p></li>
-<li><p><strong>DDIM 방식의 재학습 없이 DDPM의 training에 DDIM의 sampling이 가능하다.</strong></p></li>
-</ul>
-</section>
-<section id="impact-of-parameter-s-in-classifier-guidance">
-<h2>7. Impact of parameter s in classifier guidance<a class="headerlink" href="#impact-of-parameter-s-in-classifier-guidance" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/classifier_guidance_vis.png"><img alt="class_guidance_vis" class="bg-primary mb-1" src="../../_images/classifier_guidance_vis.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 83 </span><span class="caption-text">Classifier Guidance scaling의 영향 시각화</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>classifier guidance 앞에 hyperparameter <strong>s</strong> 의 값에 따라 classifier가 줄 수 있는 scaling이 다르다.</p></li>
-<li><p>scale을 1.0으로 주면 웰시코기라는 class의 scale 영향을 덜 받아 “웰시코기스러운” 강아지가 생성이 많이 되지는 않는다.</p></li>
-<li><p>scale을 10.0으로 주면 웰시코기 class라는 scaling의 영향을 많이 받아 웰시코기 분위기의 강아지의 이미지가 더 많이 생성 되는 것을 볼 수 있다.</p></li>
-<li><p>epsilon이라는 모델이 결국 scale에 따라 gradient의 영향을 얼마나 많이 받는지 sampling할 때 볼 수 있다.</p></li>
-</ul>
-</section>
-<section id="results">
-<h2>8. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/plot_result.png"><img alt="plot result" class="bg-primary mb-1" src="../../_images/plot_result.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 84 </span><span class="caption-text">Fidelity vs Diversity Trade-off 결과</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>gradient scale이 높을수록 recall은 낮지만, precision은 높다. 즉 trade-off 가 생기는데, recall이 낮을수록 diversity가 낮다는 의미이고, precision이 높을수록 fidelity가 높다는 뜻이다.</p></li>
-<li><p>scale을 높일수록 다양한 이미지가 생성되는 것이 아닌, classifier가 준 label쪽으로 guide가 생기므로 일정한 class의 사진이 나온다.</p></li>
-<li><p>FID와 sFID는 diversity와 fidelity의 trade-off로 도출되는 값이므로, 최고의 값은 중간 지점에서 나왔다.</p></li>
-</ul>
-<p><strong>8-1. Result Table</strong></p>
-<ul class="simple">
-<li><p>ADM은 Ablated Diffusion Model의 약자이며, ADM-G는 Ablated Diffusion Model with Guidance의 약자이다.</p></li>
-<li><p>Guidance를 주었을 시 제일 좋은 FID값이 나왔으며, Precision이 높을수록, Recall이 낮게 나왔다 (and vice versa).</p></li>
-</ul>
-</section>
-<section id="image-synthesis-results">
-<h2>8-2. Image Synthesis Results<a class="headerlink" href="#image-synthesis-results" title="Permalink to this heading">#</a></h2>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_results.png"><img alt="img_results" class="bg-primary mb-1" src="../../_images/img_results.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 85 </span><span class="caption-text">Generated Images (Left: BigGAN, Center: DMs, Right: Train Dataset)</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>두번째 플라밍고 생성된 사진을 볼때, BigGAN은 이미지들 간의 diversity가 없다. 학습된 플라밍고가 다수 플라밍고 시 비슷한 느낌의 이미지만 뽑아낸다.</p></li>
-<li><p>반면, Diffusion model with guidance를 사용했을 시, 다채로운 플라밍고 사진을 볼 수 있다. 한마리만 있는 플라밍고 사진도 뽑아 낼 수 있다.</p></li>
-</ul>
-</section>
-<section id="limitation-and-future-work">
-<h2>9. Limitation and Future Work<a class="headerlink" href="#limitation-and-future-work" title="Permalink to this heading">#</a></h2>
-<p><strong>Limitation 1</strong></p>
-<ul class="simple">
-<li><p>Diffusion 모델들은 GAN보다 샘플링 시간이 아직 느리다.</p></li>
-</ul>
-<p><strong>Future Work 1</strong></p>
-<ul class="simple">
-<li><p>DDIM의 sampling process를 distillation 해서 빠르게 하는 법을 고려</p></li>
-</ul>
-<p><strong>Limitation 2</strong></p>
-<ul class="simple">
-<li><p>Classifier guidance는 classification function의 gradient를 사용함으로써, label이 없는 data에는 확장이 불가능하다.</p></li>
-</ul>
-<p><strong>Future Work 2</strong></p>
-<ul class="simple">
-<li><p>Unlabeled sample을 clustering 하는 방법을 통해 방법론을 expand 하려 한다.</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="StyleGAN.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">StyleGAN</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="dalle.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DALL-E</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-ddpm">Improved DDPM</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddim">DDIM</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architectural-improvements">3. Architectural Improvements</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-group-normalization">4. Adaptive Group Normalization</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-guidance">5. Classifier Guidance</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#algorithm">6. Algorithm</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#impact-of-parameter-s-in-classifier-guidance">7. Impact of parameter s in classifier guidance</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">8. Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-synthesis-results">8-2. Image Synthesis Results</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-and-future-work">9. Limitation and Future Work</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Diffusion Models Beat GANs on Image Synthesis &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/diffusion_beats_GANs';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DALL-E" href="dalle.html" />
+    <link rel="prev" title="StyleGAN" href="StyleGAN.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/diffusion_beats_GANs.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/diffusion_beats_GANs.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Diffusion Models Beat GANs on Image Synthesis</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-ddpm">Improved DDPM</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddim">DDIM</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architectural-improvements">3. Architectural Improvements</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-group-normalization">4. Adaptive Group Normalization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-guidance">5. Classifier Guidance</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#algorithm">6. Algorithm</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#impact-of-parameter-s-in-classifier-guidance">7. Impact of parameter s in classifier guidance</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">8. Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-synthesis-results">8-2. Image Synthesis Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-and-future-work">9. Limitation and Future Work</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Diffusion Models Beat GANs on Image Synthesis (NeurIPS 2021)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper:  <a class="reference external" href="https://arxiv.org/abs/2105.05233">https://arxiv.org/abs/2105.05233</a></p></li>
+<li><p>Code: <a class="reference external" href="https://github.com/openai/guided-diffusion">Official</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donggeun Sean Ko</p></li>
+<li><p><strong>Last updated on May. 17, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="diffusion-models-beat-gans-on-image-synthesis">
+<h1>Diffusion Models Beat GANs on Image Synthesis<a class="headerlink" href="#diffusion-models-beat-gans-on-image-synthesis" title="Permalink to this heading">#</a></h1>
+<section id="abstract">
+<h2>Abstract<a class="headerlink" href="#abstract" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Diffusion 모델들은 기존 unconditional 이미지 생성 모델들의 SOTA를 뛰어넘음.</p></li>
+<li><p>Conditional image synthesis 부분에서도 classifier guidance를 활용해 diffusion model을 활용하여 좋은 성능을 보여준다고 주장함.</p></li>
+<li><p>Classifier guidance를 활용해 diversity와 fidelity의 trade-off에 대해서도 분석</p></li>
+</ul>
+</section>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Diffusion 모델들은 likelihood-based model들이며 고화질 이미지를 생성해내는데에 성공 했음.</p></li>
+<li><p>하지만, FID 성능은 BigGAN-deep에 비해 여전히 떨어지며 (FID 수치가 더 높음), 개선이 필요함.</p></li>
+<li><p>두가지 contribution을 통해 Diffusion Model들의 성능을 끌어올리며 FID 결과 수치를 낮추겠다고 주장.</p>
+<ul>
+<li><p>모델 아키텍쳐 개선</p></li>
+<li><p>Classifier Guidance</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="background">
+<h2>2. Background<a class="headerlink" href="#background" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>DDPM, DDIM, Improved DDPM은 이전에 설명되어 있으므로, 각 background 논문들의 핵심 부분만 설명하겠습니다.</p></li>
+<li></li>
+</ul>
+<section id="ddpm">
+<h3>DDPM<a class="headerlink" href="#ddpm" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>은 <span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>의 근사값이라고 가정하며 계산한다.
+- <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t)\)</span>를 학습하여 <span class="math notranslate nohighlight">\(p_\theta(x_{t-1}|x_t) \approx\)</span> <span class="math notranslate nohighlight">\(q(x_{t-1}|x_t)\)</span>를 만든다.</p></li>
+<li><p><span class="math notranslate nohighlight">\(\epsilon_\theta(x_t,t)\)</span> 을 모델링하여  <strong>noise</strong>를 예측한다.</p></li>
+<li><p>공분산 <span class="math notranslate nohighlight">\(\Sigma_\theta(X_t,t)\)</span>은 학습 불가능한 매개변수로 설정되며 constant 값을 가진다.</p></li>
+<li><p>아래와 같이 <span class="math notranslate nohighlight">\(L_{simple}\)</span> 을 새로운 Loss function으로 제안한다.</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ddpm_pipeline.png"><img alt="ddpm_pipeline" class="bg-primary mb-1" src="../../_images/ddpm_pipeline.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 70 </span><span class="caption-text">DDPM Pipeline</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DDPM_eq.png"><img alt="ddpm_eq" class="bg-primary mb-1" src="../../_images/DDPM_eq.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 71 </span><span class="caption-text">DDPM Equation</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="improved-ddpm">
+<h3>Improved DDPM<a class="headerlink" href="#improved-ddpm" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/improved_ddpm_pic.png"><img alt="improved_ddpm_pic" class="bg-primary mb-1" src="../../_images/improved_ddpm_pic.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 72 </span><span class="caption-text">Improved DDPM scheduling comparison with DDPM (Linear vs Cosine)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>더 적은 diffusion step으로 샘플링 함.</p></li>
+<li><p>Competitive log-likelihood 지표 성능 개선 (이전 DDPM에서는 log-likelihood 지표가 상대적으로 GAN 모델에 비해 낮았다)</p></li>
+<li><p>전 DDPM 논문에서는 linear scheduling을 사용했지만, 본 논문에서는 cosine scheduling을 사용해서 성능 향상을 했다고 주장했다.</p></li>
+<li><p>분산  <span class="math notranslate nohighlight">\(\Sigma_\theta(X_t,t)\)</span>을 학습에도 활용</p></li>
+<li><p><span class="math notranslate nohighlight">\(L_{hybrid}\)</span>라는 새로운 loss 함수 제시</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/improved_ddpm_eq.png"><img alt="improved_ddpm_eq" class="bg-primary mb-1" src="../../_images/improved_ddpm_eq.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 73 </span><span class="caption-text">Improved DDPM Equation</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ddim">
+<h3>DDIM<a class="headerlink" href="#ddim" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ddim_pipe.png"><img alt="ddim_pipe" class="bg-primary mb-1" src="../../_images/ddim_pipe.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 74 </span><span class="caption-text">DDIM Pipeline</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Markovian Chain Process를 끊고 Non-Markovian 형태로 Deterministic 하게 수식을 바꿈</p></li>
+<li><p>DDPM 보다 더 적은 iteration으로 image synthesis 가능</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/DDIM_pic.png"><img alt="ddim_pic" class="bg-primary mb-1" src="../../_images/DDIM_pic.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 75 </span><span class="caption-text">DDIM Sampling Equation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="architectural-improvements">
+<h2>3. Architectural Improvements<a class="headerlink" href="#architectural-improvements" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>DDPM에서 사용한 architecture을 그대로 채택했지만, 다양한 ablation 및 parameter을 변경하여 제일 높은 성능이 나오는 architecture을 설명 및 채택함</p></li>
+<li><p>모델 크기를 일정하게 가져가면서 Depth vs Width 증가 보기</p></li>
+<li><p>Attention head 수 증가 시켜보기</p></li>
+<li><p>각 Attention head에 resolution 을 8x8, 16x16, 32x32 로 실험 해보기</p></li>
+<li><p>일반 ResNet Residual Block이 아닌 BigGAN의 residual block을 채택하여 upsampling / downsampling 사용 해보기</p></li>
+<li><p>Residual Connection을 1/√2 로 rescaling 해보기</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_1.png"><img alt="architect_1" class="bg-primary mb-1" src="../../_images/architect_1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 76 </span><span class="caption-text">Table 1: Ablation of various architecture changes</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_2.png"><img alt="architect_2" class="bg-primary mb-1" src="../../_images/architect_2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 77 </span><span class="caption-text">Table 2: Ablation of various attention configurations. Attention head 가 32일때 FID 값이 제일 낮다 (좋다)</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>3-1. Best Architecture</strong></p>
+<ul class="simple">
+<li><p>Channel 수 160</p></li>
+<li><p>Depth 2</p></li>
+<li><p>number of Attention Head = 4</p></li>
+<li><p>Attention Resolution을 32, 16, 8 로 block마다 줄이기</p></li>
+<li><p>BigGAN residual block 채택</p></li>
+<li><p>Rescaling X</p></li>
+<li><p>위와 같은 parameter를 통해 제일 좋은 FID 결과가 나옴</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/architect_3.png"><img alt="architect_3" class="bg-primary mb-1" src="../../_images/architect_3.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 78 </span><span class="caption-text">Table 3: 다양한 parameter 튜닝을 통한 제일 좋은 FID 성능 테이블</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="adaptive-group-normalization">
+<h2>4. Adaptive Group Normalization<a class="headerlink" href="#adaptive-group-normalization" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>본 저자들은 AdaIN이랑 비슷한 방식으로 연산하는 AdaGN 이라는 것을 소개했다. (원래 있는 방법론인지는 모르겠다…)</p></li>
+<li><p>Group Normalization을 adaptive하게 하는 방법으로 Group Normalization 후에 residual block에 time step embedding과 class embedding을 AdaIN 방식으로 곱하고 더함</p></li>
+</ul>
+<p>Equation</p>
+<div class="math notranslate nohighlight">
+\[AdaIN(x,y) = \sigma(y)(\frac{x-\mu(x)}{\sigma(x)})+\mu(y)\]</div>
+<div class="math notranslate nohighlight">
+\[AdaGN(h,y) = y_s \cdot GroupNorm(h) + y_b\]</div>
+<p>where <span class="math notranslate nohighlight">\(h =\)</span> residual block and <span class="math notranslate nohighlight">\(y = [y_s,y_b]\)</span> time-step embedding and class embedding’s linear projection respectively</p>
+<p><strong>4-1 AdaGN의 성능</strong></p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/adagn_table.png"><img alt="adagn_table" class="bg-primary mb-1" src="../../_images/adagn_table.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 79 </span><span class="caption-text">AdaGN과 Addition+GroupNorm 비교 테이블. DDPM에서 사용한 normalization보다 더 좋은 성능을 보여주고 있음.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>기존 DDPM은 Addition + GroupNorm layer을 사용했는데, AdaGN 을 사용하는 것이 FID가 더 낮게 (즉 더 좋은 성능) 나온 것을 볼 수 있다</p></li>
+</ul>
+</section>
+<section id="classifier-guidance">
+<h2>5. Classifier Guidance<a class="headerlink" href="#classifier-guidance" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>본 논문의 주 contribution 중 하나가 classifier guidance를 사용했다는 점이다.</p></li>
+<li><p>unconditional de-noising process에서 label y를 condition으로 줌으로써 conditional de-noising process로 진행</p></li>
+</ul>
+<p>Equation
+<span class="math notranslate nohighlight">\(p_{\theta, \phi }(x_t|x_{t+1},y) = Zp_\theta(x_t|x_{t+1})p_\phi(y|x_t)\)</span></p>
+<ul class="simple">
+<li><p>Z 는 normalizing을 위한 상수 이다</p></li>
+</ul>
+<p><strong>5-1 Classifier Guidance 유도</strong></p>
+<p><span class="math notranslate nohighlight">\(\log p_\phi(y|x_t)\)</span>가 <span class="math notranslate nohighlight">\(\Sigma^{-1}\)</span> 에 비해 곡률이 낮다고 가정한다. diffusion step이 무한으로 갈 시 <span class="math notranslate nohighlight">\(\|\Sigma\| \rightarrow 0\)</span> 이므로 이 가정은 타당하며, 이때 <span class="math notranslate nohighlight">\(\log p_\phi(y|x_t)\)</span>를 <span class="math notranslate nohighlight">\(x_t = \mu\)</span> 주변에서 테일러 급수로 전개하여 근사할 수 있다.</p>
+<ul class="simple">
+<li><p>classifier의 gradient를 활용해서 학습을 같이 해준다.</p></li>
+<li><p>식 유도는 아래와 같다. 본문의 (3) ~ (10) 번식이므로 본 논문을 참고하면 좋다.</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/class_eq1.png"><img alt="class_eq1" class="bg-primary mb-1" src="../../_images/class_eq1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 80 </span><span class="caption-text">Classifier Guidance 유도 식 1,2</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/class_eq2.png"><img alt="classifier_2" class="bg-primary mb-1" src="../../_images/class_eq2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 81 </span><span class="caption-text">Classifier Guidance 유도 식 3~7</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="algorithm">
+<h2>6. Algorithm<a class="headerlink" href="#algorithm" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/algorithm.png"><img alt="algorithm" class="bg-primary mb-1" src="../../_images/algorithm.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 82 </span><span class="caption-text">Algorithm 1 &amp; 2 sampling method. Algorithm 1은 일반적인 DDPM 기준, Algorithm 2는 DDIM 기준 guidance 한 sampling 방법</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Algorithm 1 은 일반 DDPM에서 샘플링 하는 방법이다. 똑같이 Gaussian distribution에서 샘플링 할 시, classifier의 gradient를 활용하여 <span class="math notranslate nohighlight">\(x_{t-1}\)</span>를 sample한다.</p></li>
+<li><p>Algorithm 2 는 DDIM에서 샘플링 하는 방법이다. <span class="math notranslate nohighlight">\(\epsilon\)</span> 모델에서 나오는 output과 classifier의 gradient의 joint distribution 값을 빼 score을 구한다.</p></li>
+<li><p>DDIM은 Deterministic하기때문에 모든 시점의 값을 모두 계산할 필요 없이 subset의 시점만으로 sampling이 가능하다.</p></li>
+<li><p>이 Accelerating method는 약간의 quality 저하가 있지만 Computational efficiency를 충분히 증가시킬 수 있다.</p></li>
+<li><p><strong>DDIM 방식의 재학습 없이 DDPM의 training에 DDIM의 sampling이 가능하다.</strong></p></li>
+</ul>
+</section>
+<section id="impact-of-parameter-s-in-classifier-guidance">
+<h2>7. Impact of parameter s in classifier guidance<a class="headerlink" href="#impact-of-parameter-s-in-classifier-guidance" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/classifier_guidance_vis.png"><img alt="class_guidance_vis" class="bg-primary mb-1" src="../../_images/classifier_guidance_vis.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 83 </span><span class="caption-text">Classifier Guidance scaling의 영향 시각화</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>classifier guidance 앞에 hyperparameter <span class="math notranslate nohighlight">\(\mathbf{s}\)</span> 의 값에 따라 classifier가 줄 수 있는 scaling이 다르다.</p></li>
+<li><p>scale을 1.0으로 주면 웰시코기라는 class의 scale 영향을 덜 받아 “웰시코기스러운” 강아지가 생성이 많이 되지는 않는다.</p></li>
+<li><p>scale을 10.0으로 주면 웰시코기 class라는 scaling의 영향을 많이 받아 웰시코기 분위기의 강아지의 이미지가 더 많이 생성 되는 것을 볼 수 있다.</p></li>
+<li><p>epsilon이라는 모델이 결국 scale에 따라 gradient의 영향을 얼마나 많이 받는지 sampling할 때 볼 수 있다.</p></li>
+</ul>
+</section>
+<section id="results">
+<h2>8. Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/plot_result.png"><img alt="plot result" class="bg-primary mb-1" src="../../_images/plot_result.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 84 </span><span class="caption-text">Fidelity vs Diversity Trade-off 결과</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>gradient scale이 높을수록 recall은 낮지만, precision은 높다. 즉 trade-off 가 생기는데, recall이 낮을수록 diversity가 낮다는 의미이고, precision이 높을수록 fidelity가 높다는 뜻이다.</p></li>
+<li><p>scale을 높일수록 다양한 이미지가 생성되는 것이 아닌, classifier가 준 label쪽으로 guide가 생기므로 일정한 class의 사진이 나온다.</p></li>
+<li><p>FID와 sFID는 diversity와 fidelity의 trade-off로 도출되는 값이므로, 최고의 값은 중간 지점에서 나왔다.</p></li>
+</ul>
+<p><strong>8-1. Result Table</strong></p>
+<ul class="simple">
+<li><p>ADM은 Ablated Diffusion Model의 약자이며, ADM-G는 Ablated Diffusion Model with Guidance의 약자이다.</p></li>
+<li><p>Guidance를 주었을 시 제일 좋은 FID값이 나왔으며, Precision이 높을수록, Recall이 낮게 나왔다 (and vice versa).</p></li>
+</ul>
+</section>
+<section id="image-synthesis-results">
+<h2>8-2. Image Synthesis Results<a class="headerlink" href="#image-synthesis-results" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/img_results.png"><img alt="img_results" class="bg-primary mb-1" src="../../_images/img_results.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 85 </span><span class="caption-text">Generated Images (Left: BigGAN, Center: DMs, Right: Train Dataset)</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>두번째 플라밍고 생성된 사진을 볼 때, BigGAN은 이미지들 간의 diversity가 없다. 학습된 플라밍고가 다수 플라밍고 시 비슷한 느낌의 이미지만 뽑아낸다.</p></li>
+<li><p>반면, Diffusion model with guidance를 사용했을 시, 다채로운 플라밍고 사진을 볼 수 있다. 한마리만 있는 플라밍고 사진도 뽑아 낼 수 있다.</p></li>
+</ul>
+</section>
+<section id="limitation-and-future-work">
+<h2>9. Limitation and Future Work<a class="headerlink" href="#limitation-and-future-work" title="Permalink to this heading">#</a></h2>
+<p><strong>Limitation 1</strong></p>
+<ul class="simple">
+<li><p>Diffusion 모델들은 GAN보다 샘플링 시간이 아직 느리다.</p></li>
+</ul>
+<p><strong>Future Work 1</strong></p>
+<ul class="simple">
+<li><p>DDIM의 sampling process를 distillation 해서 빠르게 하는 법을 고려</p></li>
+</ul>
+<p><strong>Limitation 2</strong></p>
+<ul class="simple">
+<li><p>Classifier guidance는 classification function의 gradient를 사용함으로써, label이 없는 data에는 확장이 불가능하다.</p></li>
+</ul>
+<p><strong>Future Work 2</strong></p>
+<ul class="simple">
+<li><p>Unlabeled sample을 clustering 하는 방법을 통해 방법론을 expand 하려 한다.</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="StyleGAN.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">StyleGAN</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="dalle.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DALL-E</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#abstract">Abstract</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background">2. Background</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddpm">DDPM</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#improved-ddpm">Improved DDPM</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ddim">DDIM</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#architectural-improvements">3. Architectural Improvements</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#adaptive-group-normalization">4. Adaptive Group Normalization</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-guidance">5. Classifier Guidance</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#algorithm">6. Algorithm</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#impact-of-parameter-s-in-classifier-guidance">7. Impact of parameter s in classifier guidance</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">8. Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#image-synthesis-results">8-2. Image Synthesis Results</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitation-and-future-work">9. Limitation and Future Work</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/dreambooth.html b/docs/review/dreambooth.html
old mode 100644
new mode 100755
index 2c29ef88..f683223d
--- a/docs/review/dreambooth.html
+++ b/docs/review/dreambooth.html
@@ -1,924 +1,944 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>DreamBooth &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/dreambooth';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="ControlNet" href="ControlNet.html" />
-    <link rel="prev" title="DALL-E 2" href="DALLE2.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/dreambooth.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/dreambooth.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>DreamBooth</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning">Fine-tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">Applications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> DreamBooth: Fine Tuning Text-to-Image Diffusion Models for Subject-Driven Generation (CVPR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2208.12242">https://arxiv.org/abs/2208.12242</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth">huggingface/diffusers</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on May. 31, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="dreambooth">
-<h1>DreamBooth<a class="headerlink" href="#dreambooth" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>최근에 DALL-E2, Imagen, Stable Diffusion 등 다양한 text-to-image generation 모델들이 등장하였지만, 어떠한 동일한 subject 에 대해서 다른 context 에 적용하는 부분에서 부족한 면들을 보여주고 있습니다. DreamBooth 논문은 이러한 문제점을 개선하기 위해 text-to-image 모델을 fine-tuning 하는 기법으로 소개되었고, 단 3-5장의 이미지를 학습하면 되며 이를 NVIDIA A100 으로 학습하는데 5분 정도밖에 소요되지 않는다고 합니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_01.png"><img alt="dreambooth_01" class="bg-primary mb-1" src="../../_images/dreambooth_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 131 </span><span class="caption-text">Subject-Driven Generation</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DreamBooth 가 무엇인지 자세히 알아보기 전에 text-to-image diffusion model 에 대해 다시 한번 개념 정리를 해볼 필요가 있습니다.</p>
-</section>
-<section id="text-to-image-diffusion-models">
-<h2>Text-to-Image Diffusion Models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h2>
-<p>사전학습된 text-to-image diffusion model <span class="math notranslate nohighlight">\(\hat{x}_{\theta}\)</span> 는 input 으로 원본 이미지 <span class="math notranslate nohighlight">\(x\)</span>, 그리고 text prompt <span class="math notranslate nohighlight">\(P\)</span> 와 text-encoder <span class="math notranslate nohighlight">\(\Gamma\)</span> 로부터 나오는 conditioning vector <span class="math notranslate nohighlight">\(c = \Gamma(P)\)</span> 를 입력받아서 이미지 <span class="math notranslate nohighlight">\(x_{gen} = \hat{x}_{\theta}(\epsilon, c)\)</span> 를 생성하게 됩니다. 학습 시, mean squared loss 를 사용하고 이를 수식적으로 표현하면 다음과 같습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbb{E}_{x,c,\epsilon,t}[w_t || \hat{x}_{\theta}(\alpha_tx + \sigma_{t}\epsilon, c) - x ||_{2}^{2}]
-\]</div>
-<p>이때, DreamBooth 에서는 text encoder 를 CLIP text embedding 과 사전학습된 T5-XXL 모델 중 T5-XXL 모델을 사용했다고 합니다. 그리고 DreamBooth 로 fine-tuning 할때, diffusion process 에서 사용되는 U-net (때로는 text encoder 도 포함) 은 learnable 한 parameter 로 설정하고 생성된 latent vector 로부터 새로운 이미지를 생성하는 Decoder 의 파라미터 값은 고정시킨다고 합니다.</p>
-<p>앞써 설명드렸던 내용들을 해당 implementation code 에서 확인할 수 있습니다.</p>
-<ul>
-<li><p><strong>code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py</span>
-<span class="n">text_encoder_cls</span> <span class="o">=</span> <span class="n">import_model_class_from_model_name_or_path</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">revision</span><span class="p">)</span>
-
-<span class="c1"># Load scheduler and models</span>
-<span class="n">noise_scheduler</span> <span class="o">=</span> <span class="n">DDPMScheduler</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;scheduler&quot;</span><span class="p">)</span>
-<span class="n">text_encoder</span> <span class="o">=</span> <span class="n">text_encoder_cls</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
-    <span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;text_encoder&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span>
-<span class="p">)</span>
-<span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span><span class="p">)</span>
-<span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
-    <span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span>
-<span class="p">)</span>
-</pre></div>
-</div>
-</li>
-<li><p><strong>training code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py</span>
-<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">first_epoch</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">num_train_epochs</span><span class="p">):</span>
-        <span class="n">unet</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
-        <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">train_text_encoder</span><span class="p">:</span>
-            <span class="n">text_encoder</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
-        <span class="k">for</span> <span class="n">step</span><span class="p">,</span> <span class="n">batch</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">train_dataloader</span><span class="p">):</span>
-            <span class="c1"># Skip steps until we reach the resumed step</span>
-            <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">resume_from_checkpoint</span> <span class="ow">and</span> <span class="n">epoch</span> <span class="o">==</span> <span class="n">first_epoch</span> <span class="ow">and</span> <span class="n">step</span> <span class="o">&lt;</span> <span class="n">resume_step</span><span class="p">:</span>
-                <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="n">args</span><span class="o">.</span><span class="n">gradient_accumulation_steps</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-                    <span class="n">progress_bar</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
-                <span class="k">continue</span>
-
-            <span class="k">with</span> <span class="n">accelerator</span><span class="o">.</span><span class="n">accumulate</span><span class="p">(</span><span class="n">unet</span><span class="p">):</span>
-                <span class="c1"># Convert images to latent space</span>
-                <span class="n">latents</span> <span class="o">=</span> <span class="n">vae</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="s2">&quot;pixel_values&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="n">weight_dtype</span><span class="p">))</span><span class="o">.</span><span class="n">latent_dist</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
-                <span class="n">latents</span> <span class="o">=</span> <span class="n">latents</span> <span class="o">*</span> <span class="n">vae</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">scaling_factor</span>
-
-                <span class="c1"># Sample noise that we&#39;ll add to the latents</span>
-                <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">offset_noise</span><span class="p">:</span>
-                    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">latents</span><span class="p">)</span> <span class="o">+</span> <span class="mf">0.1</span> <span class="o">*</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span>
-                        <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">latents</span><span class="o">.</span><span class="n">device</span>
-                    <span class="p">)</span>
-                <span class="k">else</span><span class="p">:</span>
-                    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">latents</span><span class="p">)</span>
-                <span class="n">bsz</span> <span class="o">=</span> <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
-                <span class="c1"># Sample a random timestep for each image</span>
-                <span class="n">timesteps</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">num_train_timesteps</span><span class="p">,</span> <span class="p">(</span><span class="n">bsz</span><span class="p">,),</span> <span class="n">device</span><span class="o">=</span><span class="n">latents</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
-                <span class="n">timesteps</span> <span class="o">=</span> <span class="n">timesteps</span><span class="o">.</span><span class="n">long</span><span class="p">()</span>
-
-                <span class="c1"># Add noise to the latents according to the noise magnitude at each timestep</span>
-                <span class="c1"># (this is the forward diffusion process)</span>
-                <span class="n">noisy_latents</span> <span class="o">=</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">add_noise</span><span class="p">(</span><span class="n">latents</span><span class="p">,</span> <span class="n">noise</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">)</span>
-
-                <span class="c1"># Get the text embedding for conditioning</span>
-                <span class="n">encoder_hidden_states</span> <span class="o">=</span> <span class="n">text_encoder</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="s2">&quot;input_ids&quot;</span><span class="p">])[</span><span class="mi">0</span><span class="p">]</span>
-
-                <span class="c1"># Predict the noise residual</span>
-                <span class="n">model_pred</span> <span class="o">=</span> <span class="n">unet</span><span class="p">(</span><span class="n">noisy_latents</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">,</span> <span class="n">encoder_hidden_states</span><span class="p">)</span><span class="o">.</span><span class="n">sample</span>
-
-                <span class="c1"># Get the target for loss depending on the prediction type</span>
-                <span class="k">if</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span> <span class="o">==</span> <span class="s2">&quot;epsilon&quot;</span><span class="p">:</span>
-                    <span class="n">target</span> <span class="o">=</span> <span class="n">noise</span>
-                <span class="k">elif</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span> <span class="o">==</span> <span class="s2">&quot;v_prediction&quot;</span><span class="p">:</span>
-                    <span class="n">target</span> <span class="o">=</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">get_velocity</span><span class="p">(</span><span class="n">latents</span><span class="p">,</span> <span class="n">noise</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">)</span>
-                <span class="k">else</span><span class="p">:</span>
-                    <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unknown prediction type </span><span class="si">{</span><span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
-
-                <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">with_prior_preservation</span><span class="p">:</span>
-                    <span class="c1"># Chunk the noise and model_pred into two parts and compute the loss on each part separately.</span>
-                    <span class="n">model_pred</span><span class="p">,</span> <span class="n">model_pred_prior</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="n">model_pred</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
-                    <span class="n">target</span><span class="p">,</span> <span class="n">target_prior</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="n">target</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
-
-                    <span class="c1"># Compute instance loss</span>
-                    <span class="n">loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
-
-                    <span class="c1"># Compute prior loss</span>
-                    <span class="n">prior_loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred_prior</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target_prior</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
-
-                    <span class="c1"># Add the prior loss to the instance loss.</span>
-                    <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span> <span class="o">+</span> <span class="n">args</span><span class="o">.</span><span class="n">prior_loss_weight</span> <span class="o">*</span> <span class="n">prior_loss</span>
-                <span class="k">else</span><span class="p">:</span>
-                    <span class="n">loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
-
-                <span class="n">accelerator</span><span class="o">.</span><span class="n">backward</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span>
-                <span class="k">if</span> <span class="n">accelerator</span><span class="o">.</span><span class="n">sync_gradients</span><span class="p">:</span>
-                    <span class="n">params_to_clip</span> <span class="o">=</span> <span class="p">(</span>
-                        <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="n">unet</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">text_encoder</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span>
-                        <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">train_text_encoder</span>
-                        <span class="k">else</span> <span class="n">unet</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span>
-                    <span class="p">)</span>
-                    <span class="n">accelerator</span><span class="o">.</span><span class="n">clip_grad_norm_</span><span class="p">(</span><span class="n">params_to_clip</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">max_grad_norm</span><span class="p">)</span>
-                <span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
-                <span class="n">lr_scheduler</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
-                <span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">(</span><span class="n">set_to_none</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">set_grads_to_none</span><span class="p">)</span>
-</pre></div>
-</div>
-</li>
-</ul>
-</section>
-<section id="fine-tuning">
-<h2>Fine-tuning<a class="headerlink" href="#fine-tuning" title="Permalink to this heading">#</a></h2>
-<p>DreamBooth 에서 pre-trained 된 text-to-image generation 모델을 fine-tuning 할 때 <em>“a [unique identifier] [class noun]”</em> 그리고 <em>“a [class noun]”</em>  형태의 두 가지 text prompt 를 사용합니다. 이때, <em>unique identifier</em> 에 유지하고자 하는 대상에 대한 정보를 담는 것을 목표로 하기 때문에 사전 정보가 없는 rare token 을 사용하는 것이 중요하다고 합니다. 논문에서는 T5-XXL tokenizer 의 vocabulary 에서 3개 이하의 Unicode character 에 해당하는 token 을 랜덤하게 샘플링하고, 이를 기반으로 <em>unique identifier</em> 를 정의합니다.</p>
-<p>또한, 논문에서 <em>Language Drift</em> 그리고 <em>Reduced Output Diversity</em> 두 가지 문제점을 해결하기 위해 Class-specific Prior Preservation Loss 를 소개합니다. 이를 활용하여 모델을 fine-tuning 하는 방법은 다음과 같습니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_02.png"><img alt="dreambooth_02" class="bg-primary mb-1" src="../../_images/dreambooth_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 132 </span><span class="caption-text">Fine-tuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>우선, Gaussian 노이즈 이미지와 <em>“A V [class noun]”</em> 형태의 text prompt 를 사전학습된 text-to-image diffusion 모델에 입력하여 이미지를 생성한 후, 원본 이미지와의 <em>Reconstruction Loss</em> 를 계산합니다. 그리고 비슷한 과정으로 Gaussian 노이즈 이미지와 <em>“A [class noun]”</em> 형태의 text prompt 를 학습하고자 하는 모델, 그리고 freeze 시킨 또 다른 pre-trained diffusion 모델에 각각 입력하여 이미지를 생성한 후 <em>Class-Specific Prior Preservation Loss</em> 를 계산합니다. 이에 대한 training objective 를 수식적으로 표현하면 다음과 같습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathbb{E}_{x,c,\epsilon,\epsilon^{'},t}[w_t || \hat{x}_{\theta}(\alpha_tx + \sigma_t\epsilon, c) - x ||_{2}^{2} + \lambda w_{t^{'}} || \hat{x}_{\theta}(\alpha_{t^{'}} x_{pr} + \sigma_{t^{'}}\epsilon^{'}, c_{pr}) - x_{pr} ||_{2}^{2}]
-\]</div>
-<p><em>Class-Specific Prior Preservation Loss</em> 를 추가함으로써 class prior 에 대한 정보를 유지하게 되고, 이로써 동일한 class 에 대해 더 다양한 이미지들을 생성할 수 있는 부분을 아래 그림에서 확인할 수 있습니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_03.png"><img alt="dreambooth_03" class="bg-primary mb-1" src="../../_images/dreambooth_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 133 </span><span class="caption-text">Encouraging diversity with prior-preservation loss</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>DreamBooth 논문에서 세 가지의 모델 평가 metric 을 소개합니다. <em>subject fidelity</em> 를 측정하는 CLIP-I, DINO 그리고 <em>prompt fidelity</em> 를 측정하는 CLIP-T metric 을 사용합니다. 이때, DINO metric 은 동일한 class 에 속하더라도 서로 다른 subject 에 대해서는 다른 embedding 을 생성하기 때문에 CLIP-I 보다 더 선호된다고 합니다. 더 자세하게는 각 metric 은 다음과 같이 계산됩니다.</p>
-<ul class="simple">
-<li><p>CLIP-I := 생성된 이미지와 실제 이미지의 CLIP embedding 의 평균 pairwise cosine similarity</p></li>
-<li><p>DINO := 생성된 이미지와 실제 이미지의 ViT-S/16 DINO embedding 의 평균 pairwise cosine similarity</p></li>
-<li><p>CLIP-T := 입력 prompt 와 생성된 이미지의 CLIP embedding 의 평균 pairwise cosine similarity</p></li>
-</ul>
-<p>Textual Inversion 과 비교했을때, 세 개의 metric 에서 모두 DreamBooth 가 더 좋은 성능을 보여주는 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_04.png"><img alt="dreambooth_04" class="bg-primary mb-1" src="../../_images/dreambooth_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 134 </span><span class="caption-text">Comparison of models</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ablation-studies">
-<h2>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
-<p>Prior Preservation Loss (PPL) 과 Class-Prior 에 대한 Ablation Studies 결과도 논문에서 공유합니다. PPL 이 적용됨으로써 앞서 소개드렸던 Language Drift 그리고 Reduced Output Diversity 문제점이 해결되는 것을 PRES 그리고 DIV metric 을 통해 보여줍니다. 또한, Class-Prior Ablation 에서 다음과 같은 세 가지 prompt 를 사용하여 fine-tuning 했을 때, 해당 subject 에 맞는 <em>class noun</em> 을 prompt 에 입력했을 때가 가장 좋은 성능을 보여준다고 설명합니다.</p>
-<ul class="simple">
-<li><p>“no class noun”</p></li>
-<li><p>“a randomly sampled incorrect class noun” (e.g., “can” for a backpack)</p></li>
-<li><p>“correct class noun”</p></li>
-</ul>
-</section>
-<section id="applications">
-<h2>Applications<a class="headerlink" href="#applications" title="Permalink to this heading">#</a></h2>
-<p>논문에서 DreamBooth 를 활용한 여러 application 도 소개합니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_05.png"><img alt="dreambooth_05" class="bg-primary mb-1" src="../../_images/dreambooth_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 135 </span><span class="caption-text">Applications of DreamBooth</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>Recontextualization</p></li>
-</ol>
-<ul class="simple">
-<li><p>Prompt: “a [V] [class noun] [context description]”</p></li>
-<li><p>다음과 같은 prompt 입력 시, 사전에 보지 못했던 새로운 pose 나 articulation 을 잘 표현하는 부분을 확인할 수 있습니다.</p></li>
-</ul>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_06.png"><img alt="dreambooth_06" class="bg-primary mb-1" src="../../_images/dreambooth_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 136 </span><span class="caption-text">Recontextualization</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple" start="2">
-<li><p>Art Renditions</p></li>
-</ol>
-<ul class="simple">
-<li><p>Prompt: “a painting of a [V] [class noun] in the style of [famous painter]” or “a statue of a [V] [class noun] in the style of [famous sculptor]”</p></li>
-<li><p>Style Transfer 와 다르게 동일한 구조를 유지한 채 style 만 바꾸는 것이 아니라 다양한 pose 형태도 생성 가능합니다.</p></li>
-</ul>
-<ol class="arabic simple" start="3">
-<li><p>Novel View Synthesis</p></li>
-</ol>
-<ul class="simple">
-<li><p>동일한 subject 에 대해 다양한 각도에서 보는 이미지 생성도 가능합니다.</p></li>
-</ul>
-<ol class="arabic simple" start="4">
-<li><p>Property Modification</p></li>
-</ol>
-<ul class="simple">
-<li><p>Prompt: “a cross of a [V] dog and a [target species]”</p></li>
-<li><p>사전 학습한 subject 의 고유 feature 들이 다른 target species 에서도 반영이 되는 부분을 확인할 수 있습니다.</p></li>
-</ul>
-</section>
-<section id="limitations">
-<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
-<p>하지만 DreamBooth 모델에 다음과 같은 한계점도 존재합니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_07.png"><img alt="dreambooth_07" class="bg-primary mb-1" src="../../_images/dreambooth_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 137 </span><span class="caption-text">Limitations of DreamBooth</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Incorrect context synthesis := 대표적으로 training set 에 자주 나타나지 않는 subject, prompt, context 에 대해서 낮은 성능을 보여줍니다.</p></li>
-<li><p>Context-appearance entanglement := 유지하고자 하는 대상의 appearance (e.g., color) 가 prompted context 에 의해 달라지는 현상</p></li>
-<li><p>Overfitting := 사전학습된 데이터와 유사한 prompt 입력 시, overfitting 현상 발생</p></li>
-</ul>
-<p>마지막으로 subject 대상에 따라 모델 성능(fidelity)이 차이를 보인다고 합니다.</p>
-</section>
-<section id="appendix">
-<h2>Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
-<p>마지막으로, 논문 본문에 소개되고 있지는 않지만 Appendix 부문에서도 흥미로운 결과들을 확인할 수 있습니다. Figure 20 은 fine tuning 하는 이미지 개수에 따른 DreamBooth 학습결과를 보여주는데, 단 한 장만으로도 identity 의 전반적인 특징을 잘 담는 것을 확인할 수 있습니다. Figure 18 은 만화 캐릭터의 identity 를 유지한 상태로 다양한 만화 사진들을 모델이 생성하는 사례들을 보여줍니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_08.png"><img alt="dreambooth_08" class="bg-primary mb-1" src="../../_images/dreambooth_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 138 </span><span class="caption-text">Appendix-1</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_09.png"><img alt="dreambooth_09" class="bg-primary mb-1" src="../../_images/dreambooth_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 139 </span><span class="caption-text">Appendix-2</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DALLE2.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DALL-E 2</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="ControlNet.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">ControlNet</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning">Fine-tuning</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">Applications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>DreamBooth &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/dreambooth';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="ControlNet" href="ControlNet.html" />
+    <link rel="prev" title="DALL-E 2" href="DALLE2.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/dreambooth.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/dreambooth.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>DreamBooth</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning">Fine-tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">Applications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> DreamBooth: Fine Tuning Text-to-Image Diffusion Models for Subject-Driven Generation (CVPR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2208.12242">https://arxiv.org/abs/2208.12242</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth">huggingface/diffusers</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on May. 31, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="dreambooth">
+<h1>DreamBooth<a class="headerlink" href="#dreambooth" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>최근에 DALL-E2, Imagen, Stable Diffusion 등 다양한 text-to-image generation 모델들이 등장하였지만, 어떠한 동일한 subject 에 대해서 다른 context 에 적용하는 부분에서 부족한 면들을 보여주고 있습니다. DreamBooth 논문은 이러한 문제점을 개선하기 위해 text-to-image 모델을 fine-tuning 하는 기법으로 소개되었고, 단 3-5장의 이미지를 학습하면 되며 이를 NVIDIA A100 으로 학습하는데 5분 정도밖에 소요되지 않는다고 합니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_01.png"><img alt="dreambooth_01" class="bg-primary mb-1" src="../../_images/dreambooth_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 131 </span><span class="caption-text">Subject-Driven Generation</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DreamBooth 가 무엇인지 자세히 알아보기 전에 text-to-image diffusion model 에 대해 다시 한번 개념 정리를 해볼 필요가 있습니다.</p>
+</section>
+<section id="text-to-image-diffusion-models">
+<h2>Text-to-Image Diffusion Models<a class="headerlink" href="#text-to-image-diffusion-models" title="Permalink to this heading">#</a></h2>
+<p>사전학습된 text-to-image diffusion model <span class="math notranslate nohighlight">\(\hat{x}_{\theta}\)</span> 는 input 으로 원본 이미지 <span class="math notranslate nohighlight">\(x\)</span>, 그리고 text prompt <span class="math notranslate nohighlight">\(P\)</span> 와 text-encoder <span class="math notranslate nohighlight">\(\Gamma\)</span> 로부터 나오는 conditioning vector <span class="math notranslate nohighlight">\(c = \Gamma(P)\)</span> 를 입력받아서 이미지 <span class="math notranslate nohighlight">\(x_{gen} = \hat{x}_{\theta}(\epsilon, c)\)</span> 를 생성하게 됩니다. 학습 시, mean squared loss 를 사용하고 이를 수식적으로 표현하면 다음과 같습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbb{E}_{x,c,\epsilon,t}[w_t || \hat{x}_{\theta}(\alpha_tx + \sigma_{t}\epsilon, c) - x ||_{2}^{2}]
+\]</div>
+<p>이때, DreamBooth 에서는 text encoder 를 CLIP text embedding 과 사전학습된 T5-XXL 모델 중 T5-XXL 모델을 사용했다고 합니다. 그리고 DreamBooth 로 fine-tuning 할때, diffusion process 에서 사용되는 U-net (때로는 text encoder 도 포함) 은 learnable 한 parameter 로 설정하고 생성된 latent vector 로부터 새로운 이미지를 생성하는 Decoder 의 파라미터 값은 고정시킨다고 합니다.</p>
+<p>앞서 설명드렸던 내용들을 해당 implementation code 에서 확인할 수 있습니다.</p>
+<ul>
+<li><p><strong>code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py</span>
+<span class="n">text_encoder_cls</span> <span class="o">=</span> <span class="n">import_model_class_from_model_name_or_path</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">revision</span><span class="p">)</span>
+
+<span class="c1"># Load scheduler and models</span>
+<span class="n">noise_scheduler</span> <span class="o">=</span> <span class="n">DDPMScheduler</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;scheduler&quot;</span><span class="p">)</span>
+<span class="n">text_encoder</span> <span class="o">=</span> <span class="n">text_encoder_cls</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
+    <span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;text_encoder&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span>
+<span class="p">)</span>
+<span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span><span class="p">)</span>
+<span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span>
+    <span class="n">args</span><span class="o">.</span><span class="n">pretrained_model_name_or_path</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">,</span> <span class="n">revision</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">revision</span>
+<span class="p">)</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>training code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py</span>
+<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">first_epoch</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">num_train_epochs</span><span class="p">):</span>
+        <span class="n">unet</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
+        <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">train_text_encoder</span><span class="p">:</span>
+            <span class="n">text_encoder</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
+        <span class="k">for</span> <span class="n">step</span><span class="p">,</span> <span class="n">batch</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">train_dataloader</span><span class="p">):</span>
+            <span class="c1"># Skip steps until we reach the resumed step</span>
+            <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">resume_from_checkpoint</span> <span class="ow">and</span> <span class="n">epoch</span> <span class="o">==</span> <span class="n">first_epoch</span> <span class="ow">and</span> <span class="n">step</span> <span class="o">&lt;</span> <span class="n">resume_step</span><span class="p">:</span>
+                <span class="k">if</span> <span class="n">step</span> <span class="o">%</span> <span class="n">args</span><span class="o">.</span><span class="n">gradient_accumulation_steps</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+                    <span class="n">progress_bar</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
+                <span class="k">continue</span>
+
+            <span class="k">with</span> <span class="n">accelerator</span><span class="o">.</span><span class="n">accumulate</span><span class="p">(</span><span class="n">unet</span><span class="p">):</span>
+                <span class="c1"># Convert images to latent space</span>
+                <span class="n">latents</span> <span class="o">=</span> <span class="n">vae</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="s2">&quot;pixel_values&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="n">weight_dtype</span><span class="p">))</span><span class="o">.</span><span class="n">latent_dist</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
+                <span class="n">latents</span> <span class="o">=</span> <span class="n">latents</span> <span class="o">*</span> <span class="n">vae</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">scaling_factor</span>
+
+                <span class="c1"># Sample noise that we&#39;ll add to the latents</span>
+                <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">offset_noise</span><span class="p">:</span>
+                    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">latents</span><span class="p">)</span> <span class="o">+</span> <span class="mf">0.1</span> <span class="o">*</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span>
+                        <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">latents</span><span class="o">.</span><span class="n">device</span>
+                    <span class="p">)</span>
+                <span class="k">else</span><span class="p">:</span>
+                    <span class="n">noise</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn_like</span><span class="p">(</span><span class="n">latents</span><span class="p">)</span>
+                <span class="n">bsz</span> <span class="o">=</span> <span class="n">latents</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+                <span class="c1"># Sample a random timestep for each image</span>
+                <span class="n">timesteps</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">num_train_timesteps</span><span class="p">,</span> <span class="p">(</span><span class="n">bsz</span><span class="p">,),</span> <span class="n">device</span><span class="o">=</span><span class="n">latents</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
+                <span class="n">timesteps</span> <span class="o">=</span> <span class="n">timesteps</span><span class="o">.</span><span class="n">long</span><span class="p">()</span>
+
+                <span class="c1"># Add noise to the latents according to the noise magnitude at each timestep</span>
+                <span class="c1"># (this is the forward diffusion process)</span>
+                <span class="n">noisy_latents</span> <span class="o">=</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">add_noise</span><span class="p">(</span><span class="n">latents</span><span class="p">,</span> <span class="n">noise</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">)</span>
+
+                <span class="c1"># Get the text embedding for conditioning</span>
+                <span class="n">encoder_hidden_states</span> <span class="o">=</span> <span class="n">text_encoder</span><span class="p">(</span><span class="n">batch</span><span class="p">[</span><span class="s2">&quot;input_ids&quot;</span><span class="p">])[</span><span class="mi">0</span><span class="p">]</span>
+
+                <span class="c1"># Predict the noise residual</span>
+                <span class="n">model_pred</span> <span class="o">=</span> <span class="n">unet</span><span class="p">(</span><span class="n">noisy_latents</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">,</span> <span class="n">encoder_hidden_states</span><span class="p">)</span><span class="o">.</span><span class="n">sample</span>
+
+                <span class="c1"># Get the target for loss depending on the prediction type</span>
+                <span class="k">if</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span> <span class="o">==</span> <span class="s2">&quot;epsilon&quot;</span><span class="p">:</span>
+                    <span class="n">target</span> <span class="o">=</span> <span class="n">noise</span>
+                <span class="k">elif</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span> <span class="o">==</span> <span class="s2">&quot;v_prediction&quot;</span><span class="p">:</span>
+                    <span class="n">target</span> <span class="o">=</span> <span class="n">noise_scheduler</span><span class="o">.</span><span class="n">get_velocity</span><span class="p">(</span><span class="n">latents</span><span class="p">,</span> <span class="n">noise</span><span class="p">,</span> <span class="n">timesteps</span><span class="p">)</span>
+                <span class="k">else</span><span class="p">:</span>
+                    <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unknown prediction type </span><span class="si">{</span><span class="n">noise_scheduler</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">prediction_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
+
+                <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">with_prior_preservation</span><span class="p">:</span>
+                    <span class="c1"># Chunk the noise and model_pred into two parts and compute the loss on each part separately.</span>
+                    <span class="n">model_pred</span><span class="p">,</span> <span class="n">model_pred_prior</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="n">model_pred</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+                    <span class="n">target</span><span class="p">,</span> <span class="n">target_prior</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="n">target</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+                    <span class="c1"># Compute instance loss</span>
+                    <span class="n">loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
+
+                    <span class="c1"># Compute prior loss</span>
+                    <span class="n">prior_loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred_prior</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target_prior</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
+
+                    <span class="c1"># Add the prior loss to the instance loss.</span>
+                    <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span> <span class="o">+</span> <span class="n">args</span><span class="o">.</span><span class="n">prior_loss_weight</span> <span class="o">*</span> <span class="n">prior_loss</span>
+                <span class="k">else</span><span class="p">:</span>
+                    <span class="n">loss</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">model_pred</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">target</span><span class="o">.</span><span class="n">float</span><span class="p">(),</span> <span class="n">reduction</span><span class="o">=</span><span class="s2">&quot;mean&quot;</span><span class="p">)</span>
+
+                <span class="n">accelerator</span><span class="o">.</span><span class="n">backward</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span>
+                <span class="k">if</span> <span class="n">accelerator</span><span class="o">.</span><span class="n">sync_gradients</span><span class="p">:</span>
+                    <span class="n">params_to_clip</span> <span class="o">=</span> <span class="p">(</span>
+                        <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="n">unet</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">text_encoder</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span>
+                        <span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">train_text_encoder</span>
+                        <span class="k">else</span> <span class="n">unet</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span>
+                    <span class="p">)</span>
+                    <span class="n">accelerator</span><span class="o">.</span><span class="n">clip_grad_norm_</span><span class="p">(</span><span class="n">params_to_clip</span><span class="p">,</span> <span class="n">args</span><span class="o">.</span><span class="n">max_grad_norm</span><span class="p">)</span>
+                <span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
+                <span class="n">lr_scheduler</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
+                <span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">(</span><span class="n">set_to_none</span><span class="o">=</span><span class="n">args</span><span class="o">.</span><span class="n">set_grads_to_none</span><span class="p">)</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</section>
+<section id="fine-tuning">
+<h2>Fine-tuning<a class="headerlink" href="#fine-tuning" title="Permalink to this heading">#</a></h2>
+<p>DreamBooth 에서 pre-trained 된 text-to-image generation 모델을 fine-tuning 할 때 <em>“a [unique identifier] [class noun]”</em> 그리고 <em>“a [class noun]”</em>  형태의 두 가지 text prompt 를 사용합니다. 이때, <em>unique identifier</em> 에 유지하고자 하는 대상에 대한 정보를 담는 것을 목표로 하기 때문에 사전 정보가 없는 rare token 을 사용하는 것이 중요하다고 합니다. 논문에서는 3개 이하의 Unicode character 혹은 T5-XXL tokenizer 를 랜덤하게 샘플링해서 token 을 생성하고 이를 기반으로 <em>unique identifier</em> 를 정의합니다.</p>
+<p>또한, 논문에서 <em>Language Drift</em> 그리고 <em>Reduced Output Diversity</em> 두 가지 문제점을 해결하기 위해 Class-specific Prior Preservation Loss 를 소개합니다. 이를 활용하여 모델을 fine-tuning 하는 방법은 다음과 같습니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_02.png"><img alt="dreambooth_02" class="bg-primary mb-1" src="../../_images/dreambooth_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 132 </span><span class="caption-text">Fine-tuning</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>우선, Gaussian 노이즈 이미지와 <em>“A V [class noun]”</em> 형태의 text prompt 를 사전학습된 text-to-image diffusion 모델에 입력하여 이미지를 생성한 후, 원본 이미지와의 <em>Reconstruction Loss</em> 를 계산합니다. 그리고 비슷한 과정으로 Gaussian 노이즈 이미지와 <em>“A [class noun]”</em> 형태의 text prompt 를 학습하고자 하는 모델, 그리고 freeze 시킨 또 다른 pre-trained diffusion 모델에 각각 입력하여 이미지를 생성한 후 <em>Class-Specific Prior Preservation Loss</em> 를 계산합니다. 이에 대한 training objective 를 수식적으로 표현하면 다음과 같습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathbb{E}_{x,c,\epsilon,\epsilon^{'},t}[w_t || \hat{x}_{\theta}(\alpha_tx + \sigma_t\epsilon, c) - x ||_{2}^{2} + \lambda w_{t^{'}} || \hat{x}_{\theta}(\alpha_{t^{'}} x_{pr} + \sigma_{t^{'}}\epsilon^{'}, c_{pr}) - x_{pr} ||_{2}^{2}]
+\]</div>
+<p><em>Class-Specific Prior Preservation Loss</em> 를 추가함으로써 class prior 에 대한 정보를 유지하게 되고, 이로써 동일한 class 에 대해 더 다양한 이미지들을 생성할 수 있는 부분을 아래 그림에서 확인할 수 있습니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_03.png"><img alt="dreambooth_03" class="bg-primary mb-1" src="../../_images/dreambooth_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 133 </span><span class="caption-text">Encouraging diversity with prior-preservation loss</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>DreamBooth 논문에서 세 가지의 모델 평가 metric 을 소개합니다. <em>subject fidelity</em> 를 측정하는 CLIP-I, DINO 그리고 <em>prompt fidelity</em> 를 측정하는 CLIP-T metric 을 사용합니다. 이때, DINO metric 은 동일한 class 를 가진 서로 다른 subject 에 대해서도 다른 embedding 을 생성하기 때문에 CLIP-I 보다 더 선호된다고 합니다. 더 자세하게는 각 metric 은 다음과 같이 계산됩니다.</p>
+<ul class="simple">
+<li><p>CLIP-I := 생성된 이미지와 실제 이미지의 CLIP embedding 의 평균 pairwise cosine similarity</p></li>
+<li><p>DINO := 생성된 이미지와 실제 이미지의 ViT-S/16 DINO embedding 의 평균 pairwise cosine similarity</p></li>
+<li><p>CLIP-T := 입력 prompt 와 생성된 이미지의 CLIP embedding 의 평균 pairwise cosine similarity</p></li>
+</ul>
+<p>Textual Inversion 과 비교했을 때, 세 개의 metric 에서 모두 DreamBooth 가 더 좋은 성능을 보여주는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_04.png"><img alt="dreambooth_04" class="bg-primary mb-1" src="../../_images/dreambooth_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 134 </span><span class="caption-text">Comparison of models</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-studies">
+<h2>Ablation Studies<a class="headerlink" href="#ablation-studies" title="Permalink to this heading">#</a></h2>
+<p>Prior Preservation Loss (PPL) 과 Class-Prior 에 대한 Ablation Studies 결과도 논문에서 공유합니다. PPL 이 적용됨으로써 앞서 소개드렸던 Language Drift 그리고 Reduced Output Diversity 문제점이 해결되는 것을 PRES 그리고 DIV metric 을 통해 보여줍니다. 또한, Class-Prior Ablation 에서 다음과 같은 세 가지 prompt 를 사용하여 fine-tuning 했을 때, 해당 subject 에 맞는 <em>class noun</em> 을 prompt 에 입력했을 때가 가장 좋은 성능을 보여준다고 설명합니다.</p>
+<ul class="simple">
+<li><p>“no class noun”</p></li>
+<li><p>“a randomly sampled incorrect class noun” (e.g., “can” for a backpack)</p></li>
+<li><p>“correct class noun”</p></li>
+</ul>
+</section>
+<section id="applications">
+<h2>Applications<a class="headerlink" href="#applications" title="Permalink to this heading">#</a></h2>
+<p>논문에서 DreamBooth 를 활용한 여러 application 도 소개합니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_05.png"><img alt="dreambooth_05" class="bg-primary mb-1" src="../../_images/dreambooth_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 135 </span><span class="caption-text">Applications of DreamBooth</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>Recontextualization</p></li>
+</ol>
+<ul class="simple">
+<li><p>Prompt: “a [V] [class noun] [context description]”</p></li>
+<li><p>다음과 같은 prompt 입력 시, 사전에 보지 못했던 새로운 pose 나 articulation 을 잘 표현하는 부분을 확인할 수 있습니다.</p></li>
+</ul>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_06.png"><img alt="dreambooth_06" class="bg-primary mb-1" src="../../_images/dreambooth_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 136 </span><span class="caption-text">Recontextualization</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple" start="2">
+<li><p>Art Renditions</p></li>
+</ol>
+<ul class="simple">
+<li><p>Prompt: “a painting of a [V] [class noun] in the style of [famous painter]” or “a statue of a [V] [class noun] in the style of [famous sculptor]”</p></li>
+<li><p>Style Transfer 와 다르게 동일한 구조를 유지한 채 style 만 바꾸는 것이 아니라 다양한 pose 형태도 생성 가능합니다.</p></li>
+</ul>
+<ol class="arabic simple" start="3">
+<li><p>Novel View Synthesis</p></li>
+</ol>
+<ul class="simple">
+<li><p>동일한 subject 에 대해 다양한 각도에서 보는 이미지 생성도 가능합니다.</p></li>
+</ul>
+<ol class="arabic simple" start="4">
+<li><p>Property Modification</p></li>
+</ol>
+<ul class="simple">
+<li><p>Prompt: “a cross of a [V] dog and a [target species]”</p></li>
+<li><p>사전 학습한 subject 의 고유 feature 들이 다른 target species 에서도 반영이 되는 부분을 확인할 수 있습니다.</p></li>
+</ul>
+</section>
+<section id="limitations">
+<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this heading">#</a></h2>
+<p>하지만 DreamBooth 모델에 다음과 같은 한계점도 존재합니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_07.png"><img alt="dreambooth_07" class="bg-primary mb-1" src="../../_images/dreambooth_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 137 </span><span class="caption-text">Limitations of DreamBooth</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Incorrect context synthesis := 대표적으로 training set 에 자주 나타나지 않는 subject, prompt, context 에 대해서 낮은 성능을 보여줍니다.</p></li>
+<li><p>Context-appearance entanglement := 유지하고자 하는 대상의 appearance (e.g., color) 가 prompted context 에 의해 달라지는 현상</p></li>
+<li><p>Overfitting := 사전학습된 데이터와 유사한 prompt 입력 시, overfitting 현상 발생</p></li>
+</ul>
+<p>마지막으로 subject 대상에 따라 모델 성능(fidelity)이 차이를 보인다고 합니다.</p>
+</section>
+<section id="appendix">
+<h2>Appendix<a class="headerlink" href="#appendix" title="Permalink to this heading">#</a></h2>
+<p>마지막으로, 논문 본문에 소개되고 있지는 않지만 Appendix 부문에서도 흥미로운 결과들을 확인할 수 있습니다. Figure 20 은 fine tuning 하는 이미지 개수에 따른 DreamBooth 학습결과를 보여주는데, 단 한 장만으로도 identity 의 전반적인 특징을 잘 담는 것을 확인할 수 있습니다. Figure 18 은 만화 캐릭터의 identity 를 유지한 상태로 다양한 만화 사진들을 모델이 생성하는 사례들을 보여줍니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_08.png"><img alt="dreambooth_08" class="bg-primary mb-1" src="../../_images/dreambooth_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 138 </span><span class="caption-text">Appendix-1</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/dreambooth_09.png"><img alt="dreambooth_09" class="bg-primary mb-1" src="../../_images/dreambooth_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 139 </span><span class="caption-text">Appendix-2</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DALLE2.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DALL-E 2</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="ControlNet.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">ControlNet</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-diffusion-models">Text-to-Image Diffusion Models</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#fine-tuning">Fine-tuning</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-studies">Ablation Studies</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">Applications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#limitations">Limitations</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#appendix">Appendix</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/gan.html b/docs/review/gan.html
old mode 100644
new mode 100755
index 2be489e6..6622c097
--- a/docs/review/gan.html
+++ b/docs/review/gan.html
@@ -1,893 +1,913 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>GAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/gan';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="DDPM" href="DDPM.html" />
-    <link rel="prev" title="VAE" href="vae.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/gan.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/gan.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>GAN</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-procedure">Training Procedure</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#theoretical-results">Theoretical Results</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Generative Adversarial Networks (NIPS 2014)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1406.2661">https://arxiv.org/abs/1406.2661</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/eriklindernoren/PyTorch-GAN">eriklindernoren/PyTorch-GAN</a></p></li>
-<li><p><a class="reference external" href="https://www.youtube.com/watch?v=cd-kj1ysqOc">Smart Design Lab &#64;KAIST | 딥러닝 Chp 3.4 GAN</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Editor:</strong> Changhwan Lee</p></li>
-<li><p><strong>Last updated on Apr. 09, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="gan">
-<h1>GAN<a class="headerlink" href="#gan" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>생성형 모델은 크게 생성하고자 하는 데이터의 explicit density 와 implicit density 를 계산하는 방식으로 나뉩니다. Explicit density 를 계산한다는 것은 데이터 분포를 명확하게 사전에 정의하고 모델을 학습하는 것을 의미합니다. 이때, 데이터의 분포를 직접적인 계산이 가능한 tractable density 로 추정하는 방법과 근사화시켜 approximate density 로 추정하는 방법으로 나뉩니다.</p>
-<ul class="simple">
-<li><p>Tractable density: AutoRegressive 하게 구하는 방식이 있습니다. AutoRegressive 모델을 사용하여 이전 단계의 데이터를 활용하여 모델을 학습하고, 대표적인 모델로는 PixelCNN, PixelRNN 등이 있습니다.</p></li>
-<li><p>Approximate density: 대표적으로 score-based model, Boltzmann Machine 등이 있습니다.</p>
-<ul>
-<li><p>Score-based model - 모델 파라미터의 gradient 가 아닌 데이터의 gradient 를 활용하여 모델을 학습하는 방식으로, energy-based model 에서 MLE 에 사용하는 확률분포를 정규화하는 term 을 따로 계산하지 않아도 되는 장점이 있습니다.</p></li>
-<li><p>Boltzmann Machine : 완전그래프 구조로 학습하는 생성형 모델입니다. 모델을 학습하는 과정에서 확률 분포의 학습이 어려워(계산량이 많아서 어려움) Markov chain 을 활용하여 학습합니다. 또한, 완전그래프이기 때문에 노드가 늘어날수록 간선, 파라미터 등이 급증하는 문제가 있어 Restricted Boltzmann Machine(RBM) 이 제안되기도 했습니다.</p></li>
-</ul>
-</li>
-</ul>
-<p>반면에 데이터의 분포를 명확히 정의하지 않고 implicit 하게 모델을 학습하는 방식도 존재합니다. 대표적으로는 Ian Goodfellow 가 2014년에 발표한 GAN 모델이 있습니다. GAN 은 최근에 Diffusion Model 이 소개되기 전까지 몇 년 동안 이미지 생성 분야에서 대표적인 모델로 자리잡았었습니다. GAN 은 VAE 와 달리 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 직접 구하지 않고, Adversarial Process 를 통해 implicit 하게 샘플링을 해서 분포를 구하게 됩니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_01.png"><img alt="gan_01" class="bg-primary mb-1" src="../../_images/gan_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 8 </span><span class="caption-text">Taxonomy of Generative Models</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래 그림과 같이 GAN 은 크게 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 로부터 가짜 데이터를 생성하는 Generator 와 그로부터 생성된 데이터와 실제 training 데이터를 구분하는 Discriminator 로 구성이 되어 있습니다. 다시 말해서 Discriminator 는 실제 데이터가 들어오면 1, 그리고 가짜로 생성된 데이터가 들어오면 0 을 출력하는 binary classification task 를 진행합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_03.png"><img alt="gan_03" class="bg-primary mb-1" src="../../_images/gan_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 9 </span><span class="caption-text">Generative Adversarial Network(GAN) Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Generator 와 Discriminator 구현 코드도 같이 살펴보겠습니다.</p>
-<ul>
-<li><p><strong>Generator 구현 code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Generator</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">(</span><span class="n">Generator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="k">def</span> <span class="nf">block</span><span class="p">(</span><span class="n">in_feat</span><span class="p">,</span> <span class="n">out_feat</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
-            <span class="n">layers</span> <span class="o">=</span> <span class="p">[</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_feat</span><span class="p">,</span> <span class="n">out_feat</span><span class="p">)]</span>
-            <span class="k">if</span> <span class="n">normalize</span><span class="p">:</span>
-                <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">BatchNorm1d</span><span class="p">(</span><span class="n">out_feat</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">))</span>
-            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
-            <span class="k">return</span> <span class="n">layers</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
-            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="n">opt</span><span class="o">.</span><span class="n">latent_dim</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
-            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span>
-            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">),</span>
-            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">1024</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">img_shape</span><span class="p">))),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Tanh</span><span class="p">()</span>
-        <span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">z</span><span class="p">):</span>
-        <span class="n">img</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">z</span><span class="p">)</span>
-        <span class="n">img</span> <span class="o">=</span> <span class="n">img</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">img</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="o">*</span><span class="n">img_shape</span><span class="p">)</span>
-        <span class="k">return</span> <span class="n">img</span>
-</pre></div>
-</div>
-</li>
-<li><p><strong>Discriminator 구현 code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Discriminator</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">(</span><span class="n">Discriminator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">img_shape</span><span class="p">)),</span> <span class="mi">512</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
-            <span class="n">nn</span><span class="o">.</span><span class="n">Sigmoid</span><span class="p">(),</span>
-        <span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">img</span><span class="p">):</span>
-        <span class="n">img_flat</span> <span class="o">=</span> <span class="n">img</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">img</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
-        <span class="n">validity</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">img_flat</span><span class="p">)</span>
-
-        <span class="k">return</span> <span class="n">validity</span>
-</pre></div>
-</div>
-</li>
-</ul>
-</section>
-<section id="training-procedure">
-<h2>Training Procedure<a class="headerlink" href="#training-procedure" title="Permalink to this heading">#</a></h2>
-<p>GAN 을 학습할 시, <strong>D를 먼저 최적화하는 k 단계</strong>와 <strong>G를 최적화하는 한 단계를 번갈아 수행</strong>합니다. 그리고 이때 쓰이는 손실함수(loss function)은 다음과 같습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-\min_G \max_D V(D,G) = \mathbb{E}_{x \sim p_{data}(x)}[\log D(x)] + \mathbb{E}_{z \sim p_z(z)}[\log(1-D(G(z)))]
-\]</div>
-<p>논문에서 제시한 학습 알고리즘과 실제 implementation code 를 비교해보겠습니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_02.png"><img alt="gan_02" class="bg-primary mb-1" src="../../_images/gan_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 10 </span><span class="caption-text">Generative Adversarial Network(GAN) Training Procedure</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>GAN 학습 code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ----------</span>
-<span class="c1">#  Training</span>
-<span class="c1"># ----------</span>
-
-<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">opt</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">):</span>
-    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">imgs</span><span class="p">,</span> <span class="n">_</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">dataloader</span><span class="p">):</span>
-
-        <span class="c1"># Adversarial ground truths</span>
-        <span class="n">valid</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fill_</span><span class="p">(</span><span class="mf">1.0</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-        <span class="n">fake</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fill_</span><span class="p">(</span><span class="mf">0.0</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
-
-        <span class="c1"># Configure input</span>
-        <span class="n">real_imgs</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">type</span><span class="p">(</span><span class="n">Tensor</span><span class="p">))</span>
-
-        <span class="c1"># -----------------</span>
-        <span class="c1">#  Train Generator</span>
-        <span class="c1"># -----------------</span>
-
-        <span class="n">optimizer_G</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
-
-        <span class="c1"># Sample noise as generator input</span>
-        <span class="n">z</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">opt</span><span class="o">.</span><span class="n">latent_dim</span><span class="p">))))</span>
-
-        <span class="c1"># Generate a batch of images</span>
-        <span class="n">gen_imgs</span> <span class="o">=</span> <span class="n">generator</span><span class="p">(</span><span class="n">z</span><span class="p">)</span>
-
-        <span class="c1"># Loss measures generator&#39;s ability to fool the discriminator</span>
-        <span class="n">g_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">gen_imgs</span><span class="p">),</span> <span class="n">valid</span><span class="p">)</span>
-
-        <span class="n">g_loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
-        <span class="n">optimizer_G</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
-
-        <span class="c1"># ---------------------</span>
-        <span class="c1">#  Train Discriminator</span>
-        <span class="c1"># ---------------------</span>
-
-        <span class="n">optimizer_D</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
-
-        <span class="c1"># Measure discriminator&#39;s ability to classify real from generated samples</span>
-        <span class="n">real_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">real_imgs</span><span class="p">),</span> <span class="n">valid</span><span class="p">)</span>
-        <span class="n">fake_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">gen_imgs</span><span class="o">.</span><span class="n">detach</span><span class="p">()),</span> <span class="n">fake</span><span class="p">)</span>
-        <span class="n">d_loss</span> <span class="o">=</span> <span class="p">(</span><span class="n">real_loss</span> <span class="o">+</span> <span class="n">fake_loss</span><span class="p">)</span> <span class="o">/</span> <span class="mi">2</span>
-
-        <span class="n">d_loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
-        <span class="n">optimizer_D</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
-
-        <span class="nb">print</span><span class="p">(</span>
-            <span class="s2">&quot;[Epoch </span><span class="si">%d</span><span class="s2">/</span><span class="si">%d</span><span class="s2">] [Batch </span><span class="si">%d</span><span class="s2">/</span><span class="si">%d</span><span class="s2">] [D loss: </span><span class="si">%f</span><span class="s2">] [G loss: </span><span class="si">%f</span><span class="s2">]&quot;</span>
-            <span class="o">%</span> <span class="p">(</span><span class="n">epoch</span><span class="p">,</span> <span class="n">opt</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">dataloader</span><span class="p">),</span> <span class="n">d_loss</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span> <span class="n">g_loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
-        <span class="p">)</span>
-
-        <span class="n">batches_done</span> <span class="o">=</span> <span class="n">epoch</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">dataloader</span><span class="p">)</span> <span class="o">+</span> <span class="n">i</span>
-        <span class="k">if</span> <span class="n">batches_done</span> <span class="o">%</span> <span class="n">opt</span><span class="o">.</span><span class="n">sample_interval</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
-            <span class="n">save_image</span><span class="p">(</span><span class="n">gen_imgs</span><span class="o">.</span><span class="n">data</span><span class="p">[:</span><span class="mi">25</span><span class="p">],</span> <span class="s2">&quot;images/</span><span class="si">%d</span><span class="s2">.png&quot;</span> <span class="o">%</span> <span class="n">batches_done</span><span class="p">,</span> <span class="n">nrow</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-</pre></div>
-</div>
-</li>
-</ul>
-<p>이렇게 Discriminator 와 Generator 는 각각 <span class="math notranslate nohighlight">\(V(D,G)\)</span> 를 최대화하고 최소화하는 방향으로 stochastic gradient descent 를 진행하게 됩니다. 하지만 아래 그림처럼 실제로 Generator를 학습할 때, 초반에 <span class="math notranslate nohighlight">\(D(G(z)) \approx 0\)</span> 일 경우 학습하지 못하는 상황이 발생합니다. 이 때, <span class="math notranslate nohighlight">\(\log(1-D(G(z)))\)</span> 를 최소화하지 않고 <span class="math notranslate nohighlight">\(\log(D(G(z)))\)</span> 를 최대화하는 방향으로 Generator 를 학습하는 기법도 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_04.png"><img alt="gan_04" class="bg-primary mb-1" src="../../_images/gan_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 11 </span><span class="caption-text">Alternative to Vanishing Gradient when Training the Generator</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이렇게 학습함으로써 최적화된 solution 에서는 Generator 가 training 데이터 분포를 완벽히 복원하고 Discriminator 는 binary classification 확률을 언제나 1/2 로 내뱉게 됩니다.</p>
-<section id="theoretical-results">
-<h3>Theoretical Results<a class="headerlink" href="#theoretical-results" title="Permalink to this heading">#</a></h3>
-<p><strong>Proposition 1. 고정된 Generator 에 대해서, 최적화된 Discriminator 는 다음과 같습니다.</strong></p>
-<div class="math notranslate nohighlight">
-\[
-D_{G}^*(x) = \frac{p_{data}(x)}{p_{data}(x) + p_g(x)}
-\]</div>
-<p>이를 증명하자면, Discriminator 에 대한 손실함수를 다음과 같이 쓸 수 있고 <span class="math notranslate nohighlight">\(D = D_{G}^*(x)\)</span> 가 이를 최대화하는 solution 입니다.</p>
-<div class="math notranslate nohighlight">
-\[
-V(D,G) = \int_x p_{data}(x) \log(D(x)) dx + \int_z p_{z}(z) \log(1-D(G(z))) dz
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-= \int_x p_{data}(x) \log(D(x)) + p_{g}(x) \log(1-D(x)) dx 
-\]</div>
-<p><strong>Proposition 2. 최적화된 Discriminator 에 대해 <span class="math notranslate nohighlight">\(\max_D V(D,G)\)</span> 를 최소화하는 Generator 는 <span class="math notranslate nohighlight">\(p_g = p_{data}\)</span> 일때 성립하고 이때 <span class="math notranslate nohighlight">\(D = D_{G}^*(x) = 1/2\)</span> 입니다.</strong></p>
-<p>이를 증명하자면, 최적화된 Discriminator 에 대한 손실함수는 다음과 같고</p>
-<div class="math notranslate nohighlight">
-\[
-V(D^{\ast},G) = \mathbb{E}_{x \sim p_{data}(x)} [ \log D^{\ast}(x) ] + \mathbb{E}_{x \sim p_g(x)} [ \log(1-D^{\ast}(x)) ]
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-= \int_x p_{data}(x) \log(\frac{p_{data}(x)}{p_{data}(x) + p_g(x)})\ dx + \int_x p_{g}(x) \log(\frac{p_{g}(x)}{p_{data}(x) + p_g(x)})\ dx
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-= -\log(4)\ + KL(p_{data}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) +  KL(p_{g}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) 
-\]</div>
-<p><span class="math notranslate nohighlight">\(KL(p_{data}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) +  KL(p_{g}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) = 2\ \cdot\ JSD(p_{data}\ ||\ p_{g})\)</span> 의 최솟값은 0 이고 이는 <span class="math notranslate nohighlight">\(p_g = p_{data}\)</span> 일때 성립합니다.</p>
-</section>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>논문에서 MNIST, the Toronto Face Database(TFD), 그리고 CIFAR-10 dataset 로 모델 실험 및 성능 평가했습니다. 평가시에는 <span class="math notranslate nohighlight">\(p_g\)</span> 로부터 Parzen density estimation을 거쳐 계산한 log likelihood estimate 로 모델 성능 평가를 진행했습니다. 아래 표를 보면 실험 방법 중 GAN이 제일 결과가 좋은 것을 볼 수 있습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_05.png"><img alt="gan_05" class="bg-primary mb-1" src="../../_images/gan_05.png" style="width: 400px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 12 </span><span class="caption-text">Experimental Results</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="summary">
-<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
-<p>VAE는 새로운 데이터를 잘 생성하지만 생성된 이미지가 흐릿하다는 단점을 지니고 있습니다. 반면에 GAN 은 high quality image 를 잘 생성하지만 unstable 한 convergence 를 가지고 있습니다. 그래서 실제로 VAE 는 Encoder 를 활용한 차원축소로 많이 활용되고 이미지 데이터를 생성하는데는 GAN 이 많이 활용되었다고 합니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="vae.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">VAE</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DDPM.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">DDPM</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-procedure">Training Procedure</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#theoretical-results">Theoretical Results</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>GAN &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/gan';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="DDPM" href="DDPM.html" />
+    <link rel="prev" title="VAE" href="vae.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/gan.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/gan.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>GAN</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-procedure">Training Procedure</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#theoretical-results">Theoretical Results</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Generative Adversarial Networks (NIPS 2014)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1406.2661">https://arxiv.org/abs/1406.2661</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/eriklindernoren/PyTorch-GAN">eriklindernoren/PyTorch-GAN</a></p></li>
+<li><p><a class="reference external" href="https://www.youtube.com/watch?v=cd-kj1ysqOc">Smart Design Lab &#64;KAIST | 딥러닝 Chp 3.4 GAN</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Editor:</strong> Changhwan Lee</p></li>
+<li><p><strong>Last updated on Apr. 09, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="gan">
+<h1>GAN<a class="headerlink" href="#gan" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>생성형 모델은 크게 생성하고자 하는 데이터의 explicit density 와 implicit density 를 계산하는 방식으로 나뉩니다. Explicit density 를 계산한다는 것은 데이터 분포를 명확하게 사전에 정의하고 모델을 학습하는 것을 의미합니다. 이때, 데이터의 분포를 직접적인 계산이 가능한 tractable density 로 추정하는 방법과 근사화시켜 approximate density 로 추정하는 방법으로 나뉩니다.</p>
+<ul class="simple">
+<li><p>Tractable density: AutoRegressive 하게 구하는 방식이 있습니다. AutoRegressive 모델을 사용하여 이전 단계의 데이터를 활용하여 모델을 학습하고, 대표적인 모델로는 PixelCNN, PixelRNN 등이 있습니다.</p></li>
+<li><p>Approximate density: 대표적으로 score-based model, Boltzmann Machine 등이 있습니다.</p>
+<ul>
+<li><p>Score-based model - 모델 파라미터의 gradient 가 아닌 데이터의 gradient 를 활용하여 모델을 학습하는 방식으로, energy-based model 에서 MLE 에 사용하는 확률분포를 정규화하는 term 을 따로 계산하지 않아도 되는 장점이 있습니다.</p></li>
+<li><p>Boltzmann Machine : 완전그래프 구조로 학습하는 생성형 모델입니다. 모델을 학습하는 과정에서 확률 분포의 학습이 어려워(계산량이 많아서 어려움) Markov chain 을 활용하여 학습합니다. 또한, 완전그래프이기 때문에 노드가 늘어날수록 간선, 파라미터 등이 급증하는 문제가 있어 Restricted Boltzmann Machine(RBM) 이 제안되기도 했습니다.</p></li>
+</ul>
+</li>
+</ul>
+<p>반면에 데이터의 분포를 명확히 정의하지 않고 implicit 하게 모델을 학습하는 방식도 존재합니다. 대표적으로는 Ian Goodfellow 가 2014년에 발표한 GAN 모델이 있습니다. GAN 은 최근에 Diffusion Model 이 소개되기 전까지 몇 년 동안 이미지 생성 분야에서 대표적인 모델로 자리잡았었습니다. GAN 은 VAE 와 달리 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 직접 구하지 않고, Adversarial Process 를 통해 implicit 하게 샘플링을 해서 분포를 구하게 됩니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_01.png"><img alt="gan_01" class="bg-primary mb-1" src="../../_images/gan_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 8 </span><span class="caption-text">Taxonomy of Generative Models</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래 그림과 같이 GAN 은 크게 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 로부터 가짜 데이터를 생성하는 Generator 와 그로부터 생성된 데이터와 실제 training 데이터를 구분하는 Discriminator 로 구성이 되어 있습니다. 다시 말해서 Discriminator 는 실제 데이터가 들어오면 1, 그리고 가짜로 생성된 데이터가 들어오면 0 을 출력하는 binary classification task 를 진행합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_03.png"><img alt="gan_03" class="bg-primary mb-1" src="../../_images/gan_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 9 </span><span class="caption-text">Generative Adversarial Network(GAN) Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Generator 와 Discriminator 구현 코드도 같이 살펴보겠습니다.</p>
+<ul>
+<li><p><strong>Generator 구현 code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">Generator</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">Generator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="k">def</span><span class="w"> </span><span class="nf">block</span><span class="p">(</span><span class="n">in_feat</span><span class="p">,</span> <span class="n">out_feat</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
+            <span class="n">layers</span> <span class="o">=</span> <span class="p">[</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">in_feat</span><span class="p">,</span> <span class="n">out_feat</span><span class="p">)]</span>
+            <span class="k">if</span> <span class="n">normalize</span><span class="p">:</span>
+                <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">BatchNorm1d</span><span class="p">(</span><span class="n">out_feat</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">))</span>
+            <span class="n">layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
+            <span class="k">return</span> <span class="n">layers</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
+            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="n">opt</span><span class="o">.</span><span class="n">latent_dim</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
+            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span>
+            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">),</span>
+            <span class="o">*</span><span class="n">block</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">1024</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">img_shape</span><span class="p">))),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Tanh</span><span class="p">()</span>
+        <span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">z</span><span class="p">):</span>
+        <span class="n">img</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">z</span><span class="p">)</span>
+        <span class="n">img</span> <span class="o">=</span> <span class="n">img</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">img</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="o">*</span><span class="n">img_shape</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">img</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>Discriminator 구현 code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">Discriminator</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">Discriminator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">img_shape</span><span class="p">)),</span> <span class="mi">512</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">LeakyReLU</span><span class="p">(</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
+            <span class="n">nn</span><span class="o">.</span><span class="n">Sigmoid</span><span class="p">(),</span>
+        <span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">img</span><span class="p">):</span>
+        <span class="n">img_flat</span> <span class="o">=</span> <span class="n">img</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">img</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
+        <span class="n">validity</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">(</span><span class="n">img_flat</span><span class="p">)</span>
+
+        <span class="k">return</span> <span class="n">validity</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</section>
+<section id="training-procedure">
+<h2>Training Procedure<a class="headerlink" href="#training-procedure" title="Permalink to this heading">#</a></h2>
+<p>GAN 을 학습할 시, <strong>D를 먼저 최적화하는 k 단계</strong>와 <strong>G를 최적화하는 한 단계를 번갈아 수행</strong>합니다. 그리고 이때 쓰이는 손실함수(loss function)은 다음과 같습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+\min_G \max_D V(D,G) = \mathbb{E}_{x \sim p_{data}(x)}[\log D(x)] + \mathbb{E}_{z \sim p_z(z)}[\log(1-D(G(z)))]
+\]</div>
+<p>논문에서 제시한 학습 알고리즘과 실제 implementation code 를 비교해보겠습니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_02.png"><img alt="gan_02" class="bg-primary mb-1" src="../../_images/gan_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 10 </span><span class="caption-text">Generative Adversarial Network(GAN) Training Procedure</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>GAN 학습 code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># ----------</span>
+<span class="c1">#  Training</span>
+<span class="c1"># ----------</span>
+
+<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">opt</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">):</span>
+    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">imgs</span><span class="p">,</span> <span class="n">_</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">dataloader</span><span class="p">):</span>
+
+        <span class="c1"># Adversarial ground truths</span>
+        <span class="n">valid</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fill_</span><span class="p">(</span><span class="mf">1.0</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+        <span class="n">fake</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">fill_</span><span class="p">(</span><span class="mf">0.0</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
+
+        <span class="c1"># Configure input</span>
+        <span class="n">real_imgs</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">type</span><span class="p">(</span><span class="n">Tensor</span><span class="p">))</span>
+
+        <span class="c1"># -----------------</span>
+        <span class="c1">#  Train Generator</span>
+        <span class="c1"># -----------------</span>
+
+        <span class="n">optimizer_G</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
+
+        <span class="c1"># Sample noise as generator input</span>
+        <span class="n">z</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">imgs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">opt</span><span class="o">.</span><span class="n">latent_dim</span><span class="p">))))</span>
+
+        <span class="c1"># Generate a batch of images</span>
+        <span class="n">gen_imgs</span> <span class="o">=</span> <span class="n">generator</span><span class="p">(</span><span class="n">z</span><span class="p">)</span>
+
+        <span class="c1"># Loss measures generator&#39;s ability to fool the discriminator</span>
+        <span class="n">g_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">gen_imgs</span><span class="p">),</span> <span class="n">valid</span><span class="p">)</span>
+
+        <span class="n">g_loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
+        <span class="n">optimizer_G</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
+
+        <span class="c1"># ---------------------</span>
+        <span class="c1">#  Train Discriminator</span>
+        <span class="c1"># ---------------------</span>
+
+        <span class="n">optimizer_D</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
+
+        <span class="c1"># Measure discriminator&#39;s ability to classify real from generated samples</span>
+        <span class="n">real_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">real_imgs</span><span class="p">),</span> <span class="n">valid</span><span class="p">)</span>
+        <span class="n">fake_loss</span> <span class="o">=</span> <span class="n">adversarial_loss</span><span class="p">(</span><span class="n">discriminator</span><span class="p">(</span><span class="n">gen_imgs</span><span class="o">.</span><span class="n">detach</span><span class="p">()),</span> <span class="n">fake</span><span class="p">)</span>
+        <span class="n">d_loss</span> <span class="o">=</span> <span class="p">(</span><span class="n">real_loss</span> <span class="o">+</span> <span class="n">fake_loss</span><span class="p">)</span> <span class="o">/</span> <span class="mi">2</span>
+
+        <span class="n">d_loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
+        <span class="n">optimizer_D</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
+
+        <span class="nb">print</span><span class="p">(</span>
+            <span class="s2">&quot;[Epoch </span><span class="si">%d</span><span class="s2">/</span><span class="si">%d</span><span class="s2">] [Batch </span><span class="si">%d</span><span class="s2">/</span><span class="si">%d</span><span class="s2">] [D loss: </span><span class="si">%f</span><span class="s2">] [G loss: </span><span class="si">%f</span><span class="s2">]&quot;</span>
+            <span class="o">%</span> <span class="p">(</span><span class="n">epoch</span><span class="p">,</span> <span class="n">opt</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">dataloader</span><span class="p">),</span> <span class="n">d_loss</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span> <span class="n">g_loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
+        <span class="p">)</span>
+
+        <span class="n">batches_done</span> <span class="o">=</span> <span class="n">epoch</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">dataloader</span><span class="p">)</span> <span class="o">+</span> <span class="n">i</span>
+        <span class="k">if</span> <span class="n">batches_done</span> <span class="o">%</span> <span class="n">opt</span><span class="o">.</span><span class="n">sample_interval</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+            <span class="n">save_image</span><span class="p">(</span><span class="n">gen_imgs</span><span class="o">.</span><span class="n">data</span><span class="p">[:</span><span class="mi">25</span><span class="p">],</span> <span class="s2">&quot;images/</span><span class="si">%d</span><span class="s2">.png&quot;</span> <span class="o">%</span> <span class="n">batches_done</span><span class="p">,</span> <span class="n">nrow</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+</pre></div>
+</div>
+</li>
+</ul>
+<p>이렇게 Discriminator 와 Generator 는 <span class="math notranslate nohighlight">\(V(D,G)\)</span> 를 각각 최대화하고 최소화하는 방향으로 stochastic gradient descent 를 진행하게 됩니다. 하지만 아래 그림처럼 실제로 Generator를 학습할 때, 초반에 <span class="math notranslate nohighlight">\(D(G(z)) \approx 0\)</span> 일 경우 학습하지 못하는 상황이 발생합니다. 이 때, <span class="math notranslate nohighlight">\(\log(1-D(G(z)))\)</span> 를 최소화하지 않고 <span class="math notranslate nohighlight">\(\log(D(G(z)))\)</span> 를 최대화하는 방향으로 Generator 를 학습하는 기법도 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_04.png"><img alt="gan_04" class="bg-primary mb-1" src="../../_images/gan_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 11 </span><span class="caption-text">Alternative to Vanishing Gradient when Training the Generator</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이렇게 학습함으로써 최적화된 solution 에서는 Generator 가 training 데이터 분포를 완벽히 복원하고 Discriminator 는 binary classification 확률을 언제나 1/2 로 내뱉게 됩니다.</p>
+<section id="theoretical-results">
+<h3>Theoretical Results<a class="headerlink" href="#theoretical-results" title="Permalink to this heading">#</a></h3>
+<p><strong>Proposition 1. 고정된 Generator 에 대해서, 최적화된 Discriminator 는 다음과 같습니다.</strong></p>
+<div class="math notranslate nohighlight">
+\[
+D_{G}^*(x) = \frac{p_{data}(x)}{p_{data}(x) + p_g(x)}
+\]</div>
+<p>이를 증명하자면, Discriminator 에 대한 손실함수를 다음과 같이 쓸 수 있고 <span class="math notranslate nohighlight">\(D = D_{G}^*(x)\)</span> 가 이를 최대화하는 solution 입니다.</p>
+<div class="math notranslate nohighlight">
+\[
+V(D,G) = \int_x p_{data}(x) \log(D(x)) dx+ \int_z p_{z}(z) \log(1-D(g(z))) dz 
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+= \int_x p_{data}(x) \log(D(x)) + p_{g}(x) \log(1-D(x)) dx 
+\]</div>
+<p><strong>Proposition 2. 최적화된 Discriminator 에 대해 <span class="math notranslate nohighlight">\(\max_D V(D,G)\)</span> 를 최소화하는 Generator 는 <span class="math notranslate nohighlight">\(p_g = p_{data}\)</span> 일때 성립하고 이때 <span class="math notranslate nohighlight">\(D = D_{G}^*(x) = 1/2\)</span> 입니다.</strong></p>
+<p>이를 증명하자면, 최적화된 Discriminator 에 대한 손실함수는 다음과 같고</p>
+<div class="math notranslate nohighlight">
+\[
+V(D^{\ast},G) = \mathbb{E}_{x \sim p_{data}(x)} [ \log D^{\ast}(x) ] + \mathbb{E}_{x \sim p_g(x)} [ \log(1-D^{\ast}(x)) ]
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+= \int_x p_{data}(x) \log(\frac{p_{data}(x)}{p_{data}(x) + p_g(x)})\ dx + \int_x p_{g}(x) \log(\frac{p_{g}(x)}{p_{data}(x) + p_g(x)})\ dx 
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+= -\log(4)\ + KL(p_{data}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) +  KL(p_{g}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) 
+\]</div>
+<p><span class="math notranslate nohighlight">\(KL(p_{data}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) +  KL(p_{g}(x)\ ||\ \frac{p_{data}+p_{g}}{2}) = 2\ \cdot\ JSD(p_{data}\ ||\ p_{g})\)</span> 의 최솟값은 0 이고 이는 <span class="math notranslate nohighlight">\(p_g = p_{data}\)</span> 일때 성립합니다.</p>
+</section>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>논문에서 MNIST, the Toronto Face Database(TFD), 그리고 CIFAR-10 dataset 로 모델 실험 및 성능 평가했습니다. 평가시에는 <span class="math notranslate nohighlight">\(p_g\)</span> 로부터 Parzen density estimation을 거쳐 계산한 log likelihood estimate 로 모델 성능 평가를 진행했습니다. 아래 표를 보면 실험 방법 중 GAN이 제일 결과가 좋은 것을 볼 수 있습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/gan_05.png"><img alt="gan_05" class="bg-primary mb-1" src="../../_images/gan_05.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 12 </span><span class="caption-text">Experimental Results</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="summary">
+<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
+<p>VAE는 새로운 데이터를 잘 생성하지만 생성된 이미지가 흐릿하다는 단점을 지니고 있습니다. 반면에 GAN 은 high quality image 를 잘 생성하지만 unstable 한 convergence 를 가지고 있습니다. 그래서 실제로 VAE 는 Encoder 를 활용한 차원축소로 많이 활용되고 이미지 데이터를 생성하는데는 GAN 이 많이 활용되었다고 합니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="vae.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">VAE</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DDPM.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">DDPM</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-procedure">Training Procedure</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#theoretical-results">Theoretical Results</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/imagen.html b/docs/review/imagen.html
old mode 100644
new mode 100755
index eedd5076..f70ae297
--- a/docs/review/imagen.html
+++ b/docs/review/imagen.html
@@ -1,905 +1,925 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Imagen &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/imagen';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Imagen Editor" href="imagen_editor.html" />
-    <link rel="prev" title="StyO" href="StyO.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/imagen.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/imagen.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Imagen</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributions">Contributions</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">Methodology</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pretrained-t5-xxl-cascaded-diffusion-model">Pretrained T5-XXL + Cascaded Diffusion Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">Classifier-Free Guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#large-guidance-weight-sampler">Large guidance weight sampler</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#static-thresholding">Static Thresholding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dynamic-thresholding">Dynamic Thresholding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-models">Super Resolution Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#drawbench">DrawBench</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-result-table-of-imagen-from-human-evaluators">Qualitative Result Table of Imagen from Human Evaluators</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding (NeurIPS 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2205.11487">https://arxiv.org/abs/2205.11487</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Donggeun Sean Ko</p></li>
-<li><p><strong>Last updated on Sep. 13, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="imagen">
-<h1>Imagen<a class="headerlink" href="#imagen" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Multi-modal learning, 특히 text-to-image generation 에서 contrastive learning이 최근에 많은 주목을 받고 있음.</p></li>
-<li><p>Contrastive learning 과 더불어 large language model (LLM) 들과 diffusion model 들을 사용하여 독창적인 image 생성도 가능함</p></li>
-<li><p>텍스트 전용 말뭉치 (text corpus)로 학습된 LLM들의 text embedding들은 text-to-image 합성에 매우 효과적이라고 함.</p></li>
-<li><p>Classifier-free guidance 사용하여, 더 높은 충실도 (fidelity)의 이미지를 생성하는 새로운 샘플링 기술을 사용함.</p></li>
-</ul>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_1.png"><img alt="imagen_1" class="bg-primary mb-1" src="../../_images/imagen_1.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 211 </span><span class="caption-text">Concept of Contrastive Learning</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="contributions">
-<h2>Contributions<a class="headerlink" href="#contributions" title="Permalink to this heading">#</a></h2>
-<ol class="arabic simple">
-<li><p><strong>Pretrained Frozen</strong> text encoder (T5-XXL) 이 text-to-image generation task 에 매우 좋은 성능을 보여줌.</p></li>
-<li><p>Pretrained Text Encoder 사이즈를 <strong>fine-tuning</strong>하는 것이 diffusion model size fine tuning 하는 것보다 더 중요하다는 것을 실험적으로 증명함</p></li>
-<li><p><strong>Dynamic Thresholding</strong> 이라는 새로운 diffusion sampling technique (thresholding diffusion sampler) 을 제시하여 high guidance weight을 leverage 할 수 있게 만들어 더욱 “현실적인” 이미지 생성을 할 수 있음</p></li>
-<li><p><strong>Efficient U-Net</strong>이라는 기존 Palette 나 DDIM에서 사용하는 U-Net 구조보다 computational, memory efficient 한 U-Net 구조를 제시함</p></li>
-<li><p>COCO FID 점수 <strong>7.27</strong> SOTA 점수를 달성함</p></li>
-<li><p><strong>DrawBench</strong>라는 새로운 text-to-image generation evaluation용 benchmark dataset을 제시함</p></li>
-</ol>
-</section>
-<section id="methodology">
-<h2>Methodology<a class="headerlink" href="#methodology" title="Permalink to this heading">#</a></h2>
-<section id="pretrained-t5-xxl-cascaded-diffusion-model">
-<h3>Pretrained T5-XXL + Cascaded Diffusion Model<a class="headerlink" href="#pretrained-t5-xxl-cascaded-diffusion-model" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Pretrained Text Encoder 중 T5-XXL (구글 모델) 사용</p></li>
-<li><p>학습 시 pretrained text encoder을 Freeze 해놓음</p></li>
-<li><p>Text-to-Image Diffusion Model (Improved DDPM 아키텍쳐) 사용해 64x64 image 생성</p></li>
-<li><p>2가지 SR model (Efficient U-Net)을 사용해서 64 → 256 → 1024 로 upsampling</p></li>
-</ul>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_2.png"><img alt="imagen_2" class="bg-primary mb-1" src="../../_images/imagen_2.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 212 </span><span class="caption-text">Imagen overall pipeline</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="classifier-free-guidance">
-<h3>Classifier-Free Guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Classifier-free guidance 이란 auxiliary classifier의 효과 없이 classifier guidance 효과를 얻는 방법</p></li>
-<li><p>아래의 그림처럼 guidance가 없을 시 image generation이 일정하지 않음. 즉, label/class 의 영향을 못받아서, 생성이 일정하지 않음.</p></li>
-<li><p>guidance를 줄 시, 생성된 이미지의 class나 object이 일정하고 무엇을 생성하는것인지 좀 더 자세하게 알 수 있음.</p></li>
-</ul>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_3.png"><img alt="imagen_3" class="bg-primary mb-1" src="../../_images/imagen_3.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 213 </span><span class="caption-text">Comparison between when guidance is not used (left) vs when guidance is used with parameter, w=3 (right)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="large-guidance-weight-sampler">
-<h3>Large guidance weight sampler<a class="headerlink" href="#large-guidance-weight-sampler" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Guide의 가중치 w 를 높이면 train-test 불일치가 생긴다.</p></li>
-<li><p>이로 인해, 높은 가중치의 이미지는 훈련 데이터 범위 안에 없어 [-1,1], classifier-free guidance가 평균과 분산을 이동시켜 이미지가 아예 “빗나가” 이상한 이미지를 생성하게 된다</p></li>
-</ul>
-</section>
-<section id="static-thresholding">
-<h3>Static Thresholding<a class="headerlink" href="#static-thresholding" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>x-prediction 을 [-1,1]로 clipping 한다. 여전히 saturation 이 되고 fidelity가 덜한 이미지가 생성됨</p></li>
-<li><p>문제를 해결하고자 dynamic thresholding 을 제시함</p></li>
-</ul>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_5.png"><img alt="imagen_5" class="bg-primary mb-1" src="../../_images/imagen_5.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 214 </span><span class="caption-text">Graphical visualization of static thresholding</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="dynamic-thresholding">
-<h3>Dynamic Thresholding<a class="headerlink" href="#dynamic-thresholding" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>특정 백분위수 절대 픽셀 값을 s 라고 지정하고 s &gt; 1 이면, 임계값을 [-s,s]로 지정한 다음 s로 나눈다.</p></li>
-<li><p>예시: 90% 지점의 픽셀 값이 3 이면 [-3,3]으로 clipping 한 후 3으로 나눠서 [-1,1] 로 normalize 함.</p></li>
-<li><p>Thresholding 의 차이는 아래 결과 비교 이미지로 확인 할 수 있다.</p></li>
-</ul>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_6.png"><img alt="imagen_6" class="bg-primary mb-1" src="../../_images/imagen_6.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 215 </span><span class="caption-text">Graphical visualization of dynamic thresholding</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_7.png"><img alt="imagen_7" class="bg-primary mb-1" src="../../_images/imagen_7.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 216 </span><span class="caption-text">Comparison among no thresholding, static thresholding and dynamic thresholding, respectively</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="super-resolution-models">
-<h3>Super Resolution Models<a class="headerlink" href="#super-resolution-models" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Efficient U-Net이라는 새로운 모델을 만들어, 기존 U-Net에서 여러가지 modification을 하였다고 주장 (그렇지만 EffU-Net은 의료쪽으로 이름이 이미 있는걸로 아는데…)</p></li>
-<li><p>Removed self-attention layer</p></li>
-<li><p>Keep the text cross-attention layer</p></li>
-<li><p>Skip connection scaling을 1/(√2)로 하여 convergence 를 더 빠르게 함</p></li>
-<li><p>Lower resolution block에서 residual blocks를 더 추가함</p></li>
-</ul>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_8.png"><img alt="imagen_8" class="bg-primary mb-1" src="../../_images/imagen_8.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 217 </span><span class="caption-text">Architecture of Super Resolution Diffusion Model used in Imagen</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="drawbench">
-<h3>DrawBench<a class="headerlink" href="#drawbench" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Imagen 저자들이 제시한 새로운 벤치마크 데이터셋. 본 데이터셋은 text prompt 와 category label 로 이루어졌다</p></li>
-<li><p>깃허브에서 다운 받을 수 있으며, 예시는 아래 그림과 같다
-11 categories, 200 text prompts
-Human evaluation 으로 진행 (25명의 평가자)
-Model A에서 생성한 이미지 set vs Model B에서 생성한 이미지 set</p></li>
-</ul>
-<p>평가자는 2가지 질문을 주며 2가지 기준점으로 평가함
-<strong>Q1. Which set of images is of higher quality?</strong>
-<strong>Q2. Which set of images better represents the text caption: {text caption}?</strong></p>
-<p>기준점</p>
-<ul class="simple">
-<li><p>Image Fidelity</p></li>
-<li><p>Image-text alignment</p></li>
-</ul>
-<p>평가자는 3가지 답변 중 하나를 선택해야함</p>
-<ol class="arabic simple">
-<li><p>I prefer set A</p></li>
-<li><p>I am Indifferent</p></li>
-<li><p>I prefer set B</p></li>
-</ol>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_9.png"><img alt="imagen_9" class="bg-primary mb-1" src="../../_images/imagen_9.png" style="width: 550px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 218 </span><span class="caption-text">Screenshot of DrawBench dataset</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="results">
-<h2>Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Figure 2 에서는 DrawBench에서 나온 결과를 체리피킹 없이 보여준다.</p></li>
-<li><p>아마 저자들은 체리피킹 없이도 좋은 결과를 보여주고, 다양한 카테고리에서도 훌륭한 이미지를 생성 할 수 있다는 주장인 것 같다.</p></li>
-</ul>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_10.png"><img alt="imagen_10" class="bg-primary mb-1" src="../../_images/imagen_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 219 </span><span class="caption-text">Result of Imagen in DrawBench dataset</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>Zero-shot 으로 측정한 FID 점수가 MS-COCO로 학습한 모델들의 FID 보다 우수함 (FID는 낮을수록 좋음).</p></li>
-<li><p>Table 2 에서는 Imagen이 no people (사람이 없는 사진) 에는 photorealism 점수가 올라감
-→ Imagen 은 photorealistic people을 생성하기에 한계가 있음.</p></li>
-</ul>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_11.png"><img alt="imagen_11" class="bg-primary mb-1" src="../../_images/imagen_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 220 </span><span class="caption-text">Result Table of Imagen</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="qualitative-result-table-of-imagen-from-human-evaluators">
-<h3>Qualitative Result Table of Imagen from Human Evaluators<a class="headerlink" href="#qualitative-result-table-of-imagen-from-human-evaluators" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Human raters (사람 평가자) 들은 T5-XXL로 text encoding 한 text-to-image generation 모델을 CLIP-based 보다 더 선호함</p></li>
-<li><p>기본적으로 Imagen 은 다른 text-to-image generation 모델에서 (SOTA 모델인 DALL-E 2) 보다도 human raters 에서 DrawBench 데이터셋에서 좋은 평가를 받음</p></li>
-</ul>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_12.png"><img alt="imagen_12" class="bg-primary mb-1" src="../../_images/imagen_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 221 </span><span class="caption-text">Qualitative Result Table of Imagen from Human evaluators</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="ablation-study">
-<h2>Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Scaling text encoder size 가 U-Net size scaling 보다 더 중요함</p></li>
-<li><p>(a)의 text encoder 사이즈의 변화가 FID 및 CLIP score 점수에 더욱 많은 영향을 끼침</p></li>
-<li><p>Dynamic thresholding 이 performance boost에 더욱 영향을 끼침</p></li>
-<li><p>Dynamic thresholding을 이용하면 성능을 더욱 끌어 올릴 수 있음</p></li>
-</ul>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_13.png"><img alt="imagen_13" class="bg-primary mb-1" src="../../_images/imagen_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 222 </span><span class="caption-text">Qualitative Result Table of Imagen from Human evaluators</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="conclusion">
-<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Frozen large pretrained language model shows better performance over text-image paired multimodal encoders such as CLIP in text-to-image generation task</p></li>
-<li><p>Efficient U-Net significantly improves performance time</p></li>
-<li><p>Dynamic thresholding allows usage of much higher guidance weights with better fidelity of generated images</p></li>
-</ul>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="StyO.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">StyO</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="imagen_editor.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Imagen Editor</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributions">Contributions</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">Methodology</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pretrained-t5-xxl-cascaded-diffusion-model">Pretrained T5-XXL + Cascaded Diffusion Model</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">Classifier-Free Guidance</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#large-guidance-weight-sampler">Large guidance weight sampler</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#static-thresholding">Static Thresholding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dynamic-thresholding">Dynamic Thresholding</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-models">Super Resolution Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#drawbench">DrawBench</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-result-table-of-imagen-from-human-evaluators">Qualitative Result Table of Imagen from Human Evaluators</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Imagen &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/imagen';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Imagen Editor" href="imagen_editor.html" />
+    <link rel="prev" title="StyO" href="StyO.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/imagen.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/imagen.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Imagen</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributions">Contributions</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">Methodology</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pretrained-t5-xxl-cascaded-diffusion-model">Pretrained T5-XXL + Cascaded Diffusion Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">Classifier-Free Guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#large-guidance-weight-sampler">Large guidance weight sampler</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#static-thresholding">Static Thresholding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dynamic-thresholding">Dynamic Thresholding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-models">Super Resolution Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#drawbench">DrawBench</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-result-table-of-imagen-from-human-evaluators">Qualitative Result Table of Imagen from Human Evaluators</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding (NeurIPS 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2205.11487">https://arxiv.org/abs/2205.11487</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donggeun Sean Ko</p></li>
+<li><p><strong>Last updated on Sep. 13, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="imagen">
+<h1>Imagen<a class="headerlink" href="#imagen" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Multi-modal learning, 특히 text-to-image generation 에서 contrastive learning이 최근에 많은 주목을 받고 있음.</p></li>
+<li><p>Contrastive learning 과 더불어 large language model (LLM) 들과 diffusion model 들을 사용하여 독창적인 image 생성도 가능함</p></li>
+<li><p>텍스트 전용 말뭉치 (text corpus)로 학습된 LLM들의 text embedding들은 text-to-image 합성에 매우 효과적이라고 함.</p></li>
+<li><p>Classifier-free guidance 사용하여, 더 높은 충실도 (fidelity)의 이미지를 생성하는 새로운 샘플링 기술을 사용함.</p></li>
+</ul>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_1.png"><img alt="imagen_1" class="bg-primary mb-1" src="../../_images/imagen_1.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 211 </span><span class="caption-text">Concept of Contrastive Learning</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="contributions">
+<h2>Contributions<a class="headerlink" href="#contributions" title="Permalink to this heading">#</a></h2>
+<ol class="arabic simple">
+<li><p><strong>Pretrained Frozen</strong> text encoder (T5-XXL) 이 text-to-image generation task 에 매우 좋은 성능을 보여줌.</p></li>
+<li><p>Pretrained Text Encoder 사이즈를 <strong>키우는 것 (scaling)</strong>이 diffusion model 사이즈를 키우는 것보다 더 중요하다는 것을 실험적으로 증명함</p></li>
+<li><p><strong>Dynamic Thresholding</strong> 이라는 새로운 diffusion sampling technique (thresholding diffusion sampler) 을 제시하여 high guidance weight을 leverage 할 수 있게 만들어 더욱 “현실적인” 이미지 생성을 할 수 있음</p></li>
+<li><p><strong>Efficient U-Net</strong>이라는 기존 Palette 나 DDIM에서 사용하는 U-Net 구조보다 computational, memory efficient 한 U-Net 구조를 제시함</p></li>
+<li><p>COCO FID 점수 <strong>7.27</strong> SOTA 점수를 달성함</p></li>
+<li><p><strong>DrawBench</strong>라는 새로운 text-to-image generation evaluation용 benchmark dataset을 제시함</p></li>
+</ol>
+</section>
+<section id="methodology">
+<h2>Methodology<a class="headerlink" href="#methodology" title="Permalink to this heading">#</a></h2>
+<section id="pretrained-t5-xxl-cascaded-diffusion-model">
+<h3>Pretrained T5-XXL + Cascaded Diffusion Model<a class="headerlink" href="#pretrained-t5-xxl-cascaded-diffusion-model" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Pretrained Text Encoder 중 T5-XXL (구글 모델) 사용</p></li>
+<li><p>학습 시 pretrained text encoder을 Freeze 해놓음</p></li>
+<li><p>Text-to-Image Diffusion Model (Improved DDPM 아키텍쳐) 사용해 64x64 image 생성</p></li>
+<li><p>2가지 SR model (Efficient U-Net)을 사용해서 64 → 256 → 1024 로 upsampling</p></li>
+</ul>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_2.png"><img alt="imagen_2" class="bg-primary mb-1" src="../../_images/imagen_2.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 212 </span><span class="caption-text">Imagen overall pipeline</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="classifier-free-guidance">
+<h3>Classifier-Free Guidance<a class="headerlink" href="#classifier-free-guidance" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Classifier-free guidance란 auxiliary classifier 없이 classifier guidance 효과를 얻는 방법</p></li>
+<li><p>아래의 그림처럼 guidance가 없을 시 image generation이 일정하지 않음. 즉, label/class 의 영향을 못받아서, 생성이 일정하지 않음.</p></li>
+<li><p>guidance를 줄 시, 생성된 이미지의 class나 object가 일정하고 무엇을 생성하는 것인지 좀 더 자세하게 알 수 있음.</p></li>
+</ul>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_3.png"><img alt="imagen_3" class="bg-primary mb-1" src="../../_images/imagen_3.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 213 </span><span class="caption-text">Comparison between when guidance is not used (left) vs when guidance is used with parameter, w=3 (right)</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="large-guidance-weight-sampler">
+<h3>Large guidance weight sampler<a class="headerlink" href="#large-guidance-weight-sampler" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Guide의 가중치 w 를 높이면 train-test 불일치가 생긴다.</p></li>
+<li><p>이로 인해, 높은 가중치로 생성된 이미지는 훈련 데이터 범위 [-1,1] 를 벗어나게 되고, classifier-free guidance가 평균과 분산을 이동시켜 이미지가 아예 “빗나가” 이상한 이미지를 생성하게 된다</p></li>
+</ul>
+</section>
+<section id="static-thresholding">
+<h3>Static Thresholding<a class="headerlink" href="#static-thresholding" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>x-prediction 을 [-1,1]로 clipping 한다. 여전히 saturation 이 되고 fidelity가 덜한 이미지가 생성됨</p></li>
+<li><p>문제를 해결하고자 dynamic thresholding 을 제시함</p></li>
+</ul>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_5.png"><img alt="imagen_5" class="bg-primary mb-1" src="../../_images/imagen_5.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 214 </span><span class="caption-text">Graphical visualization of static thresholding</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="dynamic-thresholding">
+<h3>Dynamic Thresholding<a class="headerlink" href="#dynamic-thresholding" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>특정 백분위수 절대 픽셀 값을 s 라고 지정하고 s &gt; 1 이면, 임계값을 [-s,s]로 지정한 다음 s로 나눈다.</p></li>
+<li><p>예시: 90% 지점의 픽셀 값이 3 이면 [-3,3]으로 clipping 한 후 3으로 나눠서 [-1,1] 로 normalize 함.</p></li>
+<li><p>Thresholding 의 차이는 아래 결과 비교 이미지로 확인 할 수 있다.</p></li>
+</ul>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_6.png"><img alt="imagen_6" class="bg-primary mb-1" src="../../_images/imagen_6.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 215 </span><span class="caption-text">Graphical visualization of dynamic thresholding</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_7.png"><img alt="imagen_7" class="bg-primary mb-1" src="../../_images/imagen_7.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 216 </span><span class="caption-text">Comparison among no thresholding, static thresholding and dynamic thresholding, respectively</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="super-resolution-models">
+<h3>Super Resolution Models<a class="headerlink" href="#super-resolution-models" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Efficient U-Net이라는 새로운 모델을 만들어, 기존 U-Net에서 여러가지 modification을 하였다고 주장 (그렇지만 EffU-Net은 의료쪽으로 이름이 이미 있는걸로 아는데…)</p></li>
+<li><p>Removed self-attention layer</p></li>
+<li><p>Keep the text cross-attention layer</p></li>
+<li><p>Skip connection scaling을 1/(√2)로 하여 convergence 를 더 빠르게 함</p></li>
+<li><p>Lower resolution block에서 residual blocks를 더 추가함</p></li>
+</ul>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_8.png"><img alt="imagen_8" class="bg-primary mb-1" src="../../_images/imagen_8.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 217 </span><span class="caption-text">Architecture of Super Resolution Diffusion Model used in Imagen</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="drawbench">
+<h3>DrawBench<a class="headerlink" href="#drawbench" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Imagen 저자들이 제시한 새로운 벤치마크 데이터셋. 본 데이터셋은 text prompt 와 category label 로 이루어졌다</p></li>
+<li><p>깃허브에서 다운 받을 수 있으며, 예시는 아래 그림과 같다
+11 categories, 200 text prompts
+Human evaluation 으로 진행 (25명의 평가자)
+Model A에서 생성한 이미지 set vs Model B에서 생성한 이미지 set</p></li>
+</ul>
+<p>평가자는 2가지 질문을 주며 2가지 기준점으로 평가함
+<strong>Q1. Which set of images is of higher quality?</strong>
+<strong>Q2. Which set of images better represents the text caption: {text caption}?</strong></p>
+<p>기준점</p>
+<ul class="simple">
+<li><p>Image Fidelity</p></li>
+<li><p>Image-text alignment</p></li>
+</ul>
+<p>평가자는 3가지 답변 중 하나를 선택해야함</p>
+<ol class="arabic simple">
+<li><p>I prefer set A</p></li>
+<li><p>I am Indifferent</p></li>
+<li><p>I prefer set B</p></li>
+</ol>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_9.png"><img alt="imagen_9" class="bg-primary mb-1" src="../../_images/imagen_9.png" style="width: 550px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 218 </span><span class="caption-text">Screenshot of DrawBench dataset</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="results">
+<h2>Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Figure 2 에서는 DrawBench에서 나온 결과를 체리피킹 없이 보여준다.</p></li>
+<li><p>아마 저자들은 체리피킹 없이도 좋은 결과를 보여주고, 다양한 카테고리에서도 훌륭한 이미지를 생성 할 수 있다는 주장인 것 같다.</p></li>
+</ul>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_10.png"><img alt="imagen_10" class="bg-primary mb-1" src="../../_images/imagen_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 219 </span><span class="caption-text">Result of Imagen in DrawBench dataset</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>Zero-shot 으로 한 FID값이 MS-COCO로 학습한 모델들 FID 보다 낮음 (FID는 낮을수록 좋음).</p></li>
+<li><p>Table 2 에서는 Imagen이 no people (사람이 없는 사진) 에는 photorealism 점수가 올라감
+→ Imagen 은 photorealistic people을 생성하기에 한계가 있음.</p></li>
+</ul>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_11.png"><img alt="imagen_11" class="bg-primary mb-1" src="../../_images/imagen_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 220 </span><span class="caption-text">Result Table of Imagen</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="qualitative-result-table-of-imagen-from-human-evaluators">
+<h3>Qualitative Result Table of Imagen from Human Evaluators<a class="headerlink" href="#qualitative-result-table-of-imagen-from-human-evaluators" title="Permalink to this heading">#</a></h3>
+<ul class="simple">
+<li><p>Human raters (사람 평가자) 들은 T5-XXL로 text encoding 한 text-to-image generation 모델을 CLIP-based 보다 더 선호함</p></li>
+<li><p>기본적으로 Imagen 은 다른 text-to-image generation 모델에서 (SOTA 모델인 DALL-E 2) 보다도 human raters 에서 DrawBench 데이터셋에서 좋은 평가를 받음</p></li>
+</ul>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_12.png"><img alt="imagen_12" class="bg-primary mb-1" src="../../_images/imagen_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 221 </span><span class="caption-text">Qualitative Result Table of Imagen from Human evaluators</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="ablation-study">
+<h2>Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Scaling text encoder size 가 U-Net size scaling 보다 더 중요함</p></li>
+<li><p>(a)의 text encoder 사이즈의 변화가 FID 및 CLIP score 점수에 더욱 많은 영향을 끼침</p></li>
+<li><p>Dynamic thresholding 이 performance boost에 더욱 영향을 끼침</p></li>
+<li><p>Dynamic thresholding을 이용하면 성능을 더욱 끌어 올릴 수 있음</p></li>
+</ul>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_13.png"><img alt="imagen_13" class="bg-primary mb-1" src="../../_images/imagen_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 222 </span><span class="caption-text">Qualitative Result Table of Imagen from Human evaluators</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="conclusion">
+<h2>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Frozen large pretrained language model shows better performance over text-image paired multimodal encoders such as CLIP in text-to-image generation task</p></li>
+<li><p>Efficient U-Net significantly improves performance time</p></li>
+<li><p>Dynamic thresholding allows usage of much higher guidance weights with better fidelity of generated images</p></li>
+</ul>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="StyO.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">StyO</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="imagen_editor.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Imagen Editor</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributions">Contributions</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#methodology">Methodology</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#pretrained-t5-xxl-cascaded-diffusion-model">Pretrained T5-XXL + Cascaded Diffusion Model</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#classifier-free-guidance">Classifier-Free Guidance</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#large-guidance-weight-sampler">Large guidance weight sampler</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#static-thresholding">Static Thresholding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dynamic-thresholding">Dynamic Thresholding</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#super-resolution-models">Super Resolution Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#drawbench">DrawBench</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#qualitative-result-table-of-imagen-from-human-evaluators">Qualitative Result Table of Imagen from Human Evaluators</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/imagen_editor.html b/docs/review/imagen_editor.html
old mode 100644
new mode 100755
index b4b19606..01b8032a
--- a/docs/review/imagen_editor.html
+++ b/docs/review/imagen_editor.html
@@ -1,693 +1,713 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Imagen Editor &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/imagen_editor';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="SDEdit" href="SDEdit.html" />
-    <link rel="prev" title="Imagen" href="imagen.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/imagen_editor.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/imagen_editor.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Imagen Editor</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Imagen Editor and EditBench: Advancing and Evaluating Text-Guided Image Inpainting (CVPR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2212.06909">https://arxiv.org/pdf/2212.06909</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Sep. 06, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="imagen-editor">
-<h1>Imagen Editor<a class="headerlink" href="#imagen-editor" title="Permalink to this heading">#</a></h1>
-<p>이번 시간에는 Google Research 에서 소개하는 Imagen 모델 기반의 text-guided image inpainting 모델 Imagen Editor 와 text-guided impainting 의 평가기법 EditBench 에 대해 알아볼 예정입니다.</p>
-<p>Text-guided image inpainting 에서 기존에는 mask 영역을 random 하게 지정하여 학습을 진행했습니다. 이는 입력된 text prompt 와 무관한 영역을 masking 하게 됨으로써 모델이 prompt 를 참조하지 않고 오로지 image content 만으로 학습하게 되는 현상이 발생합니다. Imagen Editor 는 이를 해결하기 위해 Object Masking 기법을 소개합니다. Prompt 에 해당하는 객체 전체를 masking 함으로써 모델이 text prompt 를 더 참조할 수 있도록 유도하는 것이 목표입니다. SSD MobileNet v2 모델을 Object Detector 로 사용함으로써 모델 성능이 크게 개선되는 부분을 확인할 수 있었다고 합니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_01.png"><img alt="imagen_editor_01" class="bg-primary mb-1" src="../../_images/imagen_editor_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 223 </span><span class="caption-text">Effect of Object Masking</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Imagen Editor 에서 또 다른 특징은 Imagen 모델 기반의 cascaded diffusion model architecture 를 지니고 있다는 점입니다. 이때, SR3, Palette, GLIDE 와 유사하게 이미지와 mask 가 Encoder 를 거친 후, diffusion latent 와 concatenate 하면서 conditioning input 으로 들어가게 되며, 모두 1024x1024 해상도를 가진다고 합니다. 따라서, base diffusion 64x64 모델 그리고 64x64 → 256x256 super resolution 모델에 입력 시, downsampling 작업 후 모델 input 으로 입력합니다. 또한, conditioning 이미지와 mask 없을 시 Imagen 모델을 사용하는 것과 동일한 효과를 내기 위해, 새로 추가되는 input channel weights 는 0으로 초기화해서 학습을 진행했다고 소개합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_02.png"><img alt="imagen_editor_02" class="bg-primary mb-1" src="../../_images/imagen_editor_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 224 </span><span class="caption-text">Imagen Editor Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Imagen 에서 소개되었던 Classifier-Free Guidance 를 동일하게 사용하고, 이때 guidance weight 를 1부터 30 까지 범위 내에서 변화시키는 oscillating guidance 기법을 적용함으로써 생성된 이미지 퀄리티 및 text-image alignment 가 상승되는 효과를 볼 수 있었다고 합니다.</p>
-<p>논문에서는 Imagen Editor 와 같은 text-guided image inpainting 모델들을 평가할 수 있는 새로운 benchmark EditBench 를 제시합니다. 240개의 (image, mask) 쌍으로 데이터셋이 구축되어있고, 각 쌍마다 3가지의 prompt 로 생성된 이미지로 사람이 모델 성능을 측정하게 됩니다. Automatic Evaluation Metric 으로는 CLIPScore, 그리고 CLIP-R-Prec 를 사용했습니다.</p>
-<p>EditBench 이미지 데이터셋의 절반은 open source 로 공개된 computer vision 데이터셋으로부터 수집되었고, 나머지 절반은 text-to-image 모델로 생성해서 구축했습니다. 이때, <em>attribute-object-scene</em> 의 요소들을 모두 갖추도록 이미지들을 수집 및 생성했습니다.</p>
-<ul class="simple">
-<li><p>Attributes (material, color, shape, size, count)</p></li>
-<li><p>Objects (common, rare, text rendering)</p></li>
-<li><p>Scenes (indoor, outdoor, realistic, paintings)</p></li>
-</ul>
-<p>예를 들어서, ‘a=metal|o=cat|s=outdoor’ 요소들을 포함하는 문구를 ‘a metal cat standing in the middle of a farm field’ 처럼 생성하는 것입니다. 앞써 언급한 3가지 prompt 는 해당사진처럼 <em>Mask-Simple</em>, <em>Mask-Rich</em>, 그리고 <em>Full</em> 로 정의합니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_03.png"><img alt="imagen_editor_03" class="bg-primary mb-1" src="../../_images/imagen_editor_03.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 225 </span><span class="caption-text">EditBench example</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>데이터셋 구축시, mask 크기도 다양하게 설정하여 mask 크기에 따른 모델 성능도 확인할 수 있었습니다. 성능을 측정해본 결과, Object masking 으로 학습한 모델이 random masking 으로 학습한 모델보다 small/medium masks 에서 성능적으로 월등히 좋다는 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_04.png"><img alt="imagen_editor_04" class="bg-primary mb-1" src="../../_images/imagen_editor_04.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 226 </span><span class="caption-text">Human Evaluations on EditBench</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또한, object-rendering 에 비해 text-rendering 성능이 저하되는 부분을 확인할 수 있고, material/color/size 속성보다 count/size 속성에 더 취약한 부분도 확인할 수 있었습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_05.png"><img alt="imagen_editor_05" class="bg-primary mb-1" src="../../_images/imagen_editor_05.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 227 </span><span class="caption-text">Imagen Editor failure cases by attribute</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>마지막으로, 동일한 prompt 에 대해 Stable Diffusion, DALL-E2, Imagen Editor 모델로 inpainting 한 결과를 비교한 예시 사진입니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_06.png"><img alt="imagen_editor_06" class="bg-primary mb-1" src="../../_images/imagen_editor_06.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 228 </span><span class="caption-text">Example model outputs for Mask-Simple vs MaskRich prompts</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="imagen.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Imagen</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="SDEdit.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">SDEdit</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Imagen Editor &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/imagen_editor';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="SDEdit" href="SDEdit.html" />
+    <link rel="prev" title="Imagen" href="imagen.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/imagen_editor.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/imagen_editor.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Imagen Editor</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Imagen Editor and EditBench: Advancing and Evaluating Text-Guided Image Inpainting (CVPR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2212.06909">https://arxiv.org/pdf/2212.06909</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Sep. 06, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="imagen-editor">
+<h1>Imagen Editor<a class="headerlink" href="#imagen-editor" title="Permalink to this heading">#</a></h1>
+<p>이번 시간에는 Google Research 에서 소개하는 Imagen 모델 기반의 text-guided image inpainting 모델 Imagen Editor 와 text-guided inpainting 의 평가기법 EditBench 에 대해 알아볼 예정입니다.</p>
+<p>Text-guided image inpainting 에서 기존에는 mask 영역을 random 하게 지정하여 학습을 진행했습니다. 이는 입력된 text prompt 와 무관한 영역을 masking 하게 됨으로써 모델이 prompt 를 참조하지 않고 오로지 image content 만으로 학습하게 되는 현상이 발생합니다. Imagen Editor 는 이를 해결하기 위해 Object Masking 기법을 소개합니다. Prompt 에 해당하는 객체 전체를 masking 함으로써 모델이 text prompt 를 더 참조할 수 있도록 유도하는 것이 목표입니다. SSD MobileNet v2 모델을 Object Detector 로 사용함으로써 모델 성능이 크게 개선되는 부분을 확인할 수 있었다고 합니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_01.png"><img alt="imagen_editor_01" class="bg-primary mb-1" src="../../_images/imagen_editor_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 223 </span><span class="caption-text">Effect of Object Masking</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Imagen Editor 에서 또 다른 특징은 Imagen 모델 기반의 cascaded diffusion model architecture 를 지니고 있다는 점입니다. 이때, SR3, Palette, GLIDE 와 유사하게 이미지와 mask 가 Encoder 를 거친 후, diffusion latent 와 concatenate 하면서 conditioning input 으로 들어가게 되며, 모두 1024x1024 해상도를 가진다고 합니다. 따라서, base diffusion 64x64 모델 그리고 64x64 → 256x256 super resolution 모델에 입력 시, downsampling 작업 후 모델 input 으로 입력합니다. 또한, conditioning 이미지와 mask 가 없을 시 Imagen 모델을 사용하는 것과 동일한 효과를 내기 위해, 새로 추가되는 input channel weights 는 0으로 초기화해서 학습을 진행했다고 소개합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_02.png"><img alt="imagen_editor_02" class="bg-primary mb-1" src="../../_images/imagen_editor_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 224 </span><span class="caption-text">Imagen Editor Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Imagen 에서 소개되었던 Classifier-Free Guidance 를 동일하게 사용하고, 이때 guidance weight 를 1부터 30 까지 범위 내에서 변화시키는 oscillating guidance 기법을 적용함으로써 생성된 이미지 퀄리티 및 text-image alignment 가 상승되는 효과를 볼 수 있었다고 합니다.</p>
+<p>논문에서는 Imagen Editor 와 같은 text-guided image inpainting 모델들을 평가할 수 있는 새로운 benchmark EditBench 를 제시합니다. 240개의 (image, mask) 쌍으로 데이터셋이 구축되어있고, 각 쌍마다 3가지의 prompt 로 생성된 이미지로 사람이 모델 성능을 측정하게 됩니다. Automatic Evaluation Metric 으로는 CLIPScore, 그리고 CLIP-R-Prec 를 사용했습니다.</p>
+<p>EditBench 이미지 데이터셋의 절반은 open source 로 공개된 computer vision 데이터셋으로부터 수집되었고, 나머지 절반은 text-to-image 모델로 생성해서 구축했습니다. 이때, <em>attribute-object-scene</em> 의 요소들을 모두 갖추도록 이미지들을 수집 및 생성했습니다.</p>
+<ul class="simple">
+<li><p>Attributes (material, color, shape, size, count)</p></li>
+<li><p>Objects (common, rare, text rendering)</p></li>
+<li><p>Scenes (indoor, outdoor, realistic, paintings)</p></li>
+</ul>
+<p>예를 들어서, ‘a=metal|o=cat|s=outdoor’ 요소들을 포함하는 문구를 ‘a metal cat standing in the middle of a farm field’ 처럼 생성하는 것입니다. 앞서 언급한 3가지 prompt 는 해당사진처럼 <em>Mask-Simple</em>, <em>Mask-Rich</em>, 그리고 <em>Full</em> 로 정의합니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_03.png"><img alt="imagen_editor_03" class="bg-primary mb-1" src="../../_images/imagen_editor_03.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 225 </span><span class="caption-text">EditBench example</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>데이터셋 구축시, mask 크기도 다양하게 설정하여 mask 크기에 따른 모델 성능도 확인할 수 있었습니다. 성능을 측정해본 결과, Object masking 으로 학습한 모델이 random masking 으로 학습한 모델보다 small/medium masks 에서 성능적으로 월등히 좋다는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_04.png"><img alt="imagen_editor_04" class="bg-primary mb-1" src="../../_images/imagen_editor_04.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 226 </span><span class="caption-text">Human Evaluations on EditBench</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, object-rendering 에 비해 text-rendering 성능이 저하되는 부분을 확인할 수 있고, material/color/size 속성보다 count/shape 속성에 더 취약한 부분도 확인할 수 있었습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_05.png"><img alt="imagen_editor_05" class="bg-primary mb-1" src="../../_images/imagen_editor_05.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 227 </span><span class="caption-text">Imagen Editor failure cases by attribute</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>마지막으로, 동일한 prompt 에 대해 Stable Diffusion, DALL-E2, Imagen Editor 모델로 inpainting 한 결과를 비교한 예시 사진입니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/imagen_editor_06.png"><img alt="imagen_editor_06" class="bg-primary mb-1" src="../../_images/imagen_editor_06.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 228 </span><span class="caption-text">Example model outputs for Mask-Simple vs Mask-Rich prompts</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="imagen.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Imagen</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="SDEdit.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">SDEdit</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/latent_consistency_models.html b/docs/review/latent_consistency_models.html
old mode 100644
new mode 100755
index cabc15da..27fd24a2
--- a/docs/review/latent_consistency_models.html
+++ b/docs/review/latent_consistency_models.html
@@ -1,1008 +1,1028 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Latent Consistency Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/latent_consistency_models';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="LLM Grounded Diffusion" href="LLM_grounded_Diffusion.html" />
-    <link rel="prev" title="Consistency Models" href="consistency_models.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/latent_consistency_models.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/latent_consistency_models.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Latent Consistency Models</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Latent Consistency Models</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">2. Preliminaries</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3. Latent Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-distillation-in-the-latent-space">3.1 Consistency Distillation in the Latent Space</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode">3.2 One-Stage Guided Distillation by solving augmented PF-ODE</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#accelerating-distillation-with-skipping-time-steps">3.3 Accelerating Distillation with Skipping Time Steps</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-fine-tuning-for-customized-dataset">3.4 Latent Consistency Fine-tuning for customized dataset</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-generation">4.1 Text-To-Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#abulation-study">4.2 Abulation Study</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ode-solvers-skipping-step-schedule">ODE Solvers &amp; Skipping-Step Schedule</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#the-effect-of-guidance-scale-omega">The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span></a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-consistency-fine-tuning-results">4.3 Downstream Consistency Fine-tuning Results</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2310.04378">https://arxiv.org/pdf/2310.04378</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/luosiallen/latent-consistency-model">luosiallen/latent-consistency-model</a></p></li>
-<li><p>Project Page: <a class="reference external" href="https://latent-consistency-models.github.io/">https://latent-consistency-models.github.io/</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Donghyun Han</p></li>
-<li><p><strong>Last updated on May. 1, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="latent-consistency-models">
-<h1>Latent Consistency Models<a class="headerlink" href="#latent-consistency-models" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Diffusion model은 다양한 분야에서 주목할만한 성과를 거두었지만  매우 느린 sampling 속도를 가지기 때문에 실시간 사용이 불가능하다. 이 같은 단점을 극복하기 위해 sampling 속도를 향상시키는 다양한 accelerating 방법이 제안되었다:</p>
-<p> </p>
-<ol class="arabic simple">
-<li><p>ODE solver의 성능개선을 통해 10~20 step만으로도 좋은 성능을 가지는 방법.</p>
-<ul class="simple">
-<li><p>DPM-Solver (<a class="reference external" href="https://arxiv.org/pdf/2206.00927">lu et al.</a>)</p></li>
-</ul>
-</li>
-<li><p>사전 학습된 Diffusion model을 몇 step만으로도 추론할수 있도록 <strong>distillation</strong>하는 방법.</p>
-<ul class="simple">
-<li><p>PD (Progressive Distillation). → 2 stage (<a class="reference external" href="https://arxiv.org/pdf/2202.00512">Salimans et al.</a>)</p></li>
-<li><p>On Distillation of Guided Diffusion Models. (<a class="reference external" href="https://arxiv.org/pdf/2210.03142">Meng et al.</a>)</p></li>
-<li><p>Consistency Models (<a class="reference external" href="https://arxiv.org/pdf/2303.01469">Song et al.</a>)</p></li>
-</ul>
-</li>
-</ol>
-<p>이중 특히 Consistency Models은 ODE-trajectory에 대한 일관성을 갖도록 하는 모델로서, single step만으로도 이미지를 생성할 수 있기 때문에 반복적인 계산이 필요하지 않다. 그러나 이 모델 또한 2가지의 단점을 가지고 있다:</p>
-<p> </p>
-<ol class="arabic simple">
-<li><p>Pixel space의 Flow-based Model이기 때문에 <strong>high-resolution 이미지 생성</strong>에 적합하지 않음.</p></li>
-<li><p>Conditional(Classifer-free Guidance)한 이미지 생성을 고려하지 않아 <strong>text2img</strong>에 적합하지 않음.</p></li>
-</ol>
-<hr class="docutils" />
-<p>본 논문의 제안점은 다음 3가지다:</p>
-<p> </p>
-<ul class="simple">
-<li><p>빠르고 high-resolution 이미지를 생성하기 위한 Latent Consistency Models(LCMs)를 제안한다. LCMs은 영상의 latent space에 Consistency Models 개념을 적용해 매우 적은 step 만으로도 <strong>고품질의 이미지</strong>를 생성할 수 있다.</p></li>
-<li><p>guided consistency distillation을 통해 Stable Diffusion을 매우 적은 step(1~4)으로 sampling 할 수 있는 방법을 제공한다. <strong>Skipping-Step</strong>이라는 테크닉을 통해 학습을 가속화 한다. 2, 4 step Model의 경우 학습에 A100 GPU 32시간 밖에 걸리지 않으며 LAION-5B-Aesthetics dataset에서 SOTA의 성능을 달성했다.</p></li>
-<li><p>LCMs에 대한 새로운 fine-tuning 방식인 Latent Consistency Fine-tuning을 통해 <strong>빠른 추론 속도를 유지하면서도 Custom Dataset에 효율적으로 적용</strong>할 수 있다.</p></li>
-</ul>
-<p> </p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_01.png"><img alt="ldm_01" class="bg-primary mb-1" src="../../_images/ldm_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 506 </span><span class="caption-text">768x768 Resolution image in 1~4 steps.</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="preliminaries">
-<h2>2. Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
-<section id="diffusion-models">
-<h3>Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h3>
-<p>Diffusion Models 혹은 Score-based Models는 데이터에 점진적으로 Gaussian noise를 주입하고 reverse denoise process로 noise를 제거하여 데이터를 sampling하는 기법이다. 반면 forward process는 원본 데이터 분포인 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span>를 주변 확률분포인 <span class="math notranslate nohighlight">\(q_{t}(x_{t})\)</span>로 변환한다:</p>
-<div class="math notranslate nohighlight">
-\[
-q_{0t}(x_{t}|x_{0})=\mathcal{N}(x_t|\alpha(t)x_0,\sigma^2(t)I)
-\]</div>
-<p>여기서 <span class="math notranslate nohighlight">\(\alpha(t)\)</span>와 <span class="math notranslate nohighlight">\(\sigma(t)\)</span>는 noise scheduler를 의미한다. 연속적인 timestep의 관점에서 이를 확률미분방정식(Stochastic Differential Equation, SDE)으로 나타낼 수 있는데, 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[
-f(t)=\frac{d\log{\alpha(t)}}{dt}, g^2(t)=\frac{d\sigma^2(t)}{dt}-2\frac{d\log{\alpha(t)}}{dt}\sigma^2(t). \tag{1}
-\]</div>
-<p>또한 주변 확률분포 <span class="math notranslate nohighlight">\(q_t(x)\)</span>는 <strong>Probability Flow ODE(PF-ODE)</strong>라는 상미분방정식(Ordinary Differential Equation, ODE)을 만족하는데 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{dx_t}{dt}=f(t)x_t-\frac{1}{2}g^2(t)\nabla_x\log{q_t(x_t)}, \ x_T \sim q_T(x_T). \tag{2}
-\]</div>
-<p>이때 Diffusion model은 <span class="math notranslate nohighlight">\(-\nabla\log{q_t(x_t)}\)</span>(score function)를 예측하는 noise 예측 모델(<span class="math notranslate nohighlight">\(\epsilon_\theta(x_t,t)\)</span>)을 학습시킨다. 학습된 모델은 score function의 근사치를 예측하고 sampling하는데 이를 empirical PF-ODE라 한다 (경험적 PF-ODE):</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{dx_t}{dt}=f(t)x_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(x_t,t), \ x_T \sim \mathcal{N}(0, \tilde{\sigma}^2I). \tag{3}
-\]</div>
-<p>Classifier-Free Guidance (CFG)는 sampling의 퀄리티를 높이기 위해 GLIDE, Stable Diffusion, DALL<span class="math notranslate nohighlight">\(\cdot\)</span>E2, Imagen 등 다양한 conditional model에서 사용되었다. CFG의 scale <span class="math notranslate nohighlight">\(\omega\)</span>가 주어졌을 때 원본 noise prediction은 conditional, unconditional noise prediction을 선형적으로 혼합하여 대체된다:</p>
-<div class="math notranslate nohighlight">
-\[
-\tilde{\epsilon}_\theta(z_t,\omega, c,t)=(1+\omega)\epsilon_\theta(z_t, c,t)-\omega\epsilon_\theta(z_t, \emptyset, t).
-\]</div>
-<p> </p>
-</section>
-<section id="consistency-models">
-<h3>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h3>
-<p>Consistency Model(CM)은 몇 step 혹은 한번의 step 만으로 데이터를 생성할 수 있는 모델이다. CM의 핵심은 <strong>PF-ODE의 궤적에 어떤 point와 PF-ODE의 solution에 대해 mapping되는 function (<span class="math notranslate nohighlight">\(f:(x_t, t) \mapsto x_\epsilon\)</span>)을 추정</strong>하는 것이다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_02.png"><img alt="ldm_02" class="bg-primary mb-1" src="../../_images/ldm_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 507 </span><span class="caption-text">Consistency Models (CM).</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\epsilon\)</span>은 고정된 매우 작은 양수값을 가지며 CM의 function은 자기 자신에 대한 <strong>self-consistency</strong>를 만족해야한다. 즉 어떠한 time step에 대해서도 <span class="math notranslate nohighlight">\(x_\epsilon\)</span>을 sampling 할 수 있어야 한다.</p>
-<div class="math notranslate nohighlight">
-\[
-f(x_t,t)=f(x_{t'},t'), \forall t,t' \in [\epsilon, T]. \tag{4}
-\]</div>
-<p><span class="math notranslate nohighlight">\(f_\theta(x, \epsilon)=x\)</span>를 만족하는 모델 <span class="math notranslate nohighlight">\(f_\theta\)</span>는 다음과 같이 정리할 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-f_\theta(x,t)=c_{skip}(t)x+c_{out}(t)F_\theta(x,t). \tag{5}
-\]</div>
-<p><span class="math notranslate nohighlight">\(c_{skip}(t)\)</span>와 <span class="math notranslate nohighlight">\(c_{out}(t)\)</span>는 미분 가능한 함수이며 <span class="math notranslate nohighlight">\(c_{skip}(\epsilon)=1, c_{out}(\epsilon)=0\)</span>이기 때문에 <span class="math notranslate nohighlight">\(f_\theta(x, \epsilon)=x\)</span>를 만족한다. <span class="math notranslate nohighlight">\(\theta\)</span>는 학습 가능한 파라미터로 <span class="math notranslate nohighlight">\(F_\theta\)</span>는 심층 신경망을 의미한다.</p>
-<p> </p>
-<p>CM은 pre-trained 모델에 대한 Distillation 방식과 scratch부터 학습하는 방식이 있는데 주로 <strong>Distillation 방식</strong>을 사용한다. Distillation 방식은 parameter <span class="math notranslate nohighlight">\(\theta^-\)</span>가 <span class="math notranslate nohighlight">\(\theta\)</span>를 통해 학습하며 모델에 대한 self-consistency를 위해 다음과 같이 손실함수를 구성한다:</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L}(\theta,\theta^-;\Phi)=\mathbb{E}_{x,t}\bigg[d\bigg(f_\theta(x_{t_{n+1}}, t_{n+1}), f_{\theta^-}(\hat{x}^\phi_{t_n}, t_n)\bigg)\bigg]. \tag{6}
-\]</div>
-<p>이 때 <span class="math notranslate nohighlight">\(\theta^-\)</span>는 <span class="math notranslate nohighlight">\(\theta\)</span>에 대한 지수이동평균(Exponential Moving Average, EMA)이며 <span class="math notranslate nohighlight">\(\theta^-  \leftarrow \mu\theta^-+(1-\mu)\theta\)</span>이다. <span class="math notranslate nohighlight">\(d(\cdot, \cdot)\)</span>은 두 sample 사이의 거리를 측정하는 지표이다. <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>은 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>로부터 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 추정한 값으로 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[
-\hat{x}^\phi_{t_n} \leftarrow x_{t_{n+1}}+(t_{n}-t_{n+1})\Phi(x_{t_{n+1}}, t_{n+1};\phi). \tag{7}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\Phi\)</span>는 PF-ODE에 사용되는 ODE Solver로 <a class="reference external" href="https://en.wikipedia.org/wiki/Euler_method">Euler</a>나 <a class="reference external" href="https://en.wikipedia.org/wiki/Heun%27s_method">Heun</a> Method등의 수치적인 ODE solver를 사용할 수 있다. 즉 Consistency Distillation은 ODE Solver로 예측한 <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>과 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>을 입력으로 <span class="math notranslate nohighlight">\(f_{\theta^-}\)</span>와 <span class="math notranslate nohighlight">\(f_\theta\)</span>로 <strong>예측한 값의 Consistency를 비교하는 방식으로 Distillation을 수행</strong>한다.</p>
-</section>
-</section>
-<section id="id1">
-<h2>3. Latent Consistency Models<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
-<p>CM의 한계:</p>
-<ul class="simple">
-<li><p>ImageNet 64x64, LSUN 256x256 영상에 대한 Generation만 수행</p>
-<ul>
-<li><p><strong>High Resolution</strong>의 잠재성이 아직 탐구되지 않았음.</p></li>
-<li><p><strong>Classifier-free Guidance(CFG)</strong> 등을 사용하지 않음.</p></li>
-</ul>
-</li>
-</ul>
-<p>Latent Consistency Models(LCMs)는 CM의 잠재력을 충분히 발휘하여 좀더 도전적인 task를 수행한다.</p>
-<section id="consistency-distillation-in-the-latent-space">
-<h3>3.1 Consistency Distillation in the Latent Space<a class="headerlink" href="#consistency-distillation-in-the-latent-space" title="Permalink to this heading">#</a></h3>
-<p>본 논문에서는 pre-trained 된 Stable Diffusion에 Consistency Distillation을 적용한 Latent Consistency Distillation (LCD)을 제안한다. LCMs는 LDM(SD)을 기반으로 설계되었기 때문에 <span class="math notranslate nohighlight">\(z=\varepsilon(x)\)</span>를 통해 <span class="math notranslate nohighlight">\(x\)</span>를 latent vector로 임베딩하고 <span class="math notranslate nohighlight">\(\hat{x}=\mathcal{D}(z)\)</span>를 통해 원본 영상으로 복원한다. latent space 상에서 연산이 이뤄지기 때문에 <strong>Computation Cost를 크게 줄일 수 있어</strong> high-resolution 영상을 laptop GPU에서 생성할 수도 있다.</p>
-<p>condition을 추가한 PF-ODE의 reverse process는 다음과 같이 정의된다:</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{dz_t}{dt}=f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t), \ z_T\sim\mathcal{N}(0,\tilde{\sigma}^2I). \tag{8}
-\]</div>
-<p><span class="math notranslate nohighlight">\(z_t\)</span>는 t step의 image latents, <span class="math notranslate nohighlight">\(\epsilon_\theta(z_t,c,t)\)</span>는 noise 예측 모델, c는 text와 같은 conditional prompt를 의미한다. PF-ODE상에서 모든 t step에 대해 consistency function <span class="math notranslate nohighlight">\(f_\theta :(z_t,c,t) \mapsto z_0\)</span>이기 때문에 이를 수식으로 정리하자면 다음과 같이 나타낼 수 있다 (<span class="math notranslate nohighlight">\(\hat{\epsilon}_\theta\)</span>는 noise prediction model.):</p>
-<div class="math notranslate nohighlight">
-\[
-f_\theta(z,c,t)=c_{skip}(t)z+c_{out}(t)\bigg( \frac{z-\sigma_t\hat{\epsilon}_\theta(z,c,t)}{\alpha_t} \bigg). \ (\epsilon-Prediction) \tag{9}
-\]</div>
-<p>수식을 살펴보면 ddpm 등의 reparameterization trick인 <span class="math notranslate nohighlight">\(x_t := \sqrt{\bar{\alpha}_t}x_0 + \sqrt{1-\bar{\alpha}_t}\epsilon\)</span>을 변형하여 식에 대입한 것을 알 수 있음. (<span class="math notranslate nohighlight">\(x\)</span>→<span class="math notranslate nohighlight">\(z\)</span>로 치환)</p>
-<div class="math notranslate nohighlight">
-\[
-x_0 = \frac{x_t-\sqrt{1-\bar{\alpha}_t}\epsilon}{\sqrt{\bar{\alpha}_t}}, \ \hat{z}_0 = \frac{z_t-\sigma(t)\hat{\epsilon}_{\theta}(z,c,t)}{\alpha(t)}.
-\]</div>
-<p>CM과 마찬가지로 <span class="math notranslate nohighlight">\(c_{skip}(0)=1, c_{out}(0)=0\)</span>이고 <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z,c,t)\)</span>는 teacher diffusion model과 유사한 noise 예측 모델 parameter이다. <span class="math notranslate nohighlight">\(f_\theta\)</span>는 <span class="math notranslate nohighlight">\(\epsilon-Prediction\)</span> 외에도 <span class="math notranslate nohighlight">\(x-Prediction\)</span>이나 <span class="math notranslate nohighlight">\(v-Prediction\)</span>을 사용할 수도 있다. (<span class="math notranslate nohighlight">\(x-Prediction\)</span>은 DDPM, <span class="math notranslate nohighlight">\(v-prediction\)</span>은 PD에서 나온 개념)</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,n}\bigg[ d(f_\theta(z_{t_{n+1}},c,t_{n+1}), f_{\theta^-}(\hat{z}^\psi_{t_n},c,t_n)) \bigg]. \tag{10}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\psi(z_t,t,s,c)\)</span>는 ODE solver이며 특정한 time step <span class="math notranslate nohighlight">\(t \sim s\)</span> 사이에 대한 Eq. 8의 우변의 적분을 근사한 값이다. ODE Solver이기 때문에 <strong>DDIM, DPM-Solver, DPM-Solver++ 등을 사용할 수 있다.</strong> 또한 <span class="math notranslate nohighlight">\(\psi\)</span>는 학습 및 Distillation시에만 사용한다. 이때 <span class="math notranslate nohighlight">\(t_n\)</span>은 EDM을 토대로 CM에서 나오는 값이다. 기존 timestep <span class="math notranslate nohighlight">\([\epsilon, T]\)</span>에 대한 하위 간격으로 <span class="math notranslate nohighlight">\(t_1=\epsilon&lt;t_2&lt;\cdots&lt;t_N=T\)</span>인 어떠한 간격을 의미한다. <span class="math notranslate nohighlight">\(t_i\)</span>는 다음과 같이 나타낼 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-t_i=(\epsilon^{1 / \rho} +\frac{i-1}{N-1}(T^{1 / \rho}-\epsilon^{1 / \rho}))^\rho, \rho=7
-\]</div>
-<p>Eq. 8을 <span class="math notranslate nohighlight">\(t_{n+1} \sim t_n\)</span>까지 t에 대해 적분 했을 때 다음과 같은 수식을 얻을 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-\hat{z}_{t_n}^\psi-z_{t_{n+1}}=\int^{t_n}_{t_{n+1}}{\bigg( f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t) \bigg)}dt \approx \psi(z_{t_{n+1}}, t_{n+1},t_n,c). \tag{11}
-\]</div>
-</section>
-<section id="one-stage-guided-distillation-by-solving-augmented-pf-ode">
-<h3>3.2 One-Stage Guided Distillation by solving augmented PF-ODE<a class="headerlink" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode" title="Permalink to this heading">#</a></h3>
-<p>Classifier-free Guidance(CFG)는 high-quality의 conditional 이미지 생성을 가능하게 했다. 다만 CFG는 2개의 Diffusion Model을 훈련해야하기 때문에 효율적이지 못하며, <strong>LCMs와 같은 few-step sampling method에 사용하기 힘들다.</strong> 따라서 이를 해결하기 위해 본 논문에서는 CFG를 Distillation 과정에서 통합하였다.</p>
-<p>Guided-Distill의 경우 two-stage Distillation을 통해  few-step sampling에 CFG를 통합하였으나 학습시간이 길고 <strong>2단계를 거치며</strong> 손실이 누적되기 때문에 최적의 성능을 내기 힘들다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_03.png"><img alt="ldm_03" class="bg-primary mb-1" src="../../_images/ldm_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 508 </span><span class="caption-text">2 Stage Distillation.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이에 반해 LCMs는 augmented PF-ODE를 해결하는 방식으로 one-stage의 guided Distillation을 제안했다. 일단 CFG에 대한 reverse diffusion process는 다음과 같다:</p>
-<div class="math notranslate nohighlight">
-\[
-\tilde{\epsilon}_{\theta}(z_t,\omega,c,t):=(1+\omega)\epsilon_\theta(z_t,c,t)-\omega\epsilon_\theta(z_t,\varnothing,t). \tag{12}
-\]</div>
-<p>CFG는 conditional noise 예측값과 unconditional noise 예측값을 선형 결합하여 사용한다. 즉 noise 값이 <span class="math notranslate nohighlight">\(\omega\)</span>에 따라 변형되므로 <strong>augmented PF-ODE</strong>라고 한다. augmented PF-ODE는 다음과 같이 나타낼 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-\frac{dz_t}{dt}=f(t)z_t+\frac{g^2(t)}{2\sigma_t}\tilde{\epsilon}_\theta(z_t,\omega,c,t), \ z_T\sim\mathcal{N}(0,\tilde{\sigma}^2I). \tag{13}
-\]</div>
-<p>consistency function도 <span class="math notranslate nohighlight">\(\omega\)</span>를 변수로 받아오기 때문에 <span class="math notranslate nohighlight">\(f_\theta:(z_t,\omega,c,t)\mapsto z_0\)</span>로 다시 정의된다. Consistency Distillation Loss 또한 다음과 같이 나타낼 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,\omega,n}\bigg[ d\bigg( f_\theta(z_{t_{n+1}},\omega,c,t_{n+1}), f_{\theta^-}(\hat{z}_{t_n}^{\psi,\omega},\omega,c,t_n) \bigg) \bigg] \tag{14}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\omega\)</span>와 <span class="math notranslate nohighlight">\(n\)</span>는 각각 <span class="math notranslate nohighlight">\([\omega_{min}, \omega_{max}]\)</span>, <span class="math notranslate nohighlight">\(\{1,…,N-1\}\)</span>에서 sampling된다. <span class="math notranslate nohighlight">\(\hat{z}^{\psi, \omega}_{t_n}\)</span>는 이전과 마찬가지로 CFG가 추가된 ODE-Solver를 사용하여 근사한 값을 의미한다. 이때 사용되는 새로운 noise 예측모델 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_\theta(z_t,\omega,c,t)\)</span>는  Eq. 11처럼  <span class="math notranslate nohighlight">\(t_{n+1} \sim t_n\)</span>까지 t에 대해 적분 했을 때 다음과 같이 나타낼 수 있다:</p>
-<div class="math notranslate nohighlight">
-\[
-\hat{z}^{\psi, \omega}_{t_n}-z_{t_{n+1}}=\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\tilde{\epsilon}_\theta(z_t,\omega,c,t)\bigg)dt
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-=(1+\omega)\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t)\bigg)dt
-\]</div>
-<div class="math notranslate nohighlight">
-\[
--\omega\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,\varnothing,t)\bigg)dt
-\]</div>
-<div class="math notranslate nohighlight">
-\[
-\approx(1+\omega)\psi(z_{t_{n+1}}, t_{n+1},t_n,c)-\omega\psi(z_{t_{n+1}}, t_{n+1},t_n,\varnothing). \tag{15}
-\]</div>
-<p>마찬가지로 PF-ODE Solver <span class="math notranslate nohighlight">\(\psi(\cdot,\cdot,\cdot,\cdot)\)</span>에는 DDIM, DPM-Solver, DPM-Solver++ 등을 사용할 수 있다.</p>
-</section>
-<section id="accelerating-distillation-with-skipping-time-steps">
-<h3>3.3 Accelerating Distillation with Skipping Time Steps<a class="headerlink" href="#accelerating-distillation-with-skipping-time-steps" title="Permalink to this heading">#</a></h3>
-<p>Stable Diffusion 등 보통의 Diffusion Model들은 매우 큰 step을 전체 time step으로 잡고 학습한다. 그러나 이같이 촘촘한 time step은 각 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+1}\)</span>의 변화량을 감소시키기 때문에 자연스럽게 Consistency Distillation Loss도 작아지게 된다. <strong>Loss가 작아지면 학습의 수렴속도도 느려지게 된다.</strong> 따라서 LCMs는 학습 수렴의 속도를 높이기 위해 time step을 수천에서 수십으로 크게 단축시키는 SKIPPING-STEP 방법을 제안하였다.</p>
-<p>기존 CMs 모델의 경우 time scheduler로 EDM을 사용하고 ODE-Solver로 Euler 방법이나 Heun 방법을 사용한다.  그러나 LCMs는 Eq. 8을 통해 DDIM, DPM-Solver, DPM-Solver++와 같은 효율적인 solver도 효과적으로 데이터를 생성할 수 있다는 것을 증명했다. 따라서 <strong>SKIPPING-STEP 방법은 <span class="math notranslate nohighlight">\(t_{n+1} → t_n\)</span> 사이의 Consistency를 비교하는것이 아니라 특정 k-step만큼 거리가 있는 time step에 대한 Consistency를 비교한다.</strong> (<span class="math notranslate nohighlight">\(t_{n+k}→t_n\)</span>)</p>
-<p>이때 <span class="math notranslate nohighlight">\(k\)</span>값의 크기는 trade-off 관계를 가진다. 너무작으면 (<span class="math notranslate nohighlight">\(k=1\)</span>) 기존과 같이 느린 수렴속도를 갖게되며, 너무 큰 값일 때는 ODE solver 를 통해 근사할 때 오차가 매우 커질수 있다. 논문의 저자는 <span class="math notranslate nohighlight">\(k=20\)</span>을 사용해 <strong>time step을 수천에서 수십으로 대폭 줄여</strong> 학습을 Accelerating 할 수 있었다. Eq. 14에 k값을 추가해 SKIPPING-STEP을 표현할 수 있다.</p>
-<div class="math notranslate nohighlight">
-\[
-\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,\omega,n}\bigg[ d\bigg( f_\theta(z_{t_{n+k}},\omega,c,t_{n+k}), f_{\theta^-}(\hat{z}_{t_n}^{\psi,\omega},\omega,c,t_n) \bigg) \bigg]. \tag{16}
-\]</div>
-<p><span class="math notranslate nohighlight">\(\hat{z}^{\psi, \omega}_{t_n}\)</span>에 대한 수식도 다음과 같이 변경할 수 있다.</p>
-<div class="math notranslate nohighlight">
-\[
-\hat{z}^{\psi, \omega}_{t_n} \leftarrow z_{t_{n+k}}+(1+\omega)\psi(z_{t_{n+k}}, t_{n+k},t_n,c)-\omega\psi(z_{t_{n+k}}, t_{n+k},t_n,\varnothing). \tag{17}
-\]</div>
-</section>
-</section>
-<section id="latent-consistency-fine-tuning-for-customized-dataset">
-<h2>3.4 Latent Consistency Fine-tuning for customized dataset<a class="headerlink" href="#latent-consistency-fine-tuning-for-customized-dataset" title="Permalink to this heading">#</a></h2>
-<p>Stable Diffusion과 같은 Foundation 생성 모델은 거의 대부분의 text-to-image Generation task에서 잘 되지만 가끔 downstream task를 위해 Custom dataset에 대한 fine-tuning이 필요할 때가 있다. Latent Consistency Fine-tuning(LCF)는 Custom Dataset도 teacher model에 대한 종속없이 few-step inference를 성공적으로 할 수 있도록 한다. 따라서 LCM은 <strong>기존의 Diffusion model에 대한 추가적인 fine tuning 방법론 없이도 Custom Dataset을 바로바로 학습하여 사용</strong>할 수 있다.</p>
-<p>따로 추가적인 fine-tuning 방법이 있는 것은 아니고 Consistency Distillation 시 pre-trained 된 LDM을 사용하여 EMA를 통해 Distillation을 하기 때문에 Dataset을 Custom Dataset으로 사용하기만 하면 된다. 즉 pre-trained Diffusion model → Custom Dataset fine-tuning → few step inference를 위한 Consistency Distillation을 할 필요 없이 바로 학습이 가능하다는 의미이다.</p>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<section id="text-to-image-generation">
-<h3>4.1 Text-To-Image Generation<a class="headerlink" href="#text-to-image-generation" title="Permalink to this heading">#</a></h3>
-<p>3가지 데이터셋에 대한 평가를 진행했다. (LAION-5B, LAION-Aesthetics-6+(12M),  LAION-Aesthetics-6.5+(650k)) 앞서말한것처럼 하나의 Resolution이 아닌 512x512, 768x768의 high resolution을 생성했다. 512 size는 <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction, 768 size는 <span class="math notranslate nohighlight">\(v\)</span>-prediction을 사용했고 ODE-Solver로는 DDIM을 사용했다. 앞서말한것처럼 SKIPPING-STEP은 20의 값을 가진다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_04.png"><img alt="ldm_04" class="bg-primary mb-1" src="../../_images/ldm_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 509 </span><span class="caption-text">Quantitative results at 512 x 512 &amp; 768 x 768 resolution.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_05.png"><img alt="ldm_05" class="bg-primary mb-1" src="../../_images/ldm_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 510 </span><span class="caption-text">Qualitative results on LAION-Aesthetic-6.5+ Dataset. (2,4 steps)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>DDIM, DPM-Solver, DPM-Solver++, Guided-Distill 4가지 모델에 대해 LCM과 성능비교를 했는데 이때 Guided-Distill은 오픈소스 코드가 없기 때문에 논문의 내용과 동일하게 Implementation 해서 성능을 비교하였다. LCM은 같은 메모리 Cost 대비 더 빠르게 수렴하고 더 좋은 품질의 영상을 생성하였다. 특히 Guided-Distill은 2 stage Distillation이지만 LCM은 <strong>1 Stage</strong>만 사용해도 이같은 성능을 보여줬다.</p>
-</section>
-<section id="abulation-study">
-<h3>4.2 Ablation Study<a class="headerlink" href="#abulation-study" title="Permalink to this heading">#</a></h3>
-<section id="ode-solvers-skipping-step-schedule">
-<h4>ODE Solvers &amp; Skipping-Step Schedule<a class="headerlink" href="#ode-solvers-skipping-step-schedule" title="Permalink to this heading">#</a></h4>
-<p>augmented PF-ODE를 푸는 solver들(DDIM, DPM, DPM++)을 LCM에 사용할 때 성능 비교와 SKIPPING-STEP schedule의 <span class="math notranslate nohighlight">\(k\)</span>값에 따른 성능 변화를 비교하였다. 모든 모델은 2,000 iteration에서의 4-step inference로 고정해서 비교했다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_06.png"><img alt="ldm_06" class="bg-primary mb-1" src="../../_images/ldm_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 511 </span><span class="caption-text">Different ODE solvers and skipping step k.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Skipping step의 경우 <span class="math notranslate nohighlight">\(k\)</span> 값을 올렸을 때 훨씬더 빠르게 수렴하며 때때로 더 좋은 FID 값을 보여주었다. 또한 DPM과 DPM++은 <span class="math notranslate nohighlight">\(k\)</span>가 50일 때 DDIM보다 더 좋은 성능을 보였다. 이는 <strong><span class="math notranslate nohighlight">\(k\)</span> 값이 클수록 더 큰 ODE approximation error를 가지는 DDIM에 비해 오차가 적기 때문</strong>이다.</p>
-<p><span class="math notranslate nohighlight">\(k=20\)</span>일 때, 3가지 모델 모두 좋은 성능을 보였다.</p>
-</section>
-<section id="the-effect-of-guidance-scale-omega">
-<h4>The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span><a class="headerlink" href="#the-effect-of-guidance-scale-omega" title="Permalink to this heading">#</a></h4>
-<p>일반적으로 <span class="math notranslate nohighlight">\(\omega\)</span>값이 클수록 CLIP score 같은 품질의 지표는 좋아지지만 그만큼 다양성이 떨어져 FID Score는 나빠진다. 즉 <span class="math notranslate nohighlight">\(\omega\)</span>의 크기는 <strong>Quality와 Diversity에 대한 trade-off가 있다.</strong></p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_07.png"><img alt="ldm_07" class="bg-primary mb-1" src="../../_images/ldm_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 512 </span><span class="caption-text">Different classifier-free guidance scales <span class="math notranslate nohighlight">\(\omega\)</span>.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그래프를 보면 2~8 step inference는 성능에 큰 차이를 가지지는 않는것으로 확인된다. 그러나 <strong>1 step inference는 아직 개선의 여지가 있는것</strong>을 확인할 수 있다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_08.png"><img alt="ldm_08" class="bg-primary mb-1" src="../../_images/ldm_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 513 </span><span class="caption-text">Different classifier-free guidance scales <span class="math notranslate nohighlight">\(\omega\)</span>.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><span class="math notranslate nohighlight">\(\omega\)</span>에 따른 실제 생성 이미지를 비교해 봤을 때 생성 영상의 Quality 차이가 확연하게 드러난다. 즉 Distillation 시에도 CFG를 적용하는 것이 성능을 크게 개선할 수 있다는 것을 증명한다.</p>
-</section>
-</section>
-<section id="downstream-consistency-fine-tuning-results">
-<h3>4.3 Downstream Consistency Fine-tuning Results<a class="headerlink" href="#downstream-consistency-fine-tuning-results" title="Permalink to this heading">#</a></h3>
-<p>포켓몬 데이터셋과 심슨 데이터셋에 LCF를 적용했을 때를 비교하였다. 90%는 학습 데이터로, 10%는 검증 데이터로 사용했다. 완벽하진 않지만 Custom Dataset의 style을 잘 catch한 모습을 보여준다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_09.png"><img alt="ldm_09" class="bg-primary mb-1" src="../../_images/ldm_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 514 </span><span class="caption-text">Latent Consistency Fine-tuning(LCF) on two customized dataset.. <span class="math notranslate nohighlight">\(\omega\)</span>.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
-<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
-<p>LCM은 Consistency Distillation을 Latent 상에 적용하여 <strong>고화질의 영상을 매우 적은 time step으로 inference 할 수 있도록 한 모델</strong>이다. 즉 성능 좋고 고해상도의 영상을 few-step으로 가능하게 만들었다. 특히 Custom Dataset에도 Distillation을 적용했을 때 적은 time step으로도 어느정도의 style을 간단하게 학습하는 결과를 보여주었다.</p>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="consistency_models.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Consistency Models</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="LLM_grounded_Diffusion.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">LLM Grounded Diffusion</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Latent Consistency Models</a><ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">2. Preliminaries</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">Diffusion Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3. Latent Consistency Models</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-distillation-in-the-latent-space">3.1 Consistency Distillation in the Latent Space</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode">3.2 One-Stage Guided Distillation by solving augmented PF-ODE</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#accelerating-distillation-with-skipping-time-steps">3.3 Accelerating Distillation with Skipping Time Steps</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-fine-tuning-for-customized-dataset">3.4 Latent Consistency Fine-tuning for customized dataset</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-generation">4.1 Text-To-Image Generation</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#abulation-study">4.2 Ablation Study</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ode-solvers-skipping-step-schedule">ODE Solvers &amp; Skipping-Step Schedule</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#the-effect-of-guidance-scale-omega">The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span></a></li>
-</ul>
-</li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-consistency-fine-tuning-results">4.3 Downstream Consistency Fine-tuning Results</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
-</ul>
-
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Latent Consistency Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/latent_consistency_models';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="LLM Grounded Diffusion" href="LLM_grounded_Diffusion.html" />
+    <link rel="prev" title="Consistency Models" href="consistency_models.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/latent_consistency_models.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/latent_consistency_models.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Latent Consistency Models</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Latent Consistency Models</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">2. Preliminaries</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3. Latent Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-distillation-in-the-latent-space">3.1 Consistency Distillation in the Latent Space</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode">3.2 One-Stage Guided Distillation by solving augmented PF-ODE</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#accelerating-distillation-with-skipping-time-steps">3.3 Accelerating Distillation with Skipping Time Steps</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-fine-tuning-for-customized-dataset">3.4 Latent Consistency Fine-tuning for customized dataset</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-generation">4.1 Text-To-Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#abulation-study">4.2 Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ode-solvers-skipping-step-schedule">ODE Solvers &amp; Skipping-Step Schedule</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#the-effect-of-guidance-scale-omega">The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span></a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-consistency-fine-tuning-results">4.3 Downstream Consistency Fine-tuning Results</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2310.04378">https://arxiv.org/pdf/2310.04378</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/luosiallen/latent-consistency-model">luosiallen/latent-consistency-model</a></p></li>
+<li><p>Project Page: <a class="reference external" href="https://latent-consistency-models.github.io/">https://latent-consistency-models.github.io/</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Donghyun Han</p></li>
+<li><p><strong>Last updated on May. 1, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="latent-consistency-models">
+<h1>Latent Consistency Models<a class="headerlink" href="#latent-consistency-models" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Diffusion model은 다양한 분야에서 주목할만한 성과를 거두었지만  매우 느린 sampling 속도를 가지기 때문에 실시간 사용이 불가능하다. 이 같은 단점을 극복하기 위해 sampling 속도를 향상시키는 다양한 accelerating 방법이 제안되었다:</p>
+<p> </p>
+<ol class="arabic simple">
+<li><p>ODE solver의 성능개선을 통해 10~20 step만으로도 좋은 성능을 가지는 방법.</p>
+<ul class="simple">
+<li><p>DPM-Solver (<a class="reference external" href="https://arxiv.org/pdf/2206.00927">Lu et al.</a>)</p></li>
+</ul>
+</li>
+<li><p>사전 학습된 Diffusion model을 몇 step만으로도 추론할 수 있도록 <strong>distillation</strong>하는 방법.</p>
+<ul class="simple">
+<li><p>PD (Progressive Distillation). → 2 stage (<a class="reference external" href="https://arxiv.org/pdf/2202.00512">Salimans et al.</a>)</p></li>
+<li><p>On Distillation of Guided Diffusion Models. (<a class="reference external" href="https://arxiv.org/pdf/2210.03142">Meng et al.</a>)</p></li>
+<li><p>Consistency Models (<a class="reference external" href="https://arxiv.org/pdf/2303.01469">Song et al.</a>)</p></li>
+</ul>
+</li>
+</ol>
+<p>이중 특히 Consistency Models은 ODE-trajectory에 대한 일관성을 갖도록 하는 모델로서, single step만으로도 이미지를 생성할 수 있기 때문에 반복적인 계산이 필요하지 않다. 그러나 이 모델 또한 2가지의 단점을 가지고 있다:</p>
+<p> </p>
+<ol class="arabic simple">
+<li><p>Pixel space의 Flow-based Model이기 때문에 <strong>high-resolution 이미지 생성</strong>에 적합하지 않음.</p></li>
+<li><p>Conditional(Classifier-free Guidance)한 이미지 생성을 고려하지 않아 <strong>text2img</strong>에 적합하지 않음.</p></li>
+</ol>
+<hr class="docutils" />
+<p>본 논문의 제안점은 다음 3가지다:</p>
+<p> </p>
+<ul class="simple">
+<li><p>빠르고 high-resolution 이미지를 생성하기 위한 Latent Consistency Models(LCMs)를 제안한다. LCMs은 영상의 latent space에 Consistency Models 개념을 적용해 매우 적은 step 만으로도 <strong>고품질의 이미지</strong>를 생성할 수 있다.</p></li>
+<li><p>guided consistency distillation을 통해 Stable Diffusion을 매우 적은 step(1~4)으로 sampling 할 수 있는 방법을 제공한다. <strong>Skipping-Step</strong>이라는 테크닉을 통해 학습을 가속화 한다. 2, 4 step Model의 경우 학습에 A100 GPU 32시간 밖에 걸리지 않으며 LAION-5B-Aesthetics dataset에서 SOTA의 성능을 달성했다.</p></li>
+<li><p>LCMs에 대한 새로운 fine-tuning 방식인 Latent Consistency Fine-tuning을 통해 <strong>빠른 추론 속도를 유지하면서도 Custom Dataset에 효율적으로 적용</strong>할 수 있다.</p></li>
+</ul>
+<p> </p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_01.png"><img alt="ldm_01" class="bg-primary mb-1" src="../../_images/ldm_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 506 </span><span class="caption-text">768x768 Resolution image in 1~4 steps.</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="preliminaries">
+<h2>2. Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading">#</a></h2>
+<section id="diffusion-models">
+<h3>Diffusion Models<a class="headerlink" href="#diffusion-models" title="Permalink to this heading">#</a></h3>
+<p>Diffusion Models 혹은 Score-based Models는 데이터에 점진적으로 Gaussian noise를 주입하고 reverse denoise process로 noise를 제거하여 데이터를 sampling하는 기법이다. 반면 forward process는 원본 데이터 분포인 <span class="math notranslate nohighlight">\(p_{data}(x)\)</span>를 주변 확률분포인 <span class="math notranslate nohighlight">\(q_{t}(x_{t})\)</span>로 변환한다:</p>
+<div class="math notranslate nohighlight">
+\[
+q_{0t}(x_{t}|x_{0})=\mathcal{N}(x_t|\alpha(t)x_0,\sigma^2(t)I)
+\]</div>
+<p>여기서 <span class="math notranslate nohighlight">\(\alpha(t)\)</span>와 <span class="math notranslate nohighlight">\(\sigma(t)\)</span>는 noise scheduler를 의미한다. 연속적인 timestep의 관점에서 이를 확률미분방정식(Stochastic Differential Equation, SDE)으로 나타낼 수 있는데, 다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[
+f(t)=\frac{d\log{\alpha(t)}}{dt}, g^2(t)=\frac{d\sigma^2(t)}{dt}-2\frac{d\log{\alpha(t)}}{dt}\sigma^2(t). \tag{1}
+\]</div>
+<p>또한 주변 확률분포 <span class="math notranslate nohighlight">\(q_t(x)\)</span>는 <strong>Probability Flow ODE(PF-ODE)</strong>라는 상미분방정식(Ordinary Differential Equation, ODE)을 만족하는데  다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{dx_t}{dt}=f(t)x_t-\frac{1}{2}g^2(t)\nabla_x\log{q_t(x_t)}, \ x_T \sim q_T(x_T). \tag{2}
+\]</div>
+<p>이때 Diffusion model은 <span class="math notranslate nohighlight">\(-\nabla\log{q_t(x_t)}\)</span>(score function)를 예측하는 noise 예측 모델(<span class="math notranslate nohighlight">\(\epsilon_\theta(x_t,t)\)</span>)을 학습시킨다. 학습된 모델은 score function의 근사치를 예측하고 sampling하는데 이를 empirical PF-ODE라 한다 (경험적 PF-ODE):</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{dx_t}{dt}=f(t)x_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(x_t,t), \ x_T \sim \mathcal{N}(0, \tilde{\sigma}^2I). \tag{3}
+\]</div>
+<p>Classifier-Free Guidance (CFG)는 sampling의 퀄리티를 높이기 위해 GLIDE, Stable Diffusion, DALL<span class="math notranslate nohighlight">\(\cdot\)</span>E2, Imagen 등 다양한 conditional model에서 사용되었다. CFG의 scale <span class="math notranslate nohighlight">\(\omega\)</span>가 주어졌을 때 원본 noise prediction은 conditional, unconditional noise prediction을 선형적으로 혼합하여 대체된다:</p>
+<div class="math notranslate nohighlight">
+\[
+\tilde{\epsilon}_\theta(z_t,\omega, c,t)=(1+\omega)\epsilon_\theta(z_t, c,t)-\omega\epsilon_\theta(z_t, \emptyset, t).
+\]</div>
+<p> </p>
+</section>
+<section id="consistency-models">
+<h3>Consistency Models<a class="headerlink" href="#consistency-models" title="Permalink to this heading">#</a></h3>
+<p>Consistency Model(CM)은 몇 step 혹은 한번의 step 만으로 데이터를 생성할 수 있는 모델이다. CM의 핵심은 <strong>PF-ODE의 궤적에 어떤 point와 PF-ODE의 solution에 대해 mapping되는 function (<span class="math notranslate nohighlight">\(f:(x_t, t) \mapsto x_\epsilon\)</span>)을 추정</strong>하는 것이다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_02.png"><img alt="ldm_02" class="bg-primary mb-1" src="../../_images/ldm_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 507 </span><span class="caption-text">Consistency Models (CM).</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\epsilon\)</span>은 고정된 매우 작은 양수값을 가지며 CM의 function은 자기 자신에 대한 <strong>self-consistency</strong>를 만족해야한다. 즉 어떠한 time step에 대해서도 <span class="math notranslate nohighlight">\(x_\epsilon\)</span>을 sampling 할 수 있어야 한다.</p>
+<div class="math notranslate nohighlight">
+\[
+f(x_t,t)=f(x_{t'},t'), \forall t,t' \in [\epsilon, T]. \tag{4}
+\]</div>
+<p><span class="math notranslate nohighlight">\(f_\theta(x, \epsilon)=x\)</span>를 만족하는 모델 <span class="math notranslate nohighlight">\(f_\theta\)</span>는 다음과 같이 정리할 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+f_\theta(x,t)=c_{skip}(t)x+c_{out}(t)F_\theta(x,t). \tag{5}
+\]</div>
+<p><span class="math notranslate nohighlight">\(c_{skip}(t)\)</span>와 <span class="math notranslate nohighlight">\(c_{out}(t)\)</span>는 미분 가능한 함수이며 <span class="math notranslate nohighlight">\(c_{skip}(\epsilon)=1, c_{out}(\epsilon)=0\)</span>이기 때문에 <span class="math notranslate nohighlight">\(f_\theta(x, \epsilon)=x\)</span>를 만족한다.  <span class="math notranslate nohighlight">\(\theta\)</span>는 학습 가능한 파라미터로 <span class="math notranslate nohighlight">\(F_\theta\)</span>는 심층 신경망을 의미한다.</p>
+<p> </p>
+<p>CM은 pre-trained 모델에 대한 Distillation 방식과 scratch부터 학습하는 방식이 있는데 주로 <strong>Distillation 방식</strong>을 사용한다. Distillation 방식은 parameter <span class="math notranslate nohighlight">\(\theta^-\)</span>가 <span class="math notranslate nohighlight">\(\theta\)</span>를 통해 학습하며 모델에 대한 self-consistency를 위해 다음과 같이 손실함수를 구성한다:</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}(\theta,\theta^-;\Phi)=\mathbb{E}_{x,t}\bigg[d\bigg(f_\theta(x_{t_{n+1}}, t_{n+1}), f_{\theta^-}(\hat{x}^\phi_{t_n}, t_n)\bigg)\bigg]. \tag{6}
+\]</div>
+<p>이 때 <span class="math notranslate nohighlight">\(\theta^-\)</span>는 <span class="math notranslate nohighlight">\(\theta\)</span>에 대한 지수이동평균(Exponential Moving Average, EMA)이며 <span class="math notranslate nohighlight">\(\theta^-  \leftarrow \mu\theta^-+(1-\mu)\theta\)</span>이다. <span class="math notranslate nohighlight">\(d(\cdot, \cdot)\)</span>은 두 sample 사이의 거리를 측정하는 지표이다. <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>은 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>에 대한 <span class="math notranslate nohighlight">\(x_{t_n}\)</span>을 추정한 값으로 다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[
+\hat{x}^\phi_{t_n} \leftarrow x_{t_{n+1}}+(t_{n}-t_{n+1})\Phi(x_{t_{n+1}}, t_{n+1};\phi). \tag{7}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\Phi\)</span>는 PF-ODE에 사용되는 ODE Solver로 <a class="reference external" href="https://en.wikipedia.org/wiki/Euler_method">Euler</a>나 <a class="reference external" href="https://en.wikipedia.org/wiki/Heun%27s_method">Heun</a> Method등의 수치적인 ODE solver를 사용할 수 있다. 즉 Consistency Distillation은 ODE Solver로 예측한 <span class="math notranslate nohighlight">\(\hat{x}^{\phi}_{t_n}\)</span>과 <span class="math notranslate nohighlight">\(x_{t_{n+1}}\)</span>을 입력으로 <span class="math notranslate nohighlight">\(f_{\theta^-}\)</span>와 <span class="math notranslate nohighlight">\(f_\theta\)</span>로 <strong>예측한 값의 Consistency를 비교하는 방식으로 Distillation을 수행</strong>한다.</p>
+</section>
+</section>
+<section id="id1">
+<h2>3. Latent Consistency Models<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<p>CM의 한계:</p>
+<ul class="simple">
+<li><p>ImageNet 64x64, LSUN 256x256 영상에 대한 Generation만 수행</p>
+<ul>
+<li><p><strong>High Resolution</strong>의 잠재성이 아직 탐구되지 않았음.</p></li>
+<li><p><strong>Classifier-free Guidance(CFG)</strong> 등을 사용하지 않음.</p></li>
+</ul>
+</li>
+</ul>
+<p>Latent Consistency Models(LCMs)는 CM의 잠재력을 충분히 발휘하여 좀더 도전적인 task를 수행한다.</p>
+<section id="consistency-distillation-in-the-latent-space">
+<h3>3.1 Consistency Distillation in the Latent Space<a class="headerlink" href="#consistency-distillation-in-the-latent-space" title="Permalink to this heading">#</a></h3>
+<p>본 논문에서는 pre-trained 된 Stable Diffusion에 Consistency Distillation을 적용한 Latent Consistency Distillation (LCD)을 제안한다. LCMs는 LDM(SD)을 기반으로 설계되었기 때문에 <span class="math notranslate nohighlight">\(z=\varepsilon(x)\)</span>를 통해 <span class="math notranslate nohighlight">\(x\)</span>를 latent vector로 임베딩하고 <span class="math notranslate nohighlight">\(\hat{x}=\mathcal{D}(z)\)</span>를 통해 원본 영상으로 복원한다. latent space 상에서 연산이 이뤄지기 때문에 <strong>Computation Cost를 크게 줄일 수 있어</strong> high-resolution 영상을 laptop GPU에서 생성할 수도 있다.</p>
+<p>condition을 추가한 PF-ODE의 reverse process는 다음과 같이 정의된다:</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{dz_t}{dt}=f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t), \ z_T\sim\mathcal{N}(0,\tilde{\sigma}^2I). \tag{8}
+\]</div>
+<p><span class="math notranslate nohighlight">\(z_t\)</span>는 t step의 image latents, <span class="math notranslate nohighlight">\(\epsilon_\theta(z_t,c,t)\)</span>는 noise 예측 모델, c는 text와 같은 conditional prompt를 의미한다. PF-ODE상에서 모든 t step에 대해 consistency function <span class="math notranslate nohighlight">\(f_\theta :(z_t,c,t) \mapsto z_0\)</span>이기 때문에 이를 수식으로 정리하자면 다음과 같이 나타낼 수 있다 (<span class="math notranslate nohighlight">\(\hat{\epsilon}_\theta\)</span>는 noise prediction model.):</p>
+<div class="math notranslate nohighlight">
+\[
+f_\theta(z,c,t)=c_{skip}(t)z+c_{out}(t)\bigg( \frac{z-\sigma_t\hat{\epsilon}_\theta(z,c,t)}{\alpha_t} \bigg). \ (\epsilon-Prediction) \tag{9}
+\]</div>
+<p>수식을 살펴보면 ddpm 등의 reparameterization trick인 <span class="math notranslate nohighlight">\(x_t := \sqrt{\bar{\alpha}_t}x_0 + \sqrt{1-\bar{\alpha}_t}\epsilon\)</span>을 변형하여 식에 대입한 것을 알 수 있음. (<span class="math notranslate nohighlight">\(x\)</span>→<span class="math notranslate nohighlight">\(z\)</span>로 치환)</p>
+<div class="math notranslate nohighlight">
+\[
+x_0 = \frac{x_t-\sqrt{1-\bar{\alpha}_t}\epsilon}{\sqrt{\bar{\alpha}_t}}, \ \hat{z}_0 = \frac{z_t-\sigma(t)\hat{\epsilon}_{\theta}(z,c,t)}{\alpha(t)}.
+\]</div>
+<p>CM과 마찬가지로 <span class="math notranslate nohighlight">\(c_{skip}(0)=1, c_{out}(0)=0\)</span>이고 <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z,c,t)\)</span>는 teacher diffusion model과 유사한 noise 예측 모델 parameter이다. <span class="math notranslate nohighlight">\(f_\theta\)</span>는 <span class="math notranslate nohighlight">\(\epsilon-Prediction\)</span> 외에도 <span class="math notranslate nohighlight">\(x-Prediction\)</span>이나 <span class="math notranslate nohighlight">\(v-Prediction\)</span>을 사용할 수도 있다. (<span class="math notranslate nohighlight">\(x-Prediction\)</span>은 DDPM, <span class="math notranslate nohighlight">\(v-prediction\)</span>은 PD에서 나온 개념)</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,n}\bigg[ d(f_\theta(z_{t_{n+1}},c,t_{n+1}), f_{\theta^-}(\hat{z}^\psi_{t_n},c,t_n)) \bigg]. \tag{10}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\psi(z_t,t,s,c)\)</span>는 ODE solver이며 특정한 time step <span class="math notranslate nohighlight">\(t \sim s\)</span> 사이에 대한 Eq. 8 우변의 적분을 근사한 값이다. ODE Solver이기 때문에 <strong>DDIM, DPM-Solver, DPM-Solver++ 등을 사용할 수 있다.</strong> 또한 <span class="math notranslate nohighlight">\(\psi\)</span>는 학습 및 Distillation시에만 사용한다. 이때 <span class="math notranslate nohighlight">\(t_n\)</span>은 EDM을 토대로 CM에서 나오는 값이다. 기존 timestep <span class="math notranslate nohighlight">\([\epsilon, T]\)</span>에 대한 하위 간격으로 <span class="math notranslate nohighlight">\(t_1=\epsilon&lt;t_2&lt;\cdots&lt;t_N=T\)</span>인 어떠한 간격을 의미한다. <span class="math notranslate nohighlight">\(t_i\)</span>는 다음과 같이 나타낼 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+t_i=(\epsilon^{1 / \rho} +\frac{i-1}{N-1}(T^{1 / \rho}-\epsilon^{1 / \rho}))^\rho, \rho=7
+\]</div>
+<p>Eq. 8을 <span class="math notranslate nohighlight">\(t_{n+1} \sim t_n\)</span>까지 t에 대해 적분 했을 때 다음과 같은 수식을 얻을 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+\hat{z}_{t_n}^\psi-z_{t_{n+1}}=\int^{t_n}_{t_{n+1}}{\bigg( f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t) \bigg)}dt \approx \psi(z_{t_{n+1}}, t_{n+1},t_n,c). \tag{11}
+\]</div>
+</section>
+<section id="one-stage-guided-distillation-by-solving-augmented-pf-ode">
+<h3>3.2 One-Stage Guided Distillation by solving augmented PF-ODE<a class="headerlink" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode" title="Permalink to this heading">#</a></h3>
+<p>Classifier-free Guidance(CFG)는 high-quality의 conditional 이미지 생성을 가능하게 했다. 다만 CFG는 2개의 Diffusion Model을 훈련해야하기 때문에 효율적이지 못하며, <strong>LCMs와 같은 few-step sampling method에 사용하기 힘들다.</strong> 따라서 이를 해결하기 위해 본 논문에서는 CFG를 Distillation 과정에서 통합하였다.</p>
+<p>Guided-Distill의 경우 two-stage Distillation을 통해  few-step sampling에 CFG를 통합하였으나 학습시간이 길고 <strong>2단계를 거치며</strong> 손실이 누적되기 때문에 최적의 성능을 내기 힘들다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_03.png"><img alt="ldm_03" class="bg-primary mb-1" src="../../_images/ldm_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 508 </span><span class="caption-text">2 Stage Distillation.</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이에 반해 LCMs는 augmented PF-ODE를 해결하는 방식으로 one-stage의 guided Distillation을 제안했다. 일단 CFG에 대한 reverse diffusion process는 다음과 같다:</p>
+<div class="math notranslate nohighlight">
+\[
+\tilde{\epsilon}_{\theta}(z_t,\omega,c,t):=(1+\omega)\epsilon_\theta(z_t,c,t)-\omega\epsilon_\theta(z_t,\varnothing,t). \tag{12}
+\]</div>
+<p>CFG는 conditional noise 예측값과 unconditional noise 예측값을 선형 결합하여 사용한다. 즉 noise 값이 <span class="math notranslate nohighlight">\(\omega\)</span>에 따라 변형되므로 <strong>augmented PF-ODE</strong>라고 한다. augmented PF-ODE는 다음과 같이 나타낼 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+\frac{dz_t}{dt}=f(t)z_t+\frac{g^2(t)}{2\sigma_t}\tilde{\epsilon}_\theta(z_t,\omega,c,t), \ z_T\sim\mathcal{N}(0,\tilde{\sigma}^2I). \tag{13}
+\]</div>
+<p>consistency function도 <span class="math notranslate nohighlight">\(\omega\)</span>를 변수로 받아오기 때문에 <span class="math notranslate nohighlight">\(f_\theta:(z_t,\omega,c,t)\mapsto z_0\)</span>로 다시 정의된다. Consistency Distillation Loss 또한 다음과 같이 나타낼 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,\omega,n}\bigg[ d\bigg( f_\theta(z_{t_{n+1}},\omega,c,t_{n+1}), f_{\theta^-}(\hat{z}_{t_n}^{\psi,\omega},\omega,c,t_n) \bigg) \bigg] \tag{14}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\omega\)</span>와 <span class="math notranslate nohighlight">\(n\)</span>는 각각 <span class="math notranslate nohighlight">\([\omega_{min}, \omega_{max}]\)</span>, <span class="math notranslate nohighlight">\(\{1,…,N-1\}\)</span>에서 sampling된다. <span class="math notranslate nohighlight">\(\hat{z}^{\psi, \omega}_{t_n}\)</span>는 이전과 마찬가지로 CFG가 추가된 ODE-Solver를 사용하여 근사한 값을 의미한다. 이때 사용되는 새로운 noise 예측모델 <span class="math notranslate nohighlight">\(\tilde{\epsilon}_\theta(z_t,\omega,c,t)\)</span>는  Eq. 11처럼  <span class="math notranslate nohighlight">\(t_{n+1} \sim t_n\)</span>까지 t에 대해 적분 했을 때 다음과 같이 나타낼 수 있다:</p>
+<div class="math notranslate nohighlight">
+\[
+\hat{z}^{\psi, \omega}_{t_n}-z_{t_{n+1}}=\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\tilde{\epsilon}_\theta(z_t,\omega,c,t)\bigg)dt
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+=(1+\omega)\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,c,t)\bigg)dt
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+-\omega\int^{t_n}_{t_{n+1}}\bigg(f(t)z_t+\frac{g^2(t)}{2\sigma_t}\epsilon_\theta(z_t,\varnothing,t)\bigg)dt
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+\approx(1+\omega)\psi(z_{t_{n+1}}, t_{n+1},t_n,c)-\omega\psi(z_{t_{n+1}}, t_{n+1},t_n,\varnothing). \tag{15}
+\]</div>
+<p>마찬가지로 PF-ODE Solver <span class="math notranslate nohighlight">\(\psi(\cdot,\cdot,\cdot,\cdot)\)</span>에는 DDIM, DPM-Solver, DPM-Solver++ 등을 사용할 수 있다.</p>
+</section>
+<section id="accelerating-distillation-with-skipping-time-steps">
+<h3>3.3 Accelerating Distillation with Skipping Time Steps<a class="headerlink" href="#accelerating-distillation-with-skipping-time-steps" title="Permalink to this heading">#</a></h3>
+<p>Stable Diffusion 등 보통의 Diffusion Model들은 매우 큰 step을 전체 time step으로 잡고 학습한다. 그러나 이같이 촘촘한 time step은 각 <span class="math notranslate nohighlight">\(t_n\)</span>과 <span class="math notranslate nohighlight">\(t_{n+1}\)</span>의 변화량을 감소시키기 때문에 자연스럽게 Consistency Distillation Loss도 작아지게 된다. <strong>Loss가 작아지면 학습의 수렴속도도 느려지게 된다.</strong> 따라서 LCMs는 학습 수렴의 속도를 높이기 위해 time step을 수천에서 수십으로 크게 단축시키는 SKIPPING-STEP 방법을 제안하였다.</p>
+<p>기존 CMs 모델의 경우 time scheduler로 EDM을 사용하고 ODE-Solver로 Euler 방법이나 Heun 방법을 사용한다.  그러나 LCMs는 Eq. 8을 통해 DDIM, DPM-Solver, DPM-Solver++와 같은 효율적인 solver도 효과적으로 데이터를 생성할 수 있다는 것을 증명했다. 따라서 <strong>SKIPPING-STEP 방법은 <span class="math notranslate nohighlight">\(t_{n+1} → t_n\)</span> 사이의 Consistency를 비교하는것이 아니라 특정 k-step만큼 거리가 있는 time step에 대한 Consistency를 비교한다.</strong> (<span class="math notranslate nohighlight">\(t_{n+k}→t_n\)</span>)</p>
+<p>이때 <span class="math notranslate nohighlight">\(k\)</span>값의 크기는 trade-off 관계를 가진다. 너무작으면 (<span class="math notranslate nohighlight">\(k=1\)</span>) 기존과 같이 느린 수렴속도를 갖게되며, 너무 큰 값일 때는 ODE solver 를 통해 근사할 때 오차가 매우 커질수 있다. 논문의 저자는 <span class="math notranslate nohighlight">\(k=20\)</span>을 사용해 <strong>time step을 수천에서 수십으로 대폭 줄여</strong> 학습을 Accelerating 할 수 있었다. Eq. 14에 k값을 추가해 SKIPPING-STEP을 표현할 수 있다.</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L_{CD}}(\theta,\theta^-;\psi)=\mathbb{E}_{z,c,\omega,n}\bigg[ d\bigg( f_\theta(z_{t_{n+k}},\omega,c,t_{n+k}), f_{\theta^-}(\hat{z}_{t_n}^{\psi,\omega},\omega,c,t_n) \bigg) \bigg]. \tag{16}
+\]</div>
+<p><span class="math notranslate nohighlight">\(\hat{z}^{\psi, \omega}_{t_n}\)</span>에 대한 수식도 다음과 같이 변경할 수 있다.</p>
+<div class="math notranslate nohighlight">
+\[
+\hat{z}^{\psi, \omega}_{t_n} \leftarrow z_{t_{n+k}}+(1+\omega)\psi(z_{t_{n+k}}, t_{n+k},t_n,c)-\omega\psi(z_{t_{n+k}}, t_{n+k},t_n,\varnothing). \tag{17}
+\]</div>
+</section>
+</section>
+<section id="latent-consistency-fine-tuning-for-customized-dataset">
+<h2>3.4 Latent Consistency Fine-tuning for customized dataset<a class="headerlink" href="#latent-consistency-fine-tuning-for-customized-dataset" title="Permalink to this heading">#</a></h2>
+<p>Stable Diffusion과 같은 Foundation 생성 모델은 거의 대부분의 text-to-image Generation task에서 잘 되지만 가끔 downstream task를 위해 Custom dataset에 대한 fine-tuning이 필요할 때가 있다. Latent Consistency Fine-tuning(LCF)는 Custom Dataset도 teacher model에 대한 종속없이 few-step inference를 성공적으로 할 수 있도록 한다. 따라서 LCM은 <strong>기존의 Diffusion model에 대한 추가적인 fine tuning 방법론 없이도 Custom Dataset을 바로바로 학습하여 사용</strong>할 수 있다.</p>
+<p>따로 추가적인 fine-tuning 방법이 있는것은 아니고 Consistency Distillation 시 pre-trained 된 LDM을 사용하여 EMA를 통해 Distillation을 하기 때문에 Dataset을 Custom Dataset으로 사용하기만하면 된다. 즉 pre-trained Diffusion model → Custom Dataset fine-tuning → few step inference를 위한 Consistency Distillation을 할 필요 없이 바로 학습이 가능하다는 의미이다.</p>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<section id="text-to-image-generation">
+<h3>4.1 Text-To-Image Generation<a class="headerlink" href="#text-to-image-generation" title="Permalink to this heading">#</a></h3>
+<p>3가지 데이터셋에 대한 평가를 진행했다. (LAION-5B, LAION-Aesthetics-6+(12M),  LAION-Aesthetics-6.5+(650k)) 앞서말한것처럼 하나의 Resolution이 아닌 512x512, 768x768의 high resolution을 생성했다. 512 size는 <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction, 768 size는 <span class="math notranslate nohighlight">\(v\)</span>-prediction을 사용했고 ODE-Solver로는 DDIM을 사용했다. 앞서말한것처럼 SKIPPING-STEP은 20의 값을 가진다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_04.png"><img alt="ldm_04" class="bg-primary mb-1" src="../../_images/ldm_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 509 </span><span class="caption-text">Quantitative results at 512 x 512 &amp; 768 x 768 resolution.</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_05.png"><img alt="ldm_05" class="bg-primary mb-1" src="../../_images/ldm_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 510 </span><span class="caption-text">Qualitative results on LAION-Aesthetic-6.5+ Dataset. (2,4 steps)</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>DDIM, DPM-Solver, DPM-Solver++, Guided-Distill 4가지 모델에 대해 LCM과 성능비교를 했는데 이때 Guided-Distill은 오픈소스 코드가 없기 때문에 논문의 내용과 동일하게 Implementation 해서 성능을 비교하였다. LCM은 같은 메모리 Cost 대비 더 빠르게 수렴하고 더 좋은 품질의 영상을 생성하였다. 특히 Guided-Distill은 2 stage Distillation이지만 LCM은 <strong>1 Stage</strong>만 사용해도 이같은 성능을 보여줬다.</p>
+</section>
+<section id="abulation-study">
+<h3>4.2 Ablation Study<a class="headerlink" href="#abulation-study" title="Permalink to this heading">#</a></h3>
+<section id="ode-solvers-skipping-step-schedule">
+<h4>ODE Solvers &amp; Skipping-Step Schedule<a class="headerlink" href="#ode-solvers-skipping-step-schedule" title="Permalink to this heading">#</a></h4>
+<p>augmented PF-ODE를 푸는 solver들(DDIM, DPM, DPM++)을 LCM에 사용할 때 성능 비교와 SKIPPING-STEP schedule의 <span class="math notranslate nohighlight">\(k\)</span>값에 따른 성능 변화를 비교하였다. 모든 모델은 2,000 iteration에서의 4-step inference로 고정해서 비교했다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_06.png"><img alt="ldm_06" class="bg-primary mb-1" src="../../_images/ldm_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 511 </span><span class="caption-text">Different ODE solvers and skipping step k.</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Skipping step의 경우 <span class="math notranslate nohighlight">\(k\)</span> 값을 올렸을 때 훨씬더 빠르게 수렴하며 때때로 더 좋은 FID 값을 보여주었다. 또한 DPM과 DPM++은 <span class="math notranslate nohighlight">\(k\)</span>가 50일 때 DDIM보다 더 좋은 성능을 보였다. 이는 <strong><span class="math notranslate nohighlight">\(k\)</span> 값이 클수록 더 큰 ODE approximation error를 가지는 DDIM에 비해 오차가 적기 때문</strong>이다.</p>
+<p><span class="math notranslate nohighlight">\(k=20\)</span>일 때, 3가지 모델 모두 좋은 성능을 보였다.</p>
+</section>
+<section id="the-effect-of-guidance-scale-omega">
+<h4>The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span><a class="headerlink" href="#the-effect-of-guidance-scale-omega" title="Permalink to this heading">#</a></h4>
+<p>일반적으로 <span class="math notranslate nohighlight">\(\omega\)</span>값이 클수록 CLIP score 같은 품질의 지표는 좋아지지만 그만큼 다양성이 떨어져 FID Score가 나빠진다. 즉 <span class="math notranslate nohighlight">\(\omega\)</span>의 크기는 <strong>Quality와 Diversity에 대한 trade-off가 있다.</strong></p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_07.png"><img alt="ldm_07" class="bg-primary mb-1" src="../../_images/ldm_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 512 </span><span class="caption-text">Different classifier-free guidance scales <span class="math notranslate nohighlight">\(\omega\)</span>.</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그래프를 보면 2~8 step inference는 성능에 큰 차이를 가지지는 않는것으로 확인된다. 그러나 <strong>1 step inference는 아직 개선의 여지가 있는것</strong>을 확인할 수 있다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_08.png"><img alt="ldm_08" class="bg-primary mb-1" src="../../_images/ldm_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 513 </span><span class="caption-text">Different classifier-free guidance scales <span class="math notranslate nohighlight">\(\omega\)</span>.</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><span class="math notranslate nohighlight">\(\omega\)</span>에 따른 실제 생성 이미지를 비교해 봤을 때 생성 영상의 Quality 차이가 확연하게 드러난다. 즉 Distillation 시에도 CFG를 적용하는 것이 성능을 크게 개선할 수 있다는 것을 증명한다.</p>
+</section>
+</section>
+<section id="downstream-consistency-fine-tuning-results">
+<h3>4.3 Downstream Consistency Fine-tuning Results<a class="headerlink" href="#downstream-consistency-fine-tuning-results" title="Permalink to this heading">#</a></h3>
+<p>포켓몬 데이터셋과 심슨 데이터셋에 LCF를 적용했을 때를 비교하였다. 90%는 학습 데이터로, 10%는 검증 데이터로 사용했다. 완벽하진 않지만 Custom Dataset의 style을 잘 catch한 모습을 보여준다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/ldm_09.png"><img alt="ldm_09" class="bg-primary mb-1" src="../../_images/ldm_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 514 </span><span class="caption-text">Latent Consistency Fine-tuning(LCF) on two customized datasets.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion">
+<h1>Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h1>
+<p>LCM은 Consistency Distillation을 Latent 상에 적용하여 <strong>고화질의 영상을 매우 적은 time step으로 inference 할 수 있도록 한 모델</strong>이다. 즉 성능 좋고 고해상도의 영상을 few-step으로 가능하게 만들었다. 특히 Custom Dataset에도 Distillation을 적용했을 때 적은 time step으로도 어느정도의 style을 간단하게 학습하는 결과를 보여주었다.</p>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="consistency_models.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Consistency Models</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="LLM_grounded_Diffusion.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">LLM Grounded Diffusion</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Latent Consistency Models</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminaries">2. Preliminaries</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-models">Diffusion Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-models">Consistency Models</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">3. Latent Consistency Models</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-distillation-in-the-latent-space">3.1 Consistency Distillation in the Latent Space</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#one-stage-guided-distillation-by-solving-augmented-pf-ode">3.2 One-Stage Guided Distillation by solving augmented PF-ODE</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#accelerating-distillation-with-skipping-time-steps">3.3 Accelerating Distillation with Skipping Time Steps</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#latent-consistency-fine-tuning-for-customized-dataset">3.4 Latent Consistency Fine-tuning for customized dataset</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-image-generation">4.1 Text-To-Image Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#abulation-study">4.2 Ablation Study</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ode-solvers-skipping-step-schedule">ODE Solvers &amp; Skipping-Step Schedule</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#the-effect-of-guidance-scale-omega">The Effect of Guidance Scale <span class="math notranslate nohighlight">\(\omega\)</span></a></li>
+</ul>
+</li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#downstream-consistency-fine-tuning-results">4.3 Downstream Consistency Fine-tuning Results</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">Conclusion</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/magic-3d.html b/docs/review/magic-3d.html
old mode 100644
new mode 100755
index 251daa83..f7d5c0a7
--- a/docs/review/magic-3d.html
+++ b/docs/review/magic-3d.html
@@ -1,836 +1,856 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Magic3D &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/magic-3d';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="Dream Booth 3D" href="DreamBooth3D.html" />
-    <link rel="prev" title="DreamFusion" href="DreamFusion.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/magic-3d.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/magic-3d.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Magic3D</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-dreamfusion">3. Background: DreamFusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-3d-generation">4. High-Resolution 3D Generation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-diffusion-priors">4.1. Coarse-to-fine Diffusion Priors</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scene-models">4.2. Scene Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-optimization">4.3. Coarse-to-fine Optimization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-3d-generation">6. Controllable 3D Generation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Magic3D: High-Resolution Text-to-3D Content Creation (CVPR 2023)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2211.10440">https://arxiv.org/pdf/2211.10440</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Sep. 24, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="magic3d">
-<h1>Magic3D<a class="headerlink" href="#magic3d" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Pre-trained 된 text-to-image diffusion model 을 활용하여 NeRF 를 optimize 하는 DreamFusion 모델의 두 가지 단점을 명시합니다.</p>
-<ul class="simple">
-<li><p>Extremely slow optimization of NeRF</p></li>
-<li><p>Low-resolution image (64x64) space supervision on NeRF, leading to low-quality 3D models with a long processing time (1.5 hours per prompt on average using TPUv4)</p></li>
-</ul>
-<p>따라서, 논문에서는 이러한 단점을 해결하기 위해 two-stage optimization framework 를 제시합니다. 첫번째 단계로 DreamFusion 과 동일하게 coarse 한 NeRF representation 을 optimize 하는데, hash grid 를 활용하여 memory 와 computation 측면에서 efficient 하게 최적화합니다. 두번째 단계로 high resolution diffusion prior 를 기반으로 mesh representation 을 최적화합니다. 3D mesh 로 rasterizing 함으로써 graphics software 에 유동적으로 전환하여 사용할 수 있다는 장점이 있습니다.</p>
-<p>정리하자면, Magic3D 는 다음과 같은 contribution 을 제공합니다.</p>
-<ul class="simple">
-<li><p>Synthesizes 3D content with an 8× higher resolution supervision, is also
-2× faster than DreamFusion</p></li>
-<li><p>3D object editing</p></li>
-</ul>
-</section>
-<section id="background-dreamfusion">
-<h2>3. Background: DreamFusion<a class="headerlink" href="#background-dreamfusion" title="Permalink to this heading">#</a></h2>
-<p>DreamFusion 은 크게 두 가지 component 로 구성되어 있다고 할 수 있습니다.</p>
-<ul>
-<li><p>Neural scene representation</p>
-<p>Volumetric renderer <span class="math notranslate nohighlight">\(g\)</span> 와 3D volume 을 나타내는 parameter <span class="math notranslate nohighlight">\(\theta\)</span> 를 입력받아 rendered image 를 생성하는 scene model <span class="math notranslate nohighlight">\(x=g(\theta)\)</span>  를 정의합니다. DreamFusion 에서는 scene model 로 Mip-NeRF 360 에 shading model 을 추가하여 사용합니다.</p>
-</li>
-<li><p>Pre-trained text-to-image diffusion-based generative model <span class="math notranslate nohighlight">\(\phi\)</span></p>
-<p>DreamFusion 에서 diffusion model 로 Imagen 모델을 사용합니다.</p>
-</li>
-</ul>
-<p>이를 기반으로, 다음과 같은 Score Distillation Sampling (SDS) 을 통해 parameter <span class="math notranslate nohighlight">\(\theta\)</span> 를 update 합니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_01.png"><img alt="magic_3d_01" class="bg-primary mb-1" src="../../_images/magic_3d_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 711 </span><span class="caption-text">Score Distillation Sampling</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="high-resolution-3d-generation">
-<h2>4. High-Resolution 3D Generation<a class="headerlink" href="#high-resolution-3d-generation" title="Permalink to this heading">#</a></h2>
-<p>Magic3D 에서 high-resolution text-to-3D synthesis 를 위한 two-stage coarse-to-fine framework 를 다음과 같이 소개합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_02.png"><img alt="magic_3d_02" class="bg-primary mb-1" src="../../_images/magic_3d_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 712 </span><span class="caption-text">Magic3D Framework</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<section id="coarse-to-fine-diffusion-priors">
-<h3>4.1. Coarse-to-fine Diffusion Priors<a class="headerlink" href="#coarse-to-fine-diffusion-priors" title="Permalink to this heading">#</a></h3>
-<ol class="arabic simple">
-<li><p>64x64 rendered image 에 대한 rendered loss 를 계산하기 위해 Imagen 과 유사한 eDiff-I 를 base diffusion 모델로 사용합니다.</p></li>
-<li><p>512x512 high resolution rendered image 를 기반으로 backpropagation 할 수 있도록 Stable Diffusion model 을 LDM 으로 사용합니다. 다음과 같이 SDS 를 계산하는 과정에서 <span class="math notranslate nohighlight">\(\partial{x}/\partial{\theta}\)</span> 와 <span class="math notranslate nohighlight">\(\partial{z}/\partial{x}\)</span> 를 계산하는데 시간이 다소 소요된다고 합니다.</p></li>
-</ol>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_03.png"><img alt="magic_3d_03" class="bg-primary mb-1" src="../../_images/magic_3d_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 713 </span><span class="caption-text">SDS in high resolution 512x512</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="scene-models">
-<h3>4.2. Scene Models<a class="headerlink" href="#scene-models" title="Permalink to this heading">#</a></h3>
-<p><strong>Neural fields as coarse scene models</strong></p>
-<p>기존에 DreamFusion 에서 scene model 로 사용하였던 Mip-NeRF 360 모델이 3D geometry 에 대한 정보를 scratch 로부터 담아내는데 유용하다는 것을 보였지만, MLP 네트워크를 통해 dense 하게 sampling 하여 rendering 하는 과정이 computation cost 가 높다고 말합니다.</p>
-<p>이를 해결하기 위한 방법으로 InstantNGP 에서 소개하는 hash grid encoding 을 사용한다고 합니다. Hash grid 를 활용하여 두 개의 single layer neural network 를 학습하는데, 하나는 albedo 와 density 그리고 나머지 하나는 normal 을 예측합니다. 또한, density-based voxel pruning (empty space 에 대한 처리) 과 octree-based ray sampling/rendering 기법을 활용하여 computation cost 를 줄였다고 합니다.</p>
-<p><strong>Textured meshes as fine scene models</strong></p>
-<p>Fine stage 에서도 high resolution image 를 기반으로 동일한 scene model (neural field) 을 학습하는 방식도 있지만, 아래 예시처럼 메모리와 연산적인 제한이 있어 좋은 성능을 내기가 어렵다고 합니다.</p>
-<p>따라서, Magic3D 에서는 textured 3D mesh 를 scene representation 으로 사용합니다. 더 자세하게는, 다음과 같은 tetrahedral grid <span class="math notranslate nohighlight">\((V_T,T)\)</span> 형태로 3D mesh 를 표현합니다. 이때, <span class="math notranslate nohighlight">\(V_T\)</span> 는 grid <span class="math notranslate nohighlight">\(T\)</span> 에 존재하는 vertices 를 의미하고, 각 vertex <span class="math notranslate nohighlight">\(v_i \in V_T \subset \mathbb{R}^3\)</span> 는 signed distance field (SDF) 값 <span class="math notranslate nohighlight">\(s_i \in \mathbb{R}\)</span> 그리고 deformation <span class="math notranslate nohighlight">\(\Delta v_i \in \mathbb{R}^3\)</span> 값을 가집니다.</p>
-<p>이로부터 differentiable 한 marching tetrahedra 알고리즘을 통해 SDF 로부터 surface mesh 를 생성할 수 있으며, texture 에 대한 정보는 neural color field 로 정의할 수 있다고 합니다.</p>
-</section>
-<section id="coarse-to-fine-optimization">
-<h3>4.3. Coarse-to-fine Optimization<a class="headerlink" href="#coarse-to-fine-optimization" title="Permalink to this heading">#</a></h3>
-<p><strong>Neural field optimization</strong></p>
-<p>Instant NGP 와 동일하게 <span class="math notranslate nohighlight">\(256^3\)</span> resolution 의 occupancy grid 로 initialize 하고, 10 iterations 마다 grid 를 업데이트하며 empty space skipping 을 위한 octree 를 생성합니다. 매 업데이트마다 Instant NGP 와 동일한 파라미터 값을 설정하였다고 합니다.</p>
-<p>또한, DreamFusion 과 동일하게 background 를 표현하는 environment map MLP 를 사용합니다. 이때 Mip-NeRF 360 에서 사용하는 scene representation 을 사용할 수 없기 때문에, 모델이 object 에 대한 정보를 background 쪽에서 학습해버릴 수 있습니다. 이를 방지하기 위해 environment map MLP 의 사이즈를 작게 하고 learning rate 를 10배 낮추었다고 합니다.</p>
-<p><strong>Mesh optimization</strong></p>
-<p>Mesh 에 대한 optimization 을 진행하기 위해, 앞서 최적화한 coarse neural field 를 non-zero constant 를 차감함으로써 SDF 로 전환하고, texture field 는 coarse stage 에서 최적화된 color field 로 초기값을 설정합니다.</p>
-<p>최적화 단계를 진행할때, differentiable rasterizer 를 사용하여 surface mesh 를 rendering 하는 작업을 진행합니다. 각 vertex <span class="math notranslate nohighlight">\(v_i\)</span> 에 대해 앞서 정의한 high resolution 에서의 SDS gradient 를 통해 <span class="math notranslate nohighlight">\(s_i\)</span> 와 <span class="math notranslate nohighlight">\(\Delta v_i\)</span> 를 최적화하게 됩니다. 이때, rendering 하는 과정에서 각 pixel 에 해당하는 3D coordinate 를 추적하여 texture field 도 동시에 최적화합니다.</p>
-</section>
-</section>
-<section id="experiments">
-<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>DreamFusion 과 397 개의 text prompt 에 대한 성능을 비교합니다.</p>
-<p><strong>Speed evaluation</strong></p>
-<ul class="simple">
-<li><p>Coarse stage : 5000 iterations / 1024 samples / batch size 32 와 같은 설정으로 학습하였고, 하나의 객체를 생성하는데 8 NVIDIA A100 GPU 기준 15 분 소요된다고 합니다.</p></li>
-<li><p>Fine stage : 3000 iterations / batch size 32 와 같은 설정으로 학습하였고, 하나의 객체를 생성하는데 8 NVIDIA A100 GPU 기준 25 분 소요된다고 합니다.</p></li>
-</ul>
-<p><strong>Qualitative comparisons</strong></p>
-<p>3D 객체에서의 geometry 와 texture 에 대한 생성을 잘하는 부분을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_04.png"><img alt="magic_3d_04" class="bg-primary mb-1" src="../../_images/magic_3d_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 714 </span><span class="caption-text">Qualitative comparisons</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>User studies</strong></p>
-<p>397 개의 text prompt 를 입력받아 Magic3D 와 DreamFusion 으로 각각 생성한 3D 객체들에 대해 설문조사한 결과, 61.7% 의 유저들이 Magic3D 모델을 더 우세하게 평가하였습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_05.png"><img alt="magic_3d_05" class="bg-primary mb-1" src="../../_images/magic_3d_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 715 </span><span class="caption-text">User studies</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Can single-stage optimization work with LDM prior?</strong></p>
-<p>LDM prior 를 활용한 single-stage optimization setup 으로 scene model 을 최적화할 시에 대한 ablation study 를 진행해본 결과, 성능이 좋지 않은 부분을 확인할 수 있었다고 합니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="pics/magic-3d/magic_3d_06.png"><img alt="magic_3d_06" class="bg-primary mb-1" src="pics/magic-3d/magic_3d_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 716 </span><span class="caption-text">Single-stage vs Coarse-to-fine</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Can we use NeRF for the fine model?</strong></p>
-<p>NeRF 를 scratch 로부터 single-step 으로 최적화하는 것은 어렵지만, fine stage 에서 scene model 을 NeRF 로 변경하는 것은 가능합니다. 위 그림의 하단 4개 그림 중 좌측, 우측 사진이 각각 coarse stage 그리고 fine stage 에서 NeRF 를 학습한 결과입니다.</p>
-<p><strong>Coarse models vs. fine models</strong></p>
-<p>동일한 coarse model 에 대해서 NeRF 와 Mesh 모델을 모두 fine-tuning 한 결과, 모두 좋은 성능을 보이고 fine-tuned 된 mesh 모델이 특히 3D 객체 퀄리티를 실사적으로 잘 표현하는 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_07.png"><img alt="magic_3d_07" class="bg-primary mb-1" src="../../_images/magic_3d_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 717 </span><span class="caption-text">Coarse models vs. fine models</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="controllable-3d-generation">
-<h2>6. Controllable 3D Generation<a class="headerlink" href="#controllable-3d-generation" title="Permalink to this heading">#</a></h2>
-<p><strong>Personalized text-to-3D</strong></p>
-<p>사전에 diffusion model (eDiff-I, LDM) 을 DreamBooth 을 통해 학습하고, unique identifier <span class="math notranslate nohighlight">\([V]\)</span> 와 함께 3D scene model 을 학습합니다. 아래 사진과 같이, subject 에 대한 정보를 유지한 채 3D model 을 잘 생성하는 부분을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_08.png"><img alt="magic_3d_08" class="bg-primary mb-1" src="../../_images/magic_3d_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 718 </span><span class="caption-text">Controllable 3D Generation</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Prompt-based editing through fine-tuning</strong></p>
-<p>다음과 같은 3단계로 prompt-based editing 으로 fine-tuning 을 진행합니다.</p>
-<ol class="arabic simple">
-<li><p>우선, coarse model 을 base prompt 로 학습합니다.</p></li>
-<li><p>Base prompt 를 수정한 후, coarse NeRF 모델을 학습하고 이와 LDM 을 기반으로 high resolution NeRF 모델을 만듭니다.</p></li>
-<li><p>마지막으로, NeRF 모델을 기반으로 high-resolution fine-tuning 을 진행합니다.</p></li>
-</ol>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_09.png"><img alt="magic_3d_09" class="bg-primary mb-1" src="../../_images/magic_3d_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 719 </span><span class="caption-text">Prompt-based editing through fine-tuning</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="conclusion">
-<h2>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
-<p>정리하자면, 논문에서 Magic3D 의 coarse-to-fine optimization 방식을 소개하고, mesh 형태의 scene model 과 고해상도 이미지에 대한 diffusion prior 를 활용함으로써 high resolution 에 대해서도 좋은 성능을 보여줄 수 있었습니다. 추가적으로, 주어진 text prompt 에 대해 3D mesh model 을 40분 만에 생성이 가능하고, 그래픽 소프트웨어와 호환이 바로 가능하다는 장점이 있습니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DreamFusion.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title"><strong>DreamFusion</strong></p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="DreamBooth3D.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">Dream Booth 3D</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-dreamfusion">3. Background: DreamFusion</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-3d-generation">4. High-Resolution 3D Generation</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-diffusion-priors">4.1. Coarse-to-fine Diffusion Priors</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scene-models">4.2. Scene Models</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-optimization">4.3. Coarse-to-fine Optimization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-3d-generation">6. Controllable 3D Generation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Magic3D &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/magic-3d';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Dream Booth 3D" href="DreamBooth3D.html" />
+    <link rel="prev" title="DreamFusion" href="DreamFusion.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/magic-3d.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/magic-3d.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Magic3D</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-dreamfusion">3. Background: DreamFusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-3d-generation">4. High-Resolution 3D Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-diffusion-priors">4.1. Coarse-to-fine Diffusion Priors</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scene-models">4.2. Scene Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-optimization">4.3. Coarse-to-fine Optimization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-3d-generation">6. Controllable 3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Magic3D: High-Resolution Text-to-3D Content Creation (CVPR 2023)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2211.10440">https://arxiv.org/pdf/2211.10440</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Sep. 24, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="magic3d">
+<h1>Magic3D<a class="headerlink" href="#magic3d" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Pre-trained 된 text-to-image diffusion model 을 활용하여 NeRF 를 optimize 하는 DreamFusion 모델의 두 가지 단점을 명시합니다.</p>
+<ul class="simple">
+<li><p>Extremely slow optimization of NeRF</p></li>
+<li><p>Low-resolution image (64x64) space supervision on NeRF, leading to low-quality 3D models with a long processing time (1.5 hours per prompt on average using TPUv4)</p></li>
+</ul>
+<p>따라서, 논문에서는 이러한 단점을 해결하기 위해 two-stage optimization framework 를 제시합니다. 첫번째 단계로 DreamFusion 과 동일하게 coarse 한 NeRF representation 을 optimize 하는데, hash grid 를 활용하여 memory-efficient 하고 computationally efficient 하게 최적화합니다. 두번째 단계로 high resolution diffusion prior 를 기반으로 mesh representation 을 최적화합니다. 3D mesh 로 rasterizing 함으로써 graphics software 에 유동적으로 전환하여 사용할 수 있다는 장점이 있습니다.</p>
+<p>정리하자면, Magic3D 는 다음과 같은 contribution 을 제공합니다.</p>
+<ul class="simple">
+<li><p>Synthesizes 3D content with an 8× higher resolution supervision, and is also
+2× faster than DreamFusion</p></li>
+<li><p>3D object editing</p></li>
+</ul>
+</section>
+<section id="background-dreamfusion">
+<h2>3. Background: DreamFusion<a class="headerlink" href="#background-dreamfusion" title="Permalink to this heading">#</a></h2>
+<p>DreamFusion 은 크게 두 가지 component 로 구성되어 있다고 할 수 있습니다.</p>
+<ul>
+<li><p>Neural scene representation</p>
+<p>Volumetric renderer <span class="math notranslate nohighlight">\(g\)</span> 와 3D volume 을 나타내는 parameter <span class="math notranslate nohighlight">\(\theta\)</span> 를 입력받아 rendered image 를 생성하는 scene model <span class="math notranslate nohighlight">\(x=g(\theta)\)</span> 를 정의합니다. DreamFusion 에서는 scene model 로 Mip-NeRF 360 에 shading model 을 추가하여 사용합니다.</p>
+</li>
+<li><p>Pre-trained text-to-image diffusion-based generative model <span class="math notranslate nohighlight">\(\phi\)</span></p>
+<p>DreamFusion 에서 diffusion model 로 Imagen 모델을 사용합니다.</p>
+</li>
+</ul>
+<p>이를 기반으로, 다음과 같은 Score Distillation Sampling (SDS) 을 통해 parameter <span class="math notranslate nohighlight">\(\theta\)</span> 를 update 합니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_01.png"><img alt="magic_3d_01" class="bg-primary mb-1" src="../../_images/magic_3d_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 723 </span><span class="caption-text">Score Distillation Sampling</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="high-resolution-3d-generation">
+<h2>4. High-Resolution 3D Generation<a class="headerlink" href="#high-resolution-3d-generation" title="Permalink to this heading">#</a></h2>
+<p>Magic3D 에서 high-resolution text-to-3D synthesis 를 위한 two-stage coarse-to-fine framework 를 다음과 같이 소개합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_02.png"><img alt="magic_3d_02" class="bg-primary mb-1" src="../../_images/magic_3d_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 724 </span><span class="caption-text">Magic3D Framework</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<section id="coarse-to-fine-diffusion-priors">
+<h3>4.1. Coarse-to-fine Diffusion Priors<a class="headerlink" href="#coarse-to-fine-diffusion-priors" title="Permalink to this heading">#</a></h3>
+<ol class="arabic simple">
+<li><p>64x64 rendered image 에 대한 rendered loss 를 계산하기 위해 Imagen 과 유사한 eDiff-I 를 base diffusion 모델로 사용합니다.</p></li>
+<li><p>512x512 high resolution rendered image 를 기반으로 backpropagation 할 수 있도록 Stable Diffusion model 을 LDM 으로 사용합니다. 다음과 같이 SDS 를 계산하는 과정에서 <span class="math notranslate nohighlight">\(\partial{x}/\partial{\theta}\)</span> 와 <span class="math notranslate nohighlight">\(\partial{z}/\partial{x}\)</span> 를 계산하는데 시간이 다소 소요된다고 합니다.</p></li>
+</ol>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_03.png"><img alt="magic_3d_03" class="bg-primary mb-1" src="../../_images/magic_3d_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 725 </span><span class="caption-text">SDS in high resolution 512x512</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="scene-models">
+<h3>4.2. Scene Models<a class="headerlink" href="#scene-models" title="Permalink to this heading">#</a></h3>
+<p><strong>Neural fields as coarse scene models</strong></p>
+<p>기존에 DreamFusion 에서 scene model 로 사용하였던 Mip-NeRF 360 모델이 3D geometry 에 대한 정보를 scratch 로부터 담아내는데 유용하다는 것을 보였지만, MLP 네트워크를 통해 dense 하게 sampling 하여 rendering 하는 과정이 computation cost 가 높다고 말합니다.</p>
+<p>이를 해결하기 위한 방법으로 InstantNGP 에서 소개하는 hash grid encoding 을 사용한다고 합니다. Hash grid 를 활용하여 두 개의 single layer neural network 를 학습하는데, 하나는 albedo 와 density 그리고 나머지 하나는 normal 을 예측합니다. 또한, density-based voxel pruning (empty space 에 대한 처리) 과 octree-based ray sampling/rendering 기법을 활용하여 computation cost 를 줄였다고 합니다.</p>
+<p><strong>Textured meshes as fine scene models</strong></p>
+<p>Fine stage 에서도 high resolution image 를 기반으로 동일한 scene model (neural field) 을 학습하는 방식도 있지만, 아래 예시처럼 메모리와 연산적인 제한이 있어 좋은 성능을 내기가 어렵다고 합니다.</p>
+<p>따라서, Magic3D 에서는 textured 3D mesh 를 scene representation 으로 사용합니다. 더 자세하게는, 다음과 같은 tetrahedral grid <span class="math notranslate nohighlight">\((V_T,T)\)</span> 형태로 3D mesh 를 표현합니다. 이때, <span class="math notranslate nohighlight">\(V_T\)</span> 는 grid <span class="math notranslate nohighlight">\(T\)</span> 에 존재하는 vertices 를 의미하고, 각 vertex <span class="math notranslate nohighlight">\(v_i \in V_T \subset \mathbb{R}^3\)</span> 는 signed distance field (SDF) <span class="math notranslate nohighlight">\(s_i \in \mathbb{R}\)</span> 그리고 deformation <span class="math notranslate nohighlight">\(\Delta v_i \in \mathbb{R}^3\)</span> 값을 가집니다.</p>
+<p>이로부터 differentiable 한 marching tetrahedra 알고리즘을 통해 SDF 로부터 surface mesh 를 생성할 수 있으며, texture 에 대한 정보는 neural color field 로 정의할 수 있다고 합니다.</p>
+</section>
+<section id="coarse-to-fine-optimization">
+<h3>4.3. Coarse-to-fine Optimization<a class="headerlink" href="#coarse-to-fine-optimization" title="Permalink to this heading">#</a></h3>
+<p><strong>Neural field optimization</strong></p>
+<p>Instant NGP 와 동일하게 <span class="math notranslate nohighlight">\(256^3\)</span> resolution 의 occupancy grid 로 initialize 하고, 10 iterations 마다 grid 를 업데이트하며 empty space skipping 을 위한 octree 를 생성합니다. 매 업데이트마다 Instant NGP 와 동일한 파라미터 값을 설정하였다고 합니다.</p>
+<p>또한, DreamFusion 과 동일하게 background 를 표현하는 environment map MLP 를 사용합니다. 이때 Mip-NeRF 360 에서 사용하는 scene representation 을 사용할 수 없어, 모델이 background 를 통해 object 에 대한 정보를 학습해버릴 수 있습니다. 이를 방지하기 위해 environment map MLP 의 사이즈를 작게 하고 learning rate 를 10배 감소시켰다고 합니다.</p>
+<p><strong>Mesh optimization</strong></p>
+<p>Mesh 에 대한 optimization 을 진행하기 위해, 앞서 최적화한 coarse neural field 를 non-zero constant 를 차감함으로써 SDF 로 전환하고, texture field 는 coarse stage 에서 최적화된 color field 로 초기값을 설정합니다.</p>
+<p>최적화 단계를 진행할때, differentiable rasterizer 를 사용하여 surface mesh 를 rendering 하는 작업을 진행합니다. 각 vertex <span class="math notranslate nohighlight">\(v_i\)</span> 에 대해 앞서 정의한 high resolution 에서의 SDS gradient 를 통해 <span class="math notranslate nohighlight">\(s_i\)</span> 와 <span class="math notranslate nohighlight">\(\Delta v_i\)</span> 를 최적화하게 됩니다. 이때, rendering 하는 과정에서 각 pixel 에 해당하는 3D coordinate 를 추적하여 texture field 도 동시에 최적화합니다.</p>
+</section>
+</section>
+<section id="experiments">
+<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>DreamFusion 과 397 개의 text prompt 에 대한 성능을 비교합니다.</p>
+<p><strong>Speed evaluation</strong></p>
+<ul class="simple">
+<li><p>Coarse stage : 5000 iterations / 1024 samples / batch size 32 와 같은 설정으로 학습하였고, 하나의 객체를 생성하는데 8 NVIDIA A100 GPU 기준 15 분 소요된다고 합니다.</p></li>
+<li><p>Fine stage : 3000 iterations / batch size 32 와 같은 설정으로 학습하였고, 하나의 객체를 생성하는데 8 NVIDIA A100 GPU 기준 25 분 소요된다고 합니다.</p></li>
+</ul>
+<p><strong>Qualitative comparisons</strong></p>
+<p>3D 객체에서의 geometry 와 texture 에 대한 생성을 잘하는 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_04.png"><img alt="magic_3d_04" class="bg-primary mb-1" src="../../_images/magic_3d_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 726 </span><span class="caption-text">Qualitative comparisons</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>User studies</strong></p>
+<p>397 개의 text prompt 를 입력받아 Magic3D 와 DreamFusion 으로 각각 생성한 3D 객체들에 대해 설문조사해본 결과, 61.7% 의 유저들이 Magic3D 모델을 더 우세하게 평가하였습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_05.png"><img alt="magic_3d_05" class="bg-primary mb-1" src="../../_images/magic_3d_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 727 </span><span class="caption-text">User studies</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Can single-stage optimization work with LDM prior?</strong></p>
+<p>LDM prior 를 활용한 single-stage optimization setup 으로 scene model 을 최적화할 시에 대한 ablation study 를 진행해본 결과, 성능이 좋지 않은 부분을 확인할 수 있었다고 합니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_06.png"><img alt="magic_3d_06" class="bg-primary mb-1" src="../../_images/magic_3d_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 728 </span><span class="caption-text">Single-stage vs Coarse-to-fine</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Can we use NeRF for the fine model?</strong></p>
+<p>NeRF 를 scratch 로부터 single-step 으로 최적화하는 것은 어렵지만, fine stage 에서 scene model 을 NeRF 로 변경하는 것은 가능합니다. 위 그림의 하단 4개 그림 중 좌측, 우측 사진이 각각 coarse stage 그리고 fine stage 에서 NeRF 를 학습한 결과입니다.</p>
+<p><strong>Coarse models vs. fine models</strong></p>
+<p>동일한 coarse model 에 대해서 NeRF 와 Mesh 모델을 모두 fine-tuning 한 결과, 모두 좋은 성능을 보이고 fine-tuned 된 mesh 모델이 특히 3D 객체 퀄리티를 실사적으로 잘 표현하는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_07.png"><img alt="magic_3d_07" class="bg-primary mb-1" src="../../_images/magic_3d_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 729 </span><span class="caption-text">Coarse models vs. fine models</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="controllable-3d-generation">
+<h2>6. Controllable 3D Generation<a class="headerlink" href="#controllable-3d-generation" title="Permalink to this heading">#</a></h2>
+<p><strong>Personalized text-to-3D</strong></p>
+<p>사전에 diffusion model (eDiff-I, LDM) 을 DreamBooth 를 통해 학습하고, unique identifier <span class="math notranslate nohighlight">\([V]\)</span> 와 함께 3D scene model 을 학습합니다. 아래 사진과 같이, subject 에 대한 정보를 유지한 채 3D model 을 잘 생성하는 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_08.png"><img alt="magic_3d_08" class="bg-primary mb-1" src="../../_images/magic_3d_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 730 </span><span class="caption-text">Controllable 3D Generation</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Prompt-based editing through fine-tuning</strong></p>
+<p>다음과 같은 3단계로 prompt-based editing 으로 fine-tuning 을 진행합니다.</p>
+<ol class="arabic simple">
+<li><p>우선, coarse model 을 base prompt 로 학습합니다.</p></li>
+<li><p>Base prompt 를 수정한 후, coarse NeRF 모델을 학습하고 이와 LDM 을 기반으로 high resolution NeRF 모델을 만듭니다.</p></li>
+<li><p>마지막으로, NeRF 모델을 기반으로 high-resolution fine-tuning 을 진행합니다.</p></li>
+</ol>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/magic_3d_09.png"><img alt="magic_3d_09" class="bg-primary mb-1" src="../../_images/magic_3d_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 731 </span><span class="caption-text">Prompt-based editing through fine-tuning</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="conclusion">
+<h2>7. Conclusion<a class="headerlink" href="#conclusion" title="Permalink to this heading">#</a></h2>
+<p>정리하자면, 논문에서 Magic3D 의 coarse-to-fine optimization 방식을 소개하고, mesh 형태의 scene model 과 고해상도 이미지에 대한 diffusion prior 를 활용함으로써 high resolution 에 대해서도 좋은 성능을 보여줄 수 있었습니다. 추가적으로, 주어진 text prompt 에 대해 3D mesh model 을 40분 만에 생성이 가능하고, 그래픽 소프트웨어와 호환이 바로 가능하다는 장점이 있습니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DreamFusion.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title"><strong>DreamFusion</strong></p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="DreamBooth3D.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Dream Booth 3D</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-dreamfusion">3. Background: DreamFusion</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-resolution-3d-generation">4. High-Resolution 3D Generation</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-diffusion-priors">4.1. Coarse-to-fine Diffusion Priors</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#scene-models">4.2. Scene Models</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#coarse-to-fine-optimization">4.3. Coarse-to-fine Optimization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#controllable-3d-generation">6. Controllable 3D Generation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion">7. Conclusion</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/one-step-image-translation.html b/docs/review/one-step-image-translation.html
old mode 100644
new mode 100755
index b32dcaac..42b84d76
--- a/docs/review/one-step-image-translation.html
+++ b/docs/review/one-step-image-translation.html
@@ -1,998 +1,1018 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>One-Step Image Translation with Text-to-Image Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/one-step-image-translation';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="LCM-LoRA: A Universal Stable-Diffusion Acceleration Module" href="LCM-LoRA.html" />
-    <link rel="prev" title="DiT" href="DiT.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/one-step-image-translation.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/one-step-image-translation.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>One-Step Image Translation with Text-to-Image Models</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adding-conditional-input">3.1. Adding Conditional Input</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preserving-input-details">3.2. Preserving Input Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#unpaired-training">3.3. Unpaired Training</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#extensions">3.4. Extensions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-unpaired-methods">4.1. Comparison to Unpaired Methods</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.2. Ablation Study</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.3. Extensions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussions-and-limitations">5. Discussions and Limitations</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> One-Step Image Translation with Text-to-Image Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2403.12036">https://arxiv.org/pdf/2403.12036</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/GaParmar/img2img-turbo">GaParmar/img2img-turbo</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Sep. 24, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="one-step-image-translation-with-text-to-image-models">
-<h1>One-Step Image Translation with Text-to-Image Models<a class="headerlink" href="#one-step-image-translation-with-text-to-image-models" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>논문에서 기존의 conditional diffusion model 에 대해 다음과 같이 1) slow inference time, 2) paired data 에 대한 의존성 두 가지 한계점을 명시합니다. 이를 보완하기 위해, paired setting 과 unpaired setting 에서 모두 적용 가능한 학습 아키텍쳐를 제시합니다.</p>
-<p>기존에 adapter 를 추가하는 방식은 one-step diffusion model 에 적합하지 않다고 설명하고, 또한 SD-Turbo 모델의 Encoder-UNet-Decoder 형태의 multi-stage pipeline 에서 이미지의 많은 시각적 디테일이 손실된다고 주장합니다. 그리고 이러한 정보 손실은 입력 이미지가 실제 이미지일 때 특히 더 치명적이라고 합니다.</p>
-<p>이를 보완하기 위해, 논문에서는 첫번째로 input image 를 직접 noise encoder 에 입력함으로써 noise map 과 input control 의 충돌을 방지합니다. 두번째로, Encoder-UNet-Decoder 를 하나의 end-to-end 로 학습 가능한 아키텍쳐로 정의하고, 이를 위해 여러 LoRA adapter 를 정의합니다. 마지막으로, high-frequency detail 을 담아내기 위해 encoder 와 decoder 간의 skip connection 을 추가합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_01.png"><img alt="translation_turbo_01" class="bg-primary mb-1" src="../../_images/translation_turbo_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 540 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><strong>CycleGAN-Turbo 코드</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">CycleGAN_Turbo</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pretrained_name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pretrained_path</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">ckpt_folder</span><span class="o">=</span><span class="s2">&quot;checkpoints&quot;</span><span class="p">,</span> <span class="n">lora_rank_unet</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">lora_rank_vae</span><span class="o">=</span><span class="mi">4</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">tokenizer</span> <span class="o">=</span> <span class="n">AutoTokenizer</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;tokenizer&quot;</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">text_encoder</span> <span class="o">=</span> <span class="n">CLIPTextModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;text_encoder&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">sched</span> <span class="o">=</span> <span class="n">make_1step_sched</span><span class="p">()</span>
-        <span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">)</span>
-        <span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">)</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_encoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_decoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
-        <span class="c1"># add the skip connection convs</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">ignore_skip</span> <span class="o">=</span> <span class="kc">False</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vae</span> <span class="o">=</span> <span class="n">unet</span><span class="p">,</span> <span class="n">vae</span>
-</pre></div>
-</div>
-</li>
-</ul>
-<p>또한, 해당 논문에서 제시하는 아키텍쳐는 CycleGAN, pix2pix 등의 GAN 기반의 모델에 plug-and-play 형태로도 적용 가능하며, CycleGAN-Turbo, pix2pix-Turbo 모델이 기존의 GAN 기반 그리고 diffusion model 기반 모델의 image translation 성능보다 우수하다고 합니다.</p>
-</section>
-<section id="related-work">
-<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
-<p>기존에 paired dataset 에서의 image-to-image translation 모델들은 대표적으로 reconstruction loss 와 adversarial loss 를 기반으로 학습되었습니다. 최근에는 GLIGEN, T2I-Adapter, ControlNet 과 같이 adapter 를 통해 input conditioning 을 하는 방식도 소개되었습니다. 그리고 unpaired dataset 에서 diffusion 모델을 활용하여 image translation 하는 사례들도 있지만, 이들은 새로 보이는 domain 마다 scratch 로부터 모델을 학습해야 하는 치명적인 단점이 있습니다.</p>
-<p>SDEdit 와 Prompt-to-Prompt 모델과 같이 zero-shot 으로 이미지를 editing 하는 연구 사례들도 있지만, 이들은 multi-object 와 complex scene setting 에서 약점을 보인다고 합니다.</p>
-<p>이와 동시에 diffusion model 의 inference time 을 단축하기 위해서 ODE solver 혹은 knowledge distillation 을 활용한 연구 사례들도 있었습니다.</p>
-</section>
-<section id="method">
-<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<section id="adding-conditional-input">
-<h3>3.1. Adding Conditional Input<a class="headerlink" href="#adding-conditional-input" title="Permalink to this heading">#</a></h3>
-<p>SD-Turbo 와 같이 pretrained 된 one-step text-to-image 모델을 기반으로 input image 에 대한 conditioning 을 하는 방법을 다음과 같이 처음에 제안합니다. 아래 사진과 같이, 사전학습된 stable diffusion encoder 의 가중치 혹은 랜덤한 가중치를 가진 adapter 를 새로 정의하여 input image 에 대한 feature map 을 추출합니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_02.png"><img alt="translation_turbo_02" class="bg-primary mb-1" src="../../_images/translation_turbo_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 541 </span><span class="caption-text">Adding Conditional Input</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>하지만 multi-step diffusion model 과 다르게 single-step 만으로도 noise map 이 생성되는 이미지의 전체적인 layout 을 결정하기 때문에, condition encoder 를 거쳐서 나온 feature map 과의 충돌이 생겨 학습에 어려움이 생기는 현상을 보여준다고 합니다.</p>
-<p>따라서, adapter 를 추가적으로 정의하는 방식이 아닌 conditioning input 을 network 에 직접 적용하는 방식을 논문에서 제안합니다.</p>
-</section>
-<section id="preserving-input-details">
-<h3>3.2. Preserving Input Details<a class="headerlink" href="#preserving-input-details" title="Permalink to this heading">#</a></h3>
-<p>Latent Diffusion Model (LDM) 이 image 를 encoding 하는 과정에서 차원을 8 배 축소하기 때문에 정보에 대한 손실이 크고, 따라서 이러한 방식은 fine detail 에 민감한 image translation task 에 적합하지 않을 수 있다고 주장합니다. 아래 사진을 보시면, 기존 아키텍쳐에서 skip connection 을 추가하기 전후로 원본 이미지에 대한 detail preservation 정도 차이를 확인할 수 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_03.png"><img alt="translation_turbo_03" class="bg-primary mb-1" src="../../_images/translation_turbo_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 542 </span><span class="caption-text">Preserving Input Details</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>더 자세하게는, encoder 내의 각 downsampling layer 를 거친 뒤 4개의 activation 을 추출하고, 이를 1 x 1 zero-convolution layer 에 통과시켜 decoder 의 대응되는 upsampling block 에 skip connection 으로 입력시킵니다.</p>
-</section>
-<section id="unpaired-training">
-<h3>3.3. Unpaired Training<a class="headerlink" href="#unpaired-training" title="Permalink to this heading">#</a></h3>
-<p>논문에서는 SD-Turbo (v2.1) 를 base network 로 사용하고, 변형된 CycleGAN objective 를 적용하여 unpaired translation 을 진행하였습니다. 이때, cycle-consistency loss 와 adversarial loss 를 다음과 같이 정의합니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_04.png"><img alt="translation_turbo_04" class="bg-primary mb-1" src="../../_images/translation_turbo_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 543 </span><span class="caption-text">Cycle-Consistency Loss</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_05.png"><img alt="translation_turbo_05" class="bg-primary mb-1" src="../../_images/translation_turbo_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 544 </span><span class="caption-text">Adversarial Loss</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>여기서 <span class="math notranslate nohighlight">\(X \subset \mathbb{R}^{H \times W \times 3}\)</span>, <span class="math notranslate nohighlight">\(Y \subset \mathbb{R}^{H \times W \times 3}\)</span> 는 각각 source domain, target domain, 그리고 <span class="math notranslate nohighlight">\(G(x, c_Y): X \rightarrow Y\)</span>, <span class="math notranslate nohighlight">\(G(y, c_X):
-Y \rightarrow X\)</span> 는 translation 함수입니다. 두 translation 모두 동일한 generator <span class="math notranslate nohighlight">\(G\)</span> 를 사용하며, caption <span class="math notranslate nohighlight">\(c_X,c_Y\)</span> 만 task 에 따라 변형하게 됩니다. (e.g., day → night translation task 에서 <span class="math notranslate nohighlight">\(c_X\)</span> 는 “Driving in the day” 그리고 <span class="math notranslate nohighlight">\(c_Y\)</span> 는 “Driving in the night”)</p>
-<p>대부분의 layer 는 고정시킨 상태에서 U-Net 의 첫번째 convolutional layer 와 LoRA adapter 를 학습시켰다고 합니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_01.png"><img alt="translation_turbo_01" class="bg-primary mb-1" src="../../_images/translation_turbo_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 545 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또한, 새롭게 소개되는 <span class="math notranslate nohighlight">\(L_{rec}\)</span> 손실함수는 <span class="math notranslate nohighlight">\(L_1\)</span> 과 <span class="math notranslate nohighlight">\(LPIPS\)</span> 의 조합으로 구성되어있습니다. 그리고 adversarial loss 에서 사용되는 discriminator <span class="math notranslate nohighlight">\(D_X(x),D_Y(y)\)</span> 는 CLIP 모델을 backbone 으로 사용하였습니다.</p>
-<p>마지막으로, identity regularization loss 도 <span class="math notranslate nohighlight">\(L_{idt} = E_y [L_{rec}(G(y, c_Y ), y)] + E_x [L_{rec}(G(x, c_X), x)]\)</span> 와 같이 정의하고, 최종 objective 를 이들의 가중치 합으로 정의합니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_06.png"><img alt="translation_turbo_06" class="bg-primary mb-1" src="../../_images/translation_turbo_06.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 546 </span><span class="caption-text">Full objective</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="extensions">
-<h3>3.4. Extensions<a class="headerlink" href="#extensions" title="Permalink to this heading">#</a></h3>
-<p>해당 논문에서 unpaired training setting 뿐만 아니라 paired training 그리고 stochastic output generation 에 대해서도 실험을 진행하였습니다.</p>
-<p><strong>Paired Training</strong></p>
-<p>Paired setting 에서는 single translation function <span class="math notranslate nohighlight">\(G(x,c): X \rightarrow Y\)</span> 를 학습하고, objective 는 (1)  perceptual loss 와 pixel-space reconstruction loss 로 구성된 reconstruction loss (2) unpaired setting 에서 target domain 에 대해서만 정의된 GAN loss, 그리고 (3) CLIP text-image alignment loss <span class="math notranslate nohighlight">\(L_{CLIP}\)</span> 의 가중치 합으로 정의합니다.</p>
-<p><strong>Generating Diverse Outputs</strong></p>
-<p>One-step model 로 diverse 한 output 을 생성하는 것은 어려운 일입니다. 논문에서는 해당 task 를 수행하기 위해 input image <span class="math notranslate nohighlight">\(x\)</span>, noise map <span class="math notranslate nohighlight">\(z\)</span>, 그리고 interpolation coefficient <span class="math notranslate nohighlight">\(\gamma\)</span> 를 입력받는 <span class="math notranslate nohighlight">\(G(x,z,\gamma)\)</span> 를 정의합니다.</p>
-<p><span class="math notranslate nohighlight">\(G(x,z,\gamma)\)</span> 는 우선 noise <span class="math notranslate nohighlight">\(z\)</span> 와 encoder output <span class="math notranslate nohighlight">\(G_{enc}(x)\)</span> 를 다음과 같이 interpolation 합니다: <span class="math notranslate nohighlight">\(\gamma G_{enc}(x) + (1 - \gamma) z\)</span>. 그리고 LoRA adapter 가중치와 skip connection 출력값을 <span class="math notranslate nohighlight">\(\theta = \theta_0 + \gamma \cdot \Delta \theta\)</span> 와 같이 조정합니다. 여기서 <span class="math notranslate nohighlight">\(\theta_0\)</span> 는 원래 가중치이고, <span class="math notranslate nohighlight">\(\Delta \theta\)</span> 는 새로 추가된 가중치입니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_07.png"><img alt="translation_turbo_07" class="bg-primary mb-1" src="../../_images/translation_turbo_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 547 </span><span class="caption-text">Reconstruction loss with interpolation coefficient</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>해당 objective 로 interpolation coefficient <span class="math notranslate nohighlight">\(\gamma\)</span> 를 변화시키면서 모델을 fine-tuning 함으로써 다양한 output 을 생성할 수 있었다고 합니다.</p>
-</section>
-</section>
-<section id="experiments">
-<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p><strong>Training Details</strong></p>
-<p>학습하는 파라미터는 LoRA 가중치, zero-convolution layer, 그리고 U-Net 의 첫번째 convolutional layer 로 구성되며, 총 330MB 입니다.</p>
-<ul>
-<li><p><strong>UNet initialization 코드</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">initialize_unet</span><span class="p">(</span><span class="n">rank</span><span class="p">,</span> <span class="n">return_lora_module_names</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
-    <span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
-    <span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">l_modules_others</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
-    <span class="n">l_grep</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;to_k&quot;</span><span class="p">,</span> <span class="s2">&quot;to_q&quot;</span><span class="p">,</span> <span class="s2">&quot;to_v&quot;</span><span class="p">,</span> <span class="s2">&quot;to_out.0&quot;</span><span class="p">,</span> <span class="s2">&quot;conv&quot;</span><span class="p">,</span> <span class="s2">&quot;conv1&quot;</span><span class="p">,</span> <span class="s2">&quot;conv2&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_in&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_shortcut&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_out&quot;</span><span class="p">,</span> <span class="s2">&quot;proj_out&quot;</span><span class="p">,</span> <span class="s2">&quot;proj_in&quot;</span><span class="p">,</span> <span class="s2">&quot;ff.net.2&quot;</span><span class="p">,</span> <span class="s2">&quot;ff.net.0.proj&quot;</span><span class="p">]</span>
-    <span class="k">for</span> <span class="n">n</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">unet</span><span class="o">.</span><span class="n">named_parameters</span><span class="p">():</span>
-        <span class="k">if</span> <span class="s2">&quot;bias&quot;</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">or</span> <span class="s2">&quot;norm&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span> <span class="k">continue</span>
-        <span class="k">for</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">l_grep</span><span class="p">:</span>
-            <span class="k">if</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">and</span> <span class="p">(</span><span class="s2">&quot;down_blocks&quot;</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">or</span> <span class="s2">&quot;conv_in&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">):</span>
-                <span class="n">l_target_modules_encoder</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
-                <span class="k">break</span>
-            <span class="k">elif</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">and</span> <span class="s2">&quot;up_blocks&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span>
-                <span class="n">l_target_modules_decoder</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
-                <span class="k">break</span>
-            <span class="k">elif</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span>
-                <span class="n">l_modules_others</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
-                <span class="k">break</span>
-    <span class="n">lora_conf_encoder</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
-    <span class="n">lora_conf_decoder</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
-    <span class="n">lora_conf_others</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_modules_others</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_encoder</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_encoder&quot;</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_decoder</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_decoder&quot;</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_others</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_others&quot;</span><span class="p">)</span>
-    <span class="n">unet</span><span class="o">.</span><span class="n">set_adapters</span><span class="p">([</span><span class="s2">&quot;default_encoder&quot;</span><span class="p">,</span> <span class="s2">&quot;default_decoder&quot;</span><span class="p">,</span> <span class="s2">&quot;default_others&quot;</span><span class="p">])</span>
-    <span class="k">if</span> <span class="n">return_lora_module_names</span><span class="p">:</span>
-        <span class="k">return</span> <span class="n">unet</span><span class="p">,</span> <span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">l_modules_others</span>
-    <span class="k">else</span><span class="p">:</span>
-        <span class="k">return</span> <span class="n">unet</span>
-</pre></div>
-</div>
-</li>
-<li><p><strong>VAE initialization 코드</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">initialize_vae</span><span class="p">(</span><span class="n">rank</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">return_lora_module_names</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
-    <span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_encoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_decoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
-    <span class="c1"># add the skip connection convs</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
-    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
-    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
-    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
-    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">ignore_skip</span> <span class="o">=</span> <span class="kc">False</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">gamma</span> <span class="o">=</span> <span class="mi">1</span>
-    <span class="n">l_vae_target_modules</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;conv1&quot;</span><span class="p">,</span><span class="s2">&quot;conv2&quot;</span><span class="p">,</span><span class="s2">&quot;conv_in&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_shortcut&quot;</span><span class="p">,</span>
-        <span class="s2">&quot;conv&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_out&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_1&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_2&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_3&quot;</span><span class="p">,</span> 
-        <span class="s2">&quot;skip_conv_4&quot;</span><span class="p">,</span> <span class="s2">&quot;to_k&quot;</span><span class="p">,</span> <span class="s2">&quot;to_q&quot;</span><span class="p">,</span> <span class="s2">&quot;to_v&quot;</span><span class="p">,</span> <span class="s2">&quot;to_out.0&quot;</span><span class="p">,</span>
-    <span class="p">]</span>
-    <span class="n">vae_lora_config</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span> <span class="n">target_modules</span><span class="o">=</span><span class="n">l_vae_target_modules</span><span class="p">)</span>
-    <span class="n">vae</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">vae_lora_config</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;vae_skip&quot;</span><span class="p">)</span>
-    <span class="k">if</span> <span class="n">return_lora_module_names</span><span class="p">:</span>
-        <span class="k">return</span> <span class="n">vae</span><span class="p">,</span> <span class="n">l_vae_target_modules</span>
-    <span class="k">else</span><span class="p">:</span>
-        <span class="k">return</span> <span class="n">vae</span>
-</pre></div>
-</div>
-</li>
-</ul>
-<p><strong>Datasets</strong></p>
-<p>Unpaired datasets 에서 자주 사용되는 Horse <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> Zebra, Yosemite Summer <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> Winter, 그리고 고차원 이미지의 주행 데이터셋 BDD100k 의 day <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> night, clear <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> foggy 데이터셋으로 실험하였습니다.</p>
-<p><strong>Evaluation Protocol</strong></p>
-<p>Image translation task 에서 다음과 같은 2가지 요소를 만족시켜야 한다고 합니다.</p>
-<p>(1) target domain 의 데이터셋 분포와의 일치: FID 로 측정</p>
-<p>(2) input image 의 구조적인 정보 유지: DINO-Struct-Dist 로 측정</p>
-<section id="comparison-to-unpaired-methods">
-<h3>4.1. Comparison to Unpaired Methods<a class="headerlink" href="#comparison-to-unpaired-methods" title="Permalink to this heading">#</a></h3>
-<p>기존에 GAN-based 그리고 Diffusion-based 모델들이 output realism 그리고 structure preservation 에 모두 좋은 성능을 내지는 못하는 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_08.png"><img alt="translation_turbo_08" class="bg-primary mb-1" src="../../_images/translation_turbo_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 548 </span><span class="caption-text">Comparison to baselines on 256 × 256 datasets.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_09.png"><img alt="translation_turbo_09" class="bg-primary mb-1" src="../../_images/translation_turbo_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 549 </span><span class="caption-text">Comparison to baselines on driving datasets (512 × 512).</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래 예시 사진들과 table 에서 보이듯이, CycleGAN-Turbo 모델이 CycleGAN 과 CUT 모델보다 더 낮은 FID, DINO Structure score 를 보여주는 것을 확인할 수 있습니다.</p>
-<p>또한, diffusion 기반의 zero-shot image translation 모델: SDEdit, Plug-and Play, pix2pix-zero, CycleDiffusion, 그리고 DDIB 들이 realistic 한 이미지는 잘 생성하지만 원본 이미지의 structure 를 훼손시키는 경우가 있음을 확인할 수 있습니다. 이러한 현상은 multiple object 가 존재하는 주행 데이터셋에서 더 빈번하게 발생하고, 이는 Instructpix2pix 모델을 제외하고는 noise map 으로 inverting 하는 과정에서 원본 이미지에 대한 손실이 일어나기 때문이라고 주장합니다.</p>
-<ul>
-<li><p><strong>Comparison to GAN-based methods</strong></p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_10.png"><img alt="translation_turbo_10" class="bg-primary mb-1" src="../../_images/translation_turbo_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 550 </span><span class="caption-text">Comparison to GAN-based methods on 256 × 256 datasets.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_11.png"><img alt="translation_turbo_11" class="bg-primary mb-1" src="../../_images/translation_turbo_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 551 </span><span class="caption-text">Comparison to GAN-based methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-<li><p><strong>Comparison to Diffusion-based editing methods</strong></p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_12.png"><img alt="translation_turbo_12" class="bg-primary mb-1" src="../../_images/translation_turbo_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 552 </span><span class="caption-text">Comparison to Diffusion-based editing methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_13.png"><img alt="translation_turbo_13" class="bg-primary mb-1" src="../../_images/translation_turbo_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 553 </span><span class="caption-text">Comparison to Diffusion-based editing methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_14.png"><img alt="translation_turbo_14" class="bg-primary mb-1" src="../../_images/translation_turbo_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 554 </span><span class="caption-text">Evaluation on standard CycleGAN datasets (256 × 256).</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_15.png"><img alt="translation_turbo_15" class="bg-primary mb-1" src="../../_images/translation_turbo_15.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 555 </span><span class="caption-text">Comparison on 512 × 512 driving datasets.</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ablation-study">
-<h3>4.2. Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
-<p><strong>Using pre-trained weights</strong></p>
-<p>랜덤한 가중치로 초기화하는 것보다 pre-trained 된 모델을 사용할 때, 모델 성능이 더 좋은 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_16.png"><img alt="translation_turbo_16" class="bg-primary mb-1" src="../../_images/translation_turbo_16.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 556 </span><span class="caption-text">Ablation with Horse to Zebra</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Different ways of adding conditioning inputs</strong></p>
-<p>ControlNet 이나 T2I-Adapter 를 사용하여 conditioning 할 때, 원본 이미지와의 structure 충돌이 일어나는 부분을 재차 확인할 수 있습니다.</p>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_17.png"><img alt="translation_turbo_17" class="bg-primary mb-1" src="../../_images/translation_turbo_17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 557 </span><span class="caption-text">Ablating individual components</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Skip Connections and trainable encoder and decoder</strong></p>
-<p>Config D 와 비교하였을 때, FID 에 대한 성능이 미세하게 떨어지는 반면에 structure preservation 성능이 월등히 높음을 확인할 수 있습니다.</p>
-</section>
-<section id="id1">
-<h3>4.3. Extensions<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<p><strong>Paired translation</strong></p>
-<p>Pix2pix-Turbo 와 LCM-ControlNet, SD-Turbo ControlNet, 그리고 SD ControlNet 모델과 정성적인 평가를 진행하였습니다. Classifier-free guidance, negative prompt 없이 단일 step 만으로도 좋은 성능을 보여줌을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_18.png"><img alt="translation_turbo_18" class="bg-primary mb-1" src="../../_images/translation_turbo_18.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 558 </span><span class="caption-text">Comparison on paired edge-to-image task (512 × 512).</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p><strong>Generating diverse outputs</strong></p>
-<figure class="align-default" id="id21">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_19.png"><img alt="translation_turbo_19" class="bg-primary mb-1" src="../../_images/translation_turbo_19.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 559 </span><span class="caption-text">Generating diverse outputs</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="discussions-and-limitations">
-<h2>5. Discussions and Limitations<a class="headerlink" href="#discussions-and-limitations" title="Permalink to this heading">#</a></h2>
-<p>해당 논문은 multi-step diffusion training 에 의존하지 않고, single-step 만으로 다양한 GAN 기반의 objective 와 융합해서 downstream task 에 적용할 수 있는 방식을 제안합니다. 하지만, 다음과 같은 3가지 한계점을 제시합니다: (1) SD-Turbo 는 classifier-free guidance 를 사용하지 않아, guidance control 에 대한 설정할 수 없습니다. (2) 해당 방식은 negative prompt 를 지원하지 않습니다. 그리고 마지막으로 (3) high capacity generator 기반의 cycle consistency loss 로 학습하는데 메모리에 대한 부담이 큽니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="DiT.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">DiT</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="LCM-LoRA.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adding-conditional-input">3.1. Adding Conditional Input</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preserving-input-details">3.2. Preserving Input Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#unpaired-training">3.3. Unpaired Training</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#extensions">3.4. Extensions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-unpaired-methods">4.1. Comparison to Unpaired Methods</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.2. Ablation Study</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.3. Extensions</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussions-and-limitations">5. Discussions and Limitations</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>One-Step Image Translation with Text-to-Image Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/one-step-image-translation';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="LCM-LoRA: A Universal Stable-Diffusion Acceleration Module" href="LCM-LoRA.html" />
+    <link rel="prev" title="DiT" href="DiT.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/one-step-image-translation.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/one-step-image-translation.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>One-Step Image Translation with Text-to-Image Models</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adding-conditional-input">3.1. Adding Conditional Input</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preserving-input-details">3.2. Preserving Input Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#unpaired-training">3.3. Unpaired Training</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#extensions">3.4. Extensions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-unpaired-methods">4.1. Comparison to Unpaired Methods</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.2. Ablation Study</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.3. Extensions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussions-and-limitations">5. Discussions and Limitations</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> One-Step Image Translation with Text-to-Image Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/pdf/2403.12036">https://arxiv.org/pdf/2403.12036</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/GaParmar/img2img-turbo">GaParmar/img2img-turbo</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Sep. 24, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="one-step-image-translation-with-text-to-image-models">
+<h1>One-Step Image Translation with Text-to-Image Models<a class="headerlink" href="#one-step-image-translation-with-text-to-image-models" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>논문에서 기존의 conditional diffusion model 에 대해 다음과 같이 1) slow inference time, 2) paired data 에 대한 의존성 두 가지 한계점을 명시합니다. 이를 보완하기 위해, paired setting 과 unpaired setting 에서 모두 적용 가능한 학습 아키텍쳐를 제시합니다.</p>
+<p>기존에 adapter 를 추가하는 방식은 one-step diffusion model 에 적합하지 않다고 설명하고, 또한 SD-Turbo 모델의 Encoder-UNet-Decoder 형태의 multi-stage pipeline 에서 이미지의 많은 시각적 디테일이 손실된다고 주장합니다. 그리고 이러한 정보 손실은 입력 이미지가 실제 이미지일 때 특히 더 치명적이라고 합니다.</p>
+<p>이를 보완하기 위해, 논문에서는 첫번째로 input image 를 직접 noise encoder 에 입력함으로써 noise map 과 input control 의 충돌을 방지합니다. 두번째로, Encoder-UNet-Decoder 를 end-to-end 로 학습 가능한 하나의 아키텍쳐로 정의하고, 이를 위해 여러 LoRA adapter 를 정의합니다. 마지막으로, high-frequency detail 을 담아내기 위해 encoder 와 decoder 간의 skip connection 을 추가합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_01.png"><img alt="translation_turbo_01" class="bg-primary mb-1" src="../../_images/translation_turbo_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 540 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><strong>CycleGAN-Turbo 코드</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">CycleGAN_Turbo</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pretrained_name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pretrained_path</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">ckpt_folder</span><span class="o">=</span><span class="s2">&quot;checkpoints&quot;</span><span class="p">,</span> <span class="n">lora_rank_unet</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">lora_rank_vae</span><span class="o">=</span><span class="mi">4</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">tokenizer</span> <span class="o">=</span> <span class="n">AutoTokenizer</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;tokenizer&quot;</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">text_encoder</span> <span class="o">=</span> <span class="n">CLIPTextModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;text_encoder&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">sched</span> <span class="o">=</span> <span class="n">make_1step_sched</span><span class="p">()</span>
+        <span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">)</span>
+        <span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">)</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_encoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_decoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
+        <span class="c1"># add the skip connection convs</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">ignore_skip</span> <span class="o">=</span> <span class="kc">False</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">unet</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vae</span> <span class="o">=</span> <span class="n">unet</span><span class="p">,</span> <span class="n">vae</span>
+</pre></div>
+</div>
+</li>
+</ul>
+<p>또한, 해당 논문에서 제시하는 아키텍쳐는 CycleGAN, pix2pix 등의 GAN 기반의 모델에 plug-and-play 형태로도 적용 가능하며, CycleGAN-Turbo, pix2pix-Turbo 모델이 기존의 GAN 기반 및 diffusion model 기반 모델보다 image translation 성능이 우수하다고 합니다.</p>
+</section>
+<section id="related-work">
+<h2>2. Related Work<a class="headerlink" href="#related-work" title="Permalink to this heading">#</a></h2>
+<p>기존에 paired dataset 에서의 image-to-image translation 모델들은 대표적으로 reconstruction loss 와 adversarial loss 를 기반으로 학습되었습니다. 최근에는 GLIGEN, T2I-Adapter, ControlNet 과 같이 adapter 를 통해 input conditioning 을 하는 방식도 소개되었습니다. 그리고 unpaired dataset 에서 diffusion 모델을 활용하여 image translation 하는 사례들도 있지만, 이들은 새로 보이는 domain 마다 scratch 로부터 모델을 학습해야 하는 치명적인 단점이 있습니다.</p>
+<p>SDEdit 와 Prompt-to-Prompt 모델과 같이 zero-shot 으로 이미지를 editing 하는 연구 사례들도 있지만, 이들은 multi-object 와 complex scene setting 에서 약점을 보인다고 합니다.</p>
+<p>이와 동시에 diffusion model 의 inference time 을 단축하기 위해서 ODE solver 혹은 knowledge distillation 을 활용한 연구 사례들도 있었습니다.</p>
+</section>
+<section id="method">
+<h2>3. Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<section id="adding-conditional-input">
+<h3>3.1. Adding Conditional Input<a class="headerlink" href="#adding-conditional-input" title="Permalink to this heading">#</a></h3>
+<p>SD-Turbo 와 같이 pretrained 된 one-step text-to-image 모델을 기반으로 input image 에 대한 conditioning 을 하는 방법을 다음과 같이 처음에 제안합니다. 아래 사진과 같이, 사전학습된 stable diffusion encoder 의 가중치 혹은 랜덤한 가중치를 가진 adapter 를 새로 정의하여 input image 에 대한 feature map 을 추출합니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_02.png"><img alt="translation_turbo_02" class="bg-primary mb-1" src="../../_images/translation_turbo_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 541 </span><span class="caption-text">Adding Conditional Input</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>하지만 multi-step diffusion model 과 다르게 one-step model 에서는 noise map 이 생성되는 이미지의 전체적인 layout 을 직접 결정하기 때문에, condition encoder 를 거쳐서 나온 feature map 과의 충돌이 생겨 학습에 어려움이 생긴다고 합니다.</p>
+<p>따라서, adapter 를 추가적으로 정의하는 방식이 아닌 conditioning input 을 network 에 직접 적용하는 방식을 논문에서 제안합니다.</p>
+</section>
+<section id="preserving-input-details">
+<h3>3.2. Preserving Input Details<a class="headerlink" href="#preserving-input-details" title="Permalink to this heading">#</a></h3>
+<p>Latent Diffusion Model (LDM) 이 image 를 encoding 하는 과정에서 차원을 8 배 축소하기 때문에 정보에 대한 손실이 크고, 따라서 이러한 방식은 fine detail 에 민감한 image translation task 에 적합하지 않을 수 있다고 주장합니다. 아래 사진을 보시면, 기존 아키텍쳐에서 skip connection 을 추가하기 전후로 원본 이미지에 대한 detail preservation 정도 차이를 확인할 수 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_03.png"><img alt="translation_turbo_03" class="bg-primary mb-1" src="../../_images/translation_turbo_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 542 </span><span class="caption-text">Preserving Input Details</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>더 자세하게는, encoder 내의 각 downsampling layer 를 거친 뒤의 activation 4개를 추출하고, 이를 1 x 1 zero-convolution layer 에 통과시킨 후 decoder 의 대응되는 upsampling block 에 skip connection 으로 입력시킵니다.</p>
+</section>
+<section id="unpaired-training">
+<h3>3.3. Unpaired Training<a class="headerlink" href="#unpaired-training" title="Permalink to this heading">#</a></h3>
+<p>논문에서는 SD-Turbo (v2.1) 를 base network 로 사용하고, 변형된 CycleGAN objective 를 적용하여 unpaired translation 을 진행하였습니다. 이때, cycle-consistency loss 와 adversarial loss 를 다음과 같이 정의합니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_04.png"><img alt="translation_turbo_04" class="bg-primary mb-1" src="../../_images/translation_turbo_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 543 </span><span class="caption-text">Cycle-Consistency Loss</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_05.png"><img alt="translation_turbo_05" class="bg-primary mb-1" src="../../_images/translation_turbo_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 544 </span><span class="caption-text">Adversarial Loss</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>여기서 <span class="math notranslate nohighlight">\(X \subset \mathbb{R}^{H \times W \times 3}\)</span>, <span class="math notranslate nohighlight">\(Y \subset \mathbb{R}^{H \times W \times 3}\)</span> 는 각각 source domain 과 target domain 이고, <span class="math notranslate nohighlight">\(G(x, c_Y): X \rightarrow Y\)</span>, <span class="math notranslate nohighlight">\(G(y, c_X):
+Y \rightarrow X\)</span> 는 translation 함수입니다. 두 translation 모두 동일한 generator <span class="math notranslate nohighlight">\(G\)</span> 를 사용하며, caption <span class="math notranslate nohighlight">\(c_X,c_Y\)</span> 만 task 에 따라 변형하게 됩니다. (e.g., day → night translation task 에서 <span class="math notranslate nohighlight">\(c_X\)</span> 는 “Driving in the day” 그리고 <span class="math notranslate nohighlight">\(c_Y\)</span> 는 “Driving in the night”)</p>
+<p>대부분의 layer 는 고정시킨 상태에서 U-Net 의 첫번째 convolutional layer 와 LoRA adapter 를 학습시켰다고 합니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_01.png"><img alt="translation_turbo_01" class="bg-primary mb-1" src="../../_images/translation_turbo_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 545 </span><span class="caption-text">Overall Architecture</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, 새롭게 소개되는 <span class="math notranslate nohighlight">\(L_{rec}\)</span> 손실함수는 <span class="math notranslate nohighlight">\(L_1\)</span> 과 <span class="math notranslate nohighlight">\(LPIPS\)</span> 의 조합으로 구성되어있습니다. 그리고 adversarial loss 에서 사용되는 discriminator <span class="math notranslate nohighlight">\(D_X(x),D_Y(y)\)</span> 는 CLIP 모델을 backbone 으로 사용하였습니다.</p>
+<p>마지막으로, identity regularization loss 도 <span class="math notranslate nohighlight">\(L_{idt} = E_y [L_{rec}(G(y, c_Y ), y)] + E_x [L_{rec}(G(x, c_X), x)]\)</span> 와 같이 정의하고, 최종 objective 를 이들의 가중치 합으로 정의합니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_06.png"><img alt="translation_turbo_06" class="bg-primary mb-1" src="../../_images/translation_turbo_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 546 </span><span class="caption-text">Full objective</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="extensions">
+<h3>3.4. Extensions<a class="headerlink" href="#extensions" title="Permalink to this heading">#</a></h3>
+<p>해당 논문에서 unpaired training setting 뿐만 아니라 paired training 그리고 stochastic output generation 에 대해서도 실험을 진행하였습니다.</p>
+<p><strong>Paired Training</strong></p>
+<p>Paired setting 에서는 single translation function <span class="math notranslate nohighlight">\(G(x,c): X \rightarrow Y\)</span> 를 학습하고, objective 는 (1) perceptual loss 와 pixel-space reconstruction loss 로 구성된 reconstruction loss, (2) unpaired setting 의 adversarial loss 와 유사하지만 target domain 에 대해서만 정의된 GAN loss, 그리고 (3) CLIP text-image alignment loss <span class="math notranslate nohighlight">\(L_{CLIP}\)</span> 의 가중치 합으로 정의합니다.</p>
+<p><strong>Generating Diverse Outputs</strong></p>
+<p>One-step model 로 diverse 한 output 을 생성하는 것은 어려운 일입니다. 논문에서는 해당 task 를 수행하기 위해 input image <span class="math notranslate nohighlight">\(x\)</span>, noise map <span class="math notranslate nohighlight">\(z\)</span>, 그리고 interpolation coefficient <span class="math notranslate nohighlight">\(\gamma\)</span> 를 입력받는 <span class="math notranslate nohighlight">\(G(x,z,\gamma)\)</span> 를 정의합니다.</p>
+<p><span class="math notranslate nohighlight">\(G(x,z,\gamma)\)</span> 는 우선 noise <span class="math notranslate nohighlight">\(z\)</span> 와 encoder output <span class="math notranslate nohighlight">\(G_{enc}(x)\)</span> 를 다음과 같이 interpolation 합니다: <span class="math notranslate nohighlight">\(\gamma G_{enc}(x) + (1 - \gamma) z\)</span>. 그리고 LoRA adapter 가중치와 skip connection 출력값을 <span class="math notranslate nohighlight">\(\theta = \theta_0 + \gamma \cdot \Delta \theta\)</span> 와 같이 조정합니다. 여기서 <span class="math notranslate nohighlight">\(\theta_0\)</span> 는 원래 가중치이고, <span class="math notranslate nohighlight">\(\Delta \theta\)</span> 는 새로 추가된 가중치입니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_07.png"><img alt="translation_turbo_07" class="bg-primary mb-1" src="../../_images/translation_turbo_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 547 </span><span class="caption-text">Reconstruction loss with interpolation coefficient</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>해당 objective 로 interpolation coefficient <span class="math notranslate nohighlight">\(\gamma\)</span> 를 변화시키면서 모델을 fine-tuning 함으로써 다양한 output 을 생성할 수 있었다고 합니다.</p>
+</section>
+</section>
+<section id="experiments">
+<h2>4. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p><strong>Training Details</strong></p>
+<p>학습되는 파라미터는 LoRA 가중치, zero-convolution layer, 그리고 U-Net 의 첫번째 convolutional layer 이며, 이들을 모두 합한 크기는 330MB 입니다.</p>
+<ul>
+<li><p><strong>UNet initialization 코드</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">initialize_unet</span><span class="p">(</span><span class="n">rank</span><span class="p">,</span> <span class="n">return_lora_module_names</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+    <span class="n">unet</span> <span class="o">=</span> <span class="n">UNet2DConditionModel</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;unet&quot;</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
+    <span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">l_modules_others</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
+    <span class="n">l_grep</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;to_k&quot;</span><span class="p">,</span> <span class="s2">&quot;to_q&quot;</span><span class="p">,</span> <span class="s2">&quot;to_v&quot;</span><span class="p">,</span> <span class="s2">&quot;to_out.0&quot;</span><span class="p">,</span> <span class="s2">&quot;conv&quot;</span><span class="p">,</span> <span class="s2">&quot;conv1&quot;</span><span class="p">,</span> <span class="s2">&quot;conv2&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_in&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_shortcut&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_out&quot;</span><span class="p">,</span> <span class="s2">&quot;proj_out&quot;</span><span class="p">,</span> <span class="s2">&quot;proj_in&quot;</span><span class="p">,</span> <span class="s2">&quot;ff.net.2&quot;</span><span class="p">,</span> <span class="s2">&quot;ff.net.0.proj&quot;</span><span class="p">]</span>
+    <span class="k">for</span> <span class="n">n</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">unet</span><span class="o">.</span><span class="n">named_parameters</span><span class="p">():</span>
+        <span class="k">if</span> <span class="s2">&quot;bias&quot;</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">or</span> <span class="s2">&quot;norm&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span> <span class="k">continue</span>
+        <span class="k">for</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">l_grep</span><span class="p">:</span>
+            <span class="k">if</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">and</span> <span class="p">(</span><span class="s2">&quot;down_blocks&quot;</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">or</span> <span class="s2">&quot;conv_in&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">):</span>
+                <span class="n">l_target_modules_encoder</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
+                <span class="k">break</span>
+            <span class="k">elif</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span> <span class="ow">and</span> <span class="s2">&quot;up_blocks&quot;</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span>
+                <span class="n">l_target_modules_decoder</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
+                <span class="k">break</span>
+            <span class="k">elif</span> <span class="n">pattern</span> <span class="ow">in</span> <span class="n">n</span><span class="p">:</span>
+                <span class="n">l_modules_others</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">n</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;.weight&quot;</span><span class="p">,</span><span class="s2">&quot;&quot;</span><span class="p">))</span>
+                <span class="k">break</span>
+    <span class="n">lora_conf_encoder</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
+    <span class="n">lora_conf_decoder</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
+    <span class="n">lora_conf_others</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span><span class="n">target_modules</span><span class="o">=</span><span class="n">l_modules_others</span><span class="p">,</span> <span class="n">lora_alpha</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_encoder</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_encoder&quot;</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_decoder</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_decoder&quot;</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">lora_conf_others</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;default_others&quot;</span><span class="p">)</span>
+    <span class="n">unet</span><span class="o">.</span><span class="n">set_adapters</span><span class="p">([</span><span class="s2">&quot;default_encoder&quot;</span><span class="p">,</span> <span class="s2">&quot;default_decoder&quot;</span><span class="p">,</span> <span class="s2">&quot;default_others&quot;</span><span class="p">])</span>
+    <span class="k">if</span> <span class="n">return_lora_module_names</span><span class="p">:</span>
+        <span class="k">return</span> <span class="n">unet</span><span class="p">,</span> <span class="n">l_target_modules_encoder</span><span class="p">,</span> <span class="n">l_target_modules_decoder</span><span class="p">,</span> <span class="n">l_modules_others</span>
+    <span class="k">else</span><span class="p">:</span>
+        <span class="k">return</span> <span class="n">unet</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>VAE initialization 코드</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">initialize_vae</span><span class="p">(</span><span class="n">rank</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">return_lora_module_names</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+    <span class="n">vae</span> <span class="o">=</span> <span class="n">AutoencoderKL</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="s2">&quot;stabilityai/sd-turbo&quot;</span><span class="p">,</span> <span class="n">subfolder</span><span class="o">=</span><span class="s2">&quot;vae&quot;</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_encoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">encoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">forward</span> <span class="o">=</span> <span class="n">my_vae_decoder_fwd</span><span class="o">.</span><span class="fm">__get__</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="p">,</span> <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="vm">__class__</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
+    <span class="c1"># add the skip connection convs</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">stride</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">bias</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span><span class="o">.</span><span class="n">requires_grad_</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
+    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_1</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
+    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_2</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
+    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_3</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
+    <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">constant_</span><span class="p">(</span><span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">skip_conv_4</span><span class="o">.</span><span class="n">weight</span><span class="p">,</span> <span class="mf">1e-5</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">ignore_skip</span> <span class="o">=</span> <span class="kc">False</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">decoder</span><span class="o">.</span><span class="n">gamma</span> <span class="o">=</span> <span class="mi">1</span>
+    <span class="n">l_vae_target_modules</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;conv1&quot;</span><span class="p">,</span><span class="s2">&quot;conv2&quot;</span><span class="p">,</span><span class="s2">&quot;conv_in&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_shortcut&quot;</span><span class="p">,</span>
+        <span class="s2">&quot;conv&quot;</span><span class="p">,</span> <span class="s2">&quot;conv_out&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_1&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_2&quot;</span><span class="p">,</span> <span class="s2">&quot;skip_conv_3&quot;</span><span class="p">,</span> 
+        <span class="s2">&quot;skip_conv_4&quot;</span><span class="p">,</span> <span class="s2">&quot;to_k&quot;</span><span class="p">,</span> <span class="s2">&quot;to_q&quot;</span><span class="p">,</span> <span class="s2">&quot;to_v&quot;</span><span class="p">,</span> <span class="s2">&quot;to_out.0&quot;</span><span class="p">,</span>
+    <span class="p">]</span>
+    <span class="n">vae_lora_config</span> <span class="o">=</span> <span class="n">LoraConfig</span><span class="p">(</span><span class="n">r</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span> <span class="n">init_lora_weights</span><span class="o">=</span><span class="s2">&quot;gaussian&quot;</span><span class="p">,</span> <span class="n">target_modules</span><span class="o">=</span><span class="n">l_vae_target_modules</span><span class="p">)</span>
+    <span class="n">vae</span><span class="o">.</span><span class="n">add_adapter</span><span class="p">(</span><span class="n">vae_lora_config</span><span class="p">,</span> <span class="n">adapter_name</span><span class="o">=</span><span class="s2">&quot;vae_skip&quot;</span><span class="p">)</span>
+    <span class="k">if</span> <span class="n">return_lora_module_names</span><span class="p">:</span>
+        <span class="k">return</span> <span class="n">vae</span><span class="p">,</span> <span class="n">l_vae_target_modules</span>
+    <span class="k">else</span><span class="p">:</span>
+        <span class="k">return</span> <span class="n">vae</span>
+</pre></div>
+</div>
+</li>
+</ul>
+<p><strong>Datasets</strong></p>
+<p>Unpaired datasets 에서 자주 사용되는 Horse <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> Zebra, Yosemite Summer <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> Winter, 그리고 고차원 이미지의 주행 데이터셋 BDD100k 의 day <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> night, clear <span class="math notranslate nohighlight">\(\leftrightarrow\)</span> foggy 데이터셋으로 실험하였습니다.</p>
+<p><strong>Evaluation Protocol</strong></p>
+<p>Image translation task 에서 다음과 같은 2가지 요소를 만족시켜야 한다고 합니다.</p>
+<p>(1) target domain 의 데이터셋 분포와의 일치: FID 로 측정</p>
+<p>(2) input image 의 구조적인 정보 유지: DINO-Struct-Dist 로 측정</p>
+<section id="comparison-to-unpaired-methods">
+<h3>4.1. Comparison to Unpaired Methods<a class="headerlink" href="#comparison-to-unpaired-methods" title="Permalink to this heading">#</a></h3>
+<p>기존의 GAN-based 그리고 Diffusion-based 모델들은 output realism 과 structure preservation 모두에서 좋은 성능을 내지는 못하는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_08.png"><img alt="translation_turbo_08" class="bg-primary mb-1" src="../../_images/translation_turbo_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 548 </span><span class="caption-text">Comparison to baselines on 256 × 256 datasets.</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_09.png"><img alt="translation_turbo_09" class="bg-primary mb-1" src="../../_images/translation_turbo_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 549 </span><span class="caption-text">Comparison to baselines on driving datasets (512 × 512).</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래 예시 사진들과 table 에서 보이듯이, CycleGAN-Turbo 모델이 CycleGAN 과 CUT 모델보다 더 낮은 FID, DINO Structure score 를 보여주는 것을 확인할 수 있습니다.</p>
+<p>또한, diffusion 기반의 zero-shot image translation 모델들(SDEdit, Plug-and-Play, pix2pix-zero, CycleDiffusion, 그리고 DDIB)이 realistic 한 이미지는 잘 생성하지만 원본 이미지의 structure 를 훼손시키는 경우가 있음을 확인할 수 있습니다. 이러한 현상은 multiple object 가 존재하는 주행 데이터셋에서 더 빈번하게 발생하고, 이는 InstructPix2Pix 모델을 제외하고는 noise map 으로 inverting 하는 과정에서 원본 이미지에 대한 손실이 일어나기 때문이라고 주장합니다.</p>
+<ul>
+<li><p><strong>Comparison to GAN-based methods</strong></p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_10.png"><img alt="translation_turbo_10" class="bg-primary mb-1" src="../../_images/translation_turbo_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 550 </span><span class="caption-text">Comparison to GAN-based methods on 256 × 256 datasets.</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_11.png"><img alt="translation_turbo_11" class="bg-primary mb-1" src="../../_images/translation_turbo_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 551 </span><span class="caption-text">Comparison to GAN-based methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+<li><p><strong>Comparison to Diffusion-based editing methods</strong></p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_12.png"><img alt="translation_turbo_12" class="bg-primary mb-1" src="../../_images/translation_turbo_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 552 </span><span class="caption-text">Comparison to Diffusion-based editing methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_13.png"><img alt="translation_turbo_13" class="bg-primary mb-1" src="../../_images/translation_turbo_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 553 </span><span class="caption-text">Comparison to Diffusion-based editing methods on driving datasets (512 × 512).</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_14.png"><img alt="translation_turbo_14" class="bg-primary mb-1" src="../../_images/translation_turbo_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 554 </span><span class="caption-text">Evaluation on standard CycleGAN datasets (256 × 256).</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_15.png"><img alt="translation_turbo_15" class="bg-primary mb-1" src="../../_images/translation_turbo_15.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 555 </span><span class="caption-text">Comparison on 512 × 512 driving datasets.</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-study">
+<h3>4.2. Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
+<p><strong>Using pre-trained weights</strong></p>
+<p>랜덤한 가중치로 초기화하는 것보다 pre-trained 된 모델을 사용할 때, 모델 성능이 더 좋은 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_16.png"><img alt="translation_turbo_16" class="bg-primary mb-1" src="../../_images/translation_turbo_16.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 556 </span><span class="caption-text">Ablation with Horse to Zebra</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Different ways of adding conditioning inputs</strong></p>
+<p>ControlNet 이나 T2I-Adapter 를 사용하여 conditioning 할 때, 원본 이미지와의 structure 충돌이 일어나는 부분을 재차 확인할 수 있습니다.</p>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_17.png"><img alt="translation_turbo_17" class="bg-primary mb-1" src="../../_images/translation_turbo_17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 557 </span><span class="caption-text">Ablating individual components</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Skip Connections and trainable encoder and decoder</strong></p>
+<p>Config D 와 비교하였을 때, FID 에 대한 성능이 미세하게 떨어지는 반면에 structure preservation 성능이 월등히 높음을 확인할 수 있습니다.</p>
+</section>
+<section id="id1">
+<h3>4.3. Extensions<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<p><strong>Paired translation</strong></p>
+<p>Pix2pix-Turbo 를 LCM-ControlNet, SD-Turbo ControlNet, 그리고 SD ControlNet 모델과 정성적으로 비교하였습니다. Classifier-free guidance 와 negative prompt 없이 단일 step 만으로도 좋은 성능을 보여줌을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_18.png"><img alt="translation_turbo_18" class="bg-primary mb-1" src="../../_images/translation_turbo_18.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 558 </span><span class="caption-text">Comparison on paired edge-to-image task (512 × 512).</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Generating diverse outputs</strong></p>
+<figure class="align-default" id="id21">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/translation_turbo_19.png"><img alt="translation_turbo_19" class="bg-primary mb-1" src="../../_images/translation_turbo_19.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 559 </span><span class="caption-text">Generating diverse outputs</span><a class="headerlink" href="#id21" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="discussions-and-limitations">
+<h2>5. Discussions and Limitations<a class="headerlink" href="#discussions-and-limitations" title="Permalink to this heading">#</a></h2>
+<p>해당 논문은 multi-step diffusion training 에 의존하지 않고, single-step 만으로 다양한 GAN 기반의 objective 와 융합해서 downstream task 에 적용할 수 있는 방식을 제안합니다. 하지만, 다음과 같은 3가지 한계점을 제시합니다: (1) SD-Turbo 는 classifier-free guidance 를 사용하지 않아, guidance control 을 설정할 수 없습니다. (2) 해당 방식은 negative prompt 를 지원하지 않습니다. 그리고 마지막으로 (3) high capacity generator 기반의 cycle consistency loss 로 학습하는 데 메모리에 대한 부담이 큽니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DiT.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">DiT</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="LCM-LoRA.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#related-work">2. Related Work</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">3. Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adding-conditional-input">3.1. Adding Conditional Input</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preserving-input-details">3.2. Preserving Input Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#unpaired-training">3.3. Unpaired Training</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#extensions">3.4. Extensions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">4. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-unpaired-methods">4.1. Comparison to Unpaired Methods</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.2. Ablation Study</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">4.3. Extensions</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#discussions-and-limitations">5. Discussions and Limitations</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/one_step_diffusion_with_distribution_matching_distillation.html b/docs/review/one_step_diffusion_with_distribution_matching_distillation.html
new file mode 100755
index 00000000..4f3e2744
--- /dev/null
+++ b/docs/review/one_step_diffusion_with_distribution_matching_distillation.html
@@ -0,0 +1,934 @@
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>One-step Diffusion with Distribution Matching Distillation &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/one_step_diffusion_with_distribution_matching_distillation';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="Make A Video" href="Make_A_Video.html" />
+    <link rel="prev" title="MimicBrush: Zero-shot Image Editing with Reference Imitation" href="MimicBrush.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/one_step_diffusion_with_distribution_matching_distillation.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/one_step_diffusion_with_distribution_matching_distillation.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>One-step Diffusion with Distribution Matching Distillation</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">One-step Diffusion with Distribution Matching Distillation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction-and-preliminaries">Introduction and Preliminaries</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Overview</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#distribution-matching-loss">Distribution Matching Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regression-loss">Regression Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#full-algorithm">Full algorithm</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#main-comparison">Main comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-unaccelerated-models">Comparison with Unaccelerated Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion-and-limitations">Conclusion and Limitations</a></li>
+</ul>
+
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> One-Step Image Diffusion with Distribution Matching Distillation</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2311.18828">https://arxiv.org/abs/2311.18828</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/tianweiy/DMD2">tianweiy/DMD2</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Joongwon Lee</p></li>
+<li><p><strong>Last updated on Oct. 16, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="one-step-diffusion-with-distribution-matching-distillation">
+<h1>One-step Diffusion with Distribution Matching Distillation<a class="headerlink" href="#one-step-diffusion-with-distribution-matching-distillation" title="Permalink to this heading">#</a></h1>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="introduction-and-preliminaries">
+<h1>Introduction and Preliminaries<a class="headerlink" href="#introduction-and-preliminaries" title="Permalink to this heading">#</a></h1>
+<section id="overview">
+<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this heading">#</a></h2>
+<ul>
+<li><p>Diffusion model has revolutionized image generation, 하지만 sampling speed 가 느린 문제점이 있음</p></li>
+<li><p>Accelerating sampling speed 를 목적으로 하는 많은 연구가 이루어져 왔음</p>
+<ul class="simple">
+<li><p>ODE solving: diffusion model 의 큰 틀은 유지한 채 sampling step 의 수를 줄이면서 유사한 수준의 sample 생성 (DDIM, InstaFlow, CFM)</p></li>
+<li><p>하지만 여전히 50 ~ 100 step 이하로 step 을 줄이게 되면 sample quality 가 크게 감소하여 diffusion distillation 을 통한 one-step generation 방법이 연구되어지고 있음</p></li>
+<li><p>Single step distillation: Diffusion model 을 teacher 삼아 one-step generation model 학습</p></li>
+<li><p>직관적으로 생각해보면, diffusion model 을 학습 시킨후 학습된 모델을 통해 다수의 (noise, image) pair 얻은 후 one-step VAE 를 학습시키는 것을 생각 해 볼 수 있음.</p></li>
+<li><p>그러나, multi step 으로 학습된 diffusion model 을 one-step generation model 에 distillation 을 하는 것은 어려움이 있음</p>
+<ul>
+<li><p>Noise level 을 점진적으로 증가 시키며, one-step generation 을 학습시키는 방법</p></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><p>GAN 에서 영감을 받아, (noise, image) 의 대응을 강제하는 대신 (such as autoencoder),  student model 이 생성하는 이미지를 teacher model (original diffusion model) 이 생성하는 이미지와 indistinguishable 하게 학습시키는 distribution matching 전략을 생각해 볼 수 있음</p></li>
+<li><p>Diffusion model 의 score (<span class="math notranslate nohighlight">\(\newcommand{\laplacianx}{\Delta x}\)</span><span class="math notranslate nohighlight">\(\nabla_{\mathbf{x}} \log p(\mathbf{x})\)</span>) 을 사용해서 student model (<span class="math notranslate nohighlight">\(p(x)\)</span>) 을 학습시킬 수 있음 (real image score 가 증가하는 방향으로 pattern 을 업데이트 하는 것이 desired 방향 + fake image 를 생성하는 diffusion model 의 score 을 감소시키는 방향으로 parameter update)</p>
+<ul>
+<li><p>Diffusion model and score-based model</p>
+<p>엄밀하게 본다면, 이 논문은 diffusion model (DDPM style) 보다는 score matching 의 철학과 논리전개를 바탕으로 두고 있음. 그러나, diffusion model 과 score based model 은 궁극적으로 같은 objective 를 다른 방식으로 학습하고 있을 뿐이고, 그 score 과 diffusion model 이 예측하는 one-step denoised 분포 (<span class="math notranslate nohighlight">\(\mu_{base}\)</span>) 는 쉽게 변환 가능함.</p>
+<div class="math notranslate nohighlight">
+\[
+        s_{\text{real}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{base}}(x_t, t)}{\sigma_t^2}
+        \]</div>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%202.png"><img alt="NCSN" class="bg-primary mb-1" src="../../_images/image%202.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 576 </span><span class="caption-text">NCSN</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/langevin.gif"><img alt="Langevin" class="bg-primary mb-1" src="../../_images/langevin.gif" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 577 </span><span class="caption-text">Langevin sampling of score models</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="method">
+<h1>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h1>
+<section id="id1">
+<h2>Overview<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%203.png"><img alt="../../_images/image%203.png" class="bg-primary mb-1" src="../../_images/image%203.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 578 </span><span class="caption-text">Overall scheme</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>학습된 diffusion model (real data score function)  이 주어진 상황에서 one-step generator (<span class="math notranslate nohighlight">\(G_{\theta}\)</span>) 를 학습시키기 위해, 두개의 loss 1) distribution matching gradient (엄밀하게는 loss 보다는 parameter update gradient) 2) regression loss 를 사용</p></li>
+<li><p>Adversarial AutoEncoder 가 연상되는 architecture 를 가지고 있음</p>
+<ul>
+<li><p>Adversarial AutoEncoder</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%204.png"><img alt="../../_images/image%204.png" class="bg-primary mb-1" src="../../_images/image%204.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 579 </span><span class="caption-text">Adversarial AE architecture</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>AAE 는 VAE 가 생성하는 이미지에 대한 1) regression loss 와 2) implicit distribution matching loss 를 가지고 있는데, 여기서 implicit distribution matching 을 teacher diffusion model 의 distribution matching gradient 로 대체한 형태</p>
+</li>
+</ul>
+</li>
+<li><p>총  네 부분으로 나뉘어져 있음</p>
+<ul class="simple">
+<li><p>Paired dataset construction</p></li>
+<li><p>Pretrained base model (= real data score function, frozen)</p></li>
+<li><p>One-step generator (main objective)</p></li>
+<li><p>Fake data generator (= fake data score function, on-line training)</p></li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="distribution-matching-loss">
+<h2>Distribution Matching Loss<a class="headerlink" href="#distribution-matching-loss" title="Permalink to this heading">#</a></h2>
+<p>우선, 생성모델의 training objective를 생각해보면, <span class="math notranslate nohighlight">\(p_\text{fake}\)</span> (one-step generator가 생성하는 분포) 와 <span class="math notranslate nohighlight">\(p_\text{real}\)</span> (실제 데이터의 분포) 를 matching 시키도록 학습을 시켜야하는 것이 one-step generator의 학습 objective이고 아래와 같이 쓰일 수 있음:</p>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+\begin{align*}
+D_{KL}(p_{\text{fake}} \parallel p_{\text{real}}) &amp;= \mathbb{E}_{x \sim p_{\text{fake}}} \left( \log \frac{p_{\text{fake}}(x)}{p_{\text{real}}(x)} \right) \\
+&amp;= \mathbb{E}_{\substack{z \sim \mathcal{N}(0; I) \\ x = G_{\theta}(z)}} \left( - \log p_{\text{real}}(x) + \log p_{\text{fake}}(x|z)\right)
+\end{align*}
+\end{split}\]</div>
+<p>하지만, <span class="math notranslate nohighlight">\(p_\text{real}(x)\)</span> 를 바로 구하는 것이 어려움 (이것이 곧 생성모델의 objective). 그러나, 모델을 학습시키기 위해서는 <span class="math notranslate nohighlight">\(D_{KL}\)</span> 을 직접 구할 필요는 없고, <span class="math notranslate nohighlight">\(D_{KL}\)</span> 을 minimize 하는 (fake 와 real 의 분포사이의 거리를 최소화 시키는 방향으로) parameter update 를 하기 위한 미분값만 알면 충분함. 위 식을 one-step generator 의 learnable parameter (<span class="math notranslate nohighlight">\(\theta\)</span>) 에 대해 미분 해주면,</p>
+<div class="math notranslate nohighlight">
+\[\begin{split}
+\nabla_{\theta} D_{KL} = \mathbb{E}_{\substack{z \sim \mathcal{N}(0; I) \\ x = G_{\theta}(z)}} \left[ - \left( s_{\text{real}}(x) - s_{\text{fake}}(x) \right) \nabla_{\theta} G_{\theta}(z) \right]
+\end{split}\]</div>
+<div class="math notranslate nohighlight">
+\[
+s_{\text{real}}(x) = \nabla_x \log p_{\text{real}}(x), \quad s_{\text{fake}}(x) = \nabla_x \log p_{\text{fake}}(x)
+\]</div>
+<p>이 유도되는데, 여기서 score <span class="math notranslate nohighlight">\(s_{\text{real}}(x)\)</span> 와  <span class="math notranslate nohighlight">\(s_{\text{fake}}(x)\)</span> 를 정확히 알 수 있다면, one-step generator 을 학습시킬 수 있음. 단, score 이 <span class="math notranslate nohighlight">\(x\)</span> 가 존재하는 전체 space 에 대해서 잘 작동하는 score 이어야 함 (= Score-SDE).</p>
+<p>이제, 그러면 우리의 objective 는 real score 와 fake score 을 어떻게 구할지가 되는데, <span class="math notranslate nohighlight">\(s_{\text{real}}(x)\)</span> 은 pretrained diffusion model 에서:</p>
+<div class="math notranslate nohighlight">
+\[
+s_{\text{real}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{base}}(x_t, t)}{\sigma_t^2}
+\]</div>
+<p>와 같이 유도됨.  <span class="math notranslate nohighlight">\(s_{\text{fake}}(x)\)</span> 의 경우가 복잡해지는데, <span class="math notranslate nohighlight">\(s_{\text{fake}}(x)\)</span> 는 one-step generator 가 생성하는 이미지의 score function 라서 one-step generator 로 생성 된 이미지가 있어야 해당 이미지를 생성하는 diffusion 모델을 학습시켜서 구할 수 있음.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png"><img alt="../../_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png" class="bg-primary mb-1" src="../../_images/f637f3b3-9e18-48d1-946a-784830e6fb98.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 580 </span><span class="caption-text">Distribution matching gradient computation</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>여기서 저자들은 fake data score function (initialized to real data score function) 을 동시에 학습시키는 방법으로 해결</p>
+<div class="math notranslate nohighlight">
+\[
+s_{\text{fake}}(x_t, t) = - \frac{x_t - \alpha_t \mu_{\text{fake}}^{\phi}(x_t, t)}{\sigma_t^2}
+\]</div>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}_{\text{denoise}}^{\phi} = \left\lVert \mu_{\text{fake}}^{\phi}(x_t, t) - x_0 \right\rVert_2^2
+\]</div>
+<p>정리하자면, real score 은 real distribution (data distribution) 방향으로 parameter update를 하면서 fake distribution (one-step generation 의 output) 을 real distribution 에 가깝게 일치시키는 역할을 하며, fake score 의 반대방향으로 parameter update 를 하는 것은 fake data generator (one-step generator) 의 반대방향으로 distribution 을 밀어내서 most probable 한 한개의 점으로 모든 fake image 가 collapse 하는것을 방지하는 regularizer 역할을 한다.</p>
+</section>
+<section id="regression-loss">
+<h2>Regression Loss<a class="headerlink" href="#regression-loss" title="Permalink to this heading">#</a></h2>
+<p>그런데, score 만을 사용하여 one-step generator 을 학습시키는 것은 충분하지 않음. 두가지 측면에서 생각 해 볼 수 있는데 1) Practically, 매우 작은 noise level 에서는 score 이 reliable 하지 않아짐 2) Theoretically, <span class="math notranslate nohighlight">\(\nabla_x \log p(x)\)</span> 는 <span class="math notranslate nohighlight">\(p(x)\)</span> 의 scale 에 영향을 받지 않아, 데이터의 높고 낮음에 대한 정보를 줄 수 없음.</p>
+<p>따라서, real + fake score 로 학습이 진행된다면,  낮은 real score 을 보이는 영역은 커버하지 못하는 부분으로 one-step generation 모델이 수렴하게 될 것 이고, high dimension 에서는 generated image 의 pixel level accuracy 에 문제가 생길 수 있음.</p>
+<p>여기서 저자들은 pixel-wise MSE (regression loss) 를 사용하여 간단히 이 문제를 해결함.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png"><img alt="../../_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png" class="bg-primary mb-1" src="../../_images/fed6a1b4-97d4-4ef7-ab99-6ac3cef4bbbd.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 581 </span><span class="caption-text">Regression loss</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2011.png"><img alt="../../_images/image%2011.png" class="bg-primary mb-1" src="../../_images/image%2011.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 582 </span><span class="caption-text">The effect of real and fake scores and regression loss</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p>그렇다면, Regression loss 하나만으로는 학습이 왜 불가능한가? (개인적 생각)</p>
+<p>이론상 regression loss 만을 사용해도 충분히 one-step generator 을 학습시킬 수 있어 보인다. 그러나, regression 의 근본적 문제점은 distribution to distribution matching 이 아니라는 점이다. e.g.</p>
+</li>
+</ul>
+<p>Regression loss 를 얻기 위해서는 (noise, real image) pair 가 필요하게 되는데, 저자들은 학습된 diffusion model 에서부터 ODE solver 을 사용하여 gaussian noise와 real image 사이에 쌍을 얻어서 데이터셋을 학습 이전에 구축, 해당 pair들을 바탕으로 regression loss 를 구함 (Learned Perceptual Image Patch Similarity).</p>
+<div class="math notranslate nohighlight">
+\[
+\mathcal{L}_{\text{reg}} = \mathbb{E}_{(z,y) \sim \mathcal{D}} \, \ell(G_\theta(z), y)
+\]</div>
+</section>
+<section id="full-algorithm">
+<h2>Full algorithm<a class="headerlink" href="#full-algorithm" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2012.png"><img alt="../../_images/image%2012.png" class="bg-primary mb-1" src="../../_images/image%2012.png" style="width: 400px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 583 </span><span class="caption-text">Training algorithm</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="results">
+<h1>Results<a class="headerlink" href="#results" title="Permalink to this heading">#</a></h1>
+<section id="main-comparison">
+<h2>Main comparison<a class="headerlink" href="#main-comparison" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2013.png"><img alt="../../_images/image%2013.png" class="bg-primary mb-1" src="../../_images/image%2013.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 584 </span><span class="caption-text">Image generation benchmarks</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-study">
+<h2>Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2014.png"><img alt="../../_images/image%2014.png" class="bg-primary mb-1" src="../../_images/image%2014.png" style="width: 300px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 585 </span><span class="caption-text">Ablation on distribution matching</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2015.png"><img alt="../../_images/image%2015.png" class="bg-primary mb-1" src="../../_images/image%2015.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 586 </span><span class="caption-text">Ablation on regression loss</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="comparison-with-unaccelerated-models">
+<h2>Comparison with Unaccelerated Models<a class="headerlink" href="#comparison-with-unaccelerated-models" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/image%2017.png"><img alt="../../_images/image%2017.png" class="bg-primary mb-1" src="../../_images/image%2017.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 587 </span><span class="caption-text">Comparison with Unaccelerated Models</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section class="tex2jax_ignore mathjax_ignore" id="conclusion-and-limitations">
+<h1>Conclusion and Limitations<a class="headerlink" href="#conclusion-and-limitations" title="Permalink to this heading">#</a></h1>
+<ul class="simple">
+<li><p>Score model 을 사용한 distribution matching loss 와 regularizing term 인 regression loss 를 통해, teacher model 에 준하는 성능을 낼 수 있었다</p></li>
+<li><p>One step generator 와 multi-step generation 사이에는 근본적인 성능 tradeoff 가 존재함</p></li>
+<li><p>one-step generator 의 성능은 teacher diffusion model 의 성능에 종속된다</p></li>
+</ul>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="MimicBrush.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">MimicBrush: Zero-shot Image Editing with Reference Imitation</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="Make_A_Video.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">Make A Video</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">One-step Diffusion with Distribution Matching Distillation</a></li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction-and-preliminaries">Introduction and Preliminaries</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">Overview</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#distribution-matching-loss">Distribution Matching Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#regression-loss">Regression Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#full-algorithm">Full algorithm</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#results">Results</a><ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#main-comparison">Main comparison</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">Ablation Study</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-with-unaccelerated-models">Comparison with Unaccelerated Models</a></li>
+</ul>
+</li>
+<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#conclusion-and-limitations">Conclusion and Limitations</a></li>
+</ul>
+
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
+</html>
\ No newline at end of file
diff --git a/docs/review/progressive_distillation.html b/docs/review/progressive_distillation.html
old mode 100644
new mode 100755
index 748246bc..c9df83ce
--- a/docs/review/progressive_distillation.html
+++ b/docs/review/progressive_distillation.html
@@ -1,910 +1,930 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Progressive Distillation for Fast Sampling of Diffusion Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/progressive_distillation';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="ConceptLab" href="ConceptLab.html" />
-    <link rel="prev" title="Your Diffusion Model is Secretly a Zero-Shot Classifier" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/progressive_distillation.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/progressive_distillation.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Progressive Distillation for Fast Sampling of Diffusion Models</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-diffusion-model-in-continuous-time">2. Background - Diffusion model in continuous time</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#definition">2.1. Definition</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#objective">2.2. Objective</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling">2.3. Sampling</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ancestral-sampling-ddpm">2.3.1. Ancestral Sampling - DDPM</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#probability-flow-ode">2.3.2. Probability Flow ODE</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#progressive-distillation">3. Progressive Distillation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-parameterization-and-training-loss">4. Diffusion Model Parameterization and Training Loss</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-parametrization-and-training-loss">5.1. Model Parametrization and Training Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">5.2. Progressive Distillation</a></li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Progressive Distillation for Fast Sampling of Diffusion Models (ICLR 2022)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2202.00512">https://arxiv.org/abs/2202.00512</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/google-research/google-research/tree/master/diffusion_distillation/diffusion_distillation">google-research/google-research</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Nov. 14, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="progressive-distillation-for-fast-sampling-of-diffusion-models">
-<h1>Progressive Distillation for Fast Sampling of Diffusion Models<a class="headerlink" href="#progressive-distillation-for-fast-sampling-of-diffusion-models" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>Diffusion model 이 ImageNet generation task 에서 기존 BigGAN-deep 그리고 VQ-VAE-2 모델보다 FID/CAS score 기준으로 더 좋은 성능을 보여주며 많은 각광을 받고 있습니다. 그러나 sampling 속도가 느리다는 치명적인 단점을 가지고 있습니다.</p>
-<p>이를 해결하기 위해, 논문에서는 Progressive Distillation 기법을 소개하게 됩니다. 간략히 설명하자면 사전학습된 <span class="math notranslate nohighlight">\(N\)</span>-step DDIM 모델을 <span class="math notranslate nohighlight">\(N/2\)</span>-step student 모델에 distillation 하는 과정을 반복하여 최종적으로 4 steps 만으로도 state-of-the-art 모델을 수천번의 sampling steps 를 거쳐 생성한 이미지들과 유사한 모델 성능을 보여준다고 합니다.</p>
-</section>
-<section id="background-diffusion-model-in-continuous-time">
-<h2>2. Background - Diffusion model in continuous time<a class="headerlink" href="#background-diffusion-model-in-continuous-time" title="Permalink to this heading">#</a></h2>
-<section id="definition">
-<h3>2.1. Definition<a class="headerlink" href="#definition" title="Permalink to this heading">#</a></h3>
-<p>Continuous 한 time domain 에서의 diffusion model 을 다음과 같은 요소들로 정의합니다.</p>
-<ul class="simple">
-<li><p>Training data <span class="math notranslate nohighlight">\(x \sim p(x)\)</span></p></li>
-<li><p>Latent variables <span class="math notranslate nohighlight">\(z = \{z_t | t \in [0,1]\}\)</span></p></li>
-</ul>
-<p>여기서 <span class="math notranslate nohighlight">\(z_t\)</span> 는 differentiable 한 noise schedule functions <span class="math notranslate nohighlight">\(\alpha_t, \sigma_t\)</span> 로 값이 정의되고, 이 함수들은 log <em>signal-to-noise-ratio</em> <span class="math notranslate nohighlight">\(\lambda_t = \log[\alpha_t^2/\sigma_t^2]\)</span> 가 monotonically decreasing 하도록 설정됩니다. 그리고 이들을 기반으로 다음과 같은 Markovian forward process 를 정의합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_01.png"><img alt="progressive_distillation_01" class="bg-primary mb-1" src="../../_images/progressive_distillation_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 400 </span><span class="caption-text">Markovian Forward Process</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>where <span class="math notranslate nohighlight">\(0 \leq s &lt; t \leq 1\)</span>  and <span class="math notranslate nohighlight">\(\sigma_{t|s}^2 = (1-e^{\lambda_t - \lambda_s}) \sigma_t^2\)</span></p>
-</section>
-<section id="objective">
-<h3>2.2. Objective<a class="headerlink" href="#objective" title="Permalink to this heading">#</a></h3>
-<p>Diffusion model 의 objective 는 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t)\)</span> 모델에서 <span class="math notranslate nohighlight">\(z_t \sim q(z_t | x)\)</span> 와 <span class="math notranslate nohighlight">\(\lambda_t\)</span> 를 입력받아 다음과 같이 Mean Squared Error Loss 를 최소화하는 방향으로 원본 이미지 <span class="math notranslate nohighlight">\(x\)</span> 를 예측하는 것입니다. 이때, <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 를 <em>weighting function</em> 이라 부릅니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_02.png"><img alt="progressive_distillation_02" class="bg-primary mb-1" src="../../_images/progressive_distillation_02.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 401 </span><span class="caption-text">Objective</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>where <span class="math notranslate nohighlight">\(t \sim U[0,1]\)</span></p>
-</section>
-<section id="sampling">
-<h3>2.3. Sampling<a class="headerlink" href="#sampling" title="Permalink to this heading">#</a></h3>
-<p>Diffusion model 에서 sampling 하는 방식은 다양하게 존재합니다.</p>
-<section id="ancestral-sampling-ddpm">
-<h4>2.3.1. Ancestral Sampling - DDPM<a class="headerlink" href="#ancestral-sampling-ddpm" title="Permalink to this heading">#</a></h4>
-<p>첫번째로는 DDPM 논문에서 소개하는 discrete time ancestral sampling 방식입니다. 위에 소개했던 notation 기준으로 reverse process 를 다음과 같이 수식적으로 표현 가능합니다.</p>
-<div class="math notranslate nohighlight">
-\[
-q(z_s | z_t,x) = N(z_s | \hat{\mu}_{s|t}(z_t,x), \tilde{\sigma}_{s|t}^2I)
-\]</div>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_03.png"><img alt="progressive_distillation_03" class="bg-primary mb-1" src="../../_images/progressive_distillation_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 402 </span><span class="caption-text">Reverse Process</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이를 기반으로 <span class="math notranslate nohighlight">\(z_1 \sim N(0,I)\)</span> 로부터 다음과 같은 ancestral sampler 를 정의하게 됩니다. 이때, <span class="math notranslate nohighlight">\(\gamma\)</span> 는 sampling 시 얼마나 많은 noise 를 추가할지 설정하는 hyperparameter 입니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_04.png"><img alt="progressive_distillation_04" class="bg-primary mb-1" src="../../_images/progressive_distillation_04.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 403 </span><span class="caption-text">Ancestral Sampler</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="probability-flow-ode">
-<h4>2.3.2. Probability Flow ODE<a class="headerlink" href="#probability-flow-ode" title="Permalink to this heading">#</a></h4>
-<p>반면에, Song et al. (2021c) 에서 forward diffusion process 를 SDE 로 표현할 수 있고, 이를 통한 sampling process 를 <em>probabiility flow</em> ODE 로 표현해서 구할 수 있다고 제시합니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_05.png"><img alt="progressive_distillation_05" class="bg-primary mb-1" src="../../_images/progressive_distillation_05.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 404 </span><span class="caption-text">Probability flow ODE</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이때, <span class="math notranslate nohighlight">\(f(z_t,t) = \frac{d \log \alpha_t}{dt}z_t, g^2(t) = \frac{dσ_t^2}{dt} − 2 \frac{d\log \alpha_t}{dt}\sigma_t^2, \text{and}\)</span>  <span class="math notranslate nohighlight">\(\nabla_z \log \hat{p}_{\theta}(z_t) = \frac{\alpha_t\hat{x}_{\theta}(z_t) -z_t}{\sigma_t^2}\)</span> 로 정의합니다.</p>
-<p>다시 말해 <span class="math notranslate nohighlight">\(z_1 \sim N(0,I)\)</span> 로부터 이미지 <span class="math notranslate nohighlight">\(x\)</span> 를 생성하는 task 를 위와 같이 ODE solver 문제로 해석할 수 있고, Euler rule 이나 Runge-Kutta method 등의 전통적인 ODE integrator 보다 DDIM sampler 를 적용했을때 성능이 가장 좋다고 논문에서 제시합니다. 아래 사진은 다양한 Probabiltity Flow ODE solver 들의 128x128 ImageNet 데이터셋 FID 성능을 비교한 결과입니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_06.png"><img alt="progressive_distillation_06" class="bg-primary mb-1" src="../../_images/progressive_distillation_06.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 405 </span><span class="caption-text">FID scores on 128 × 128 ImageNet for various probability flow ODE integrators</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>참고로 DDIM sampler 를 ODE solver 문제로 해석하면 다음과 같이 표현할 수 있고, 이 수식은 앞으로 자주 보게 될 예정입니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_07.png"><img alt="progressive_distillation_07" class="bg-primary mb-1" src="../../_images/progressive_distillation_07.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 406 </span><span class="caption-text">DDIM sampler</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-<section id="progressive-distillation">
-<h2>3. Progressive Distillation<a class="headerlink" href="#progressive-distillation" title="Permalink to this heading">#</a></h2>
-<p>Diffusion model 을 더 효율적으로 sampling 하기 위해 소개한 <em>progressive distillation</em> 기법은 다음과 같은 절차로 진행됩니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_08.png"><img alt="progressive_distillation_08" class="bg-primary mb-1" src="../../_images/progressive_distillation_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 407 </span><span class="caption-text">Progressive Distillation</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ol class="arabic simple">
-<li><p>Standard diffusion training 기법으로 Teacher Diffusion Model 학습</p></li>
-<li><p>Student Model 정의 - Teacher Model 로부터 모델 구조 및 parameter 복사</p></li>
-<li><p>Student Model 학습</p>
-<ol class="arabic simple">
-<li><p>이때, original data <span class="math notranslate nohighlight">\(x\)</span> 대신에 <span class="math notranslate nohighlight">\(\tilde{x}\)</span>  를 target 로 student model 을 학습합니다. <span class="math notranslate nohighlight">\(\tilde{x}\)</span> 에 대한 공식은 아래 pseudocode 에 소개되는데, 이는 one-step student sample <span class="math notranslate nohighlight">\(\tilde{z}_{t''}\)</span> 과 two-step teacher sample <span class="math notranslate nohighlight">\(z_{t''}\)</span> 를 일치시키기 위해 나온 공식입니다.</p></li>
-<li><p>2 DDIM steps of teacher model 결과와 1 DDIM step of student model 결과를 일치시키는 것이 핵심입니다. 여기서 <span class="math notranslate nohighlight">\(z_t\)</span>  에서 <span class="math notranslate nohighlight">\(z_{t-1/N}\)</span> 로 넘어가는 과정을 1 DDIM step 라 정의하고, <span class="math notranslate nohighlight">\(N\)</span> 은 총 진행되는 student sampling steps 입니다.</p></li>
-<li><p>기존 denoising model 학습 시, <span class="math notranslate nohighlight">\(x\)</span> 가 <span class="math notranslate nohighlight">\(z_t\)</span> 에 대해 deterministic 하지 않기 때문에 (다른 <span class="math notranslate nohighlight">\(x\)</span> 값들에 대해 동일한 <span class="math notranslate nohighlight">\(z_t\)</span> 생성 가능) 모델은 사실상 <span class="math notranslate nohighlight">\(x\)</span> 가 아닌 weighted average of possible <span class="math notranslate nohighlight">\(x\)</span> values 를 예측하는 모델이라고 합니다. 따라서, <span class="math notranslate nohighlight">\(z_t\)</span>에 대해 deterministic 한 <span class="math notranslate nohighlight">\(\tilde{x}(z_t)\)</span> 를 예측하도록 학습한 student model 은 teacher model 보다 더 sharp 한 prediction 을 할 수 있다고 주장합니다.</p></li>
-</ol>
-</li>
-<li><p>Student Model 이 새로운 Teacher Model 이 되고 sampling steps <span class="math notranslate nohighlight">\(N\)</span> → <span class="math notranslate nohighlight">\(N/2\)</span> 로 줄어드는 이 과정을 계속 반복</p></li>
-</ol>
-<p>이에 대한 pseudocode 도 확인해보겠습니다.</p>
-<ul>
-<li><p><strong>PseudoCode</strong></p>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_09.png"><img alt="progressive_distillation_09" class="bg-primary mb-1" src="../../_images/progressive_distillation_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 408 </span><span class="caption-text">Pseudocode for Progressive Distillation</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</li>
-</ul>
-</section>
-<section id="diffusion-model-parameterization-and-training-loss">
-<h2>4. Diffusion Model Parameterization and Training Loss<a class="headerlink" href="#diffusion-model-parameterization-and-training-loss" title="Permalink to this heading">#</a></h2>
-<p>이제 denoising model <span class="math notranslate nohighlight">\(\hat{x}_{\theta}\)</span> 와 reconstruction loss weight <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 에 대한 설정값에 대해 자세히 알아보겠습니다. 우선, 논문에서는 일반성을 잃지 않고 (without loss of generality) <em>variance-preserving</em> diffusion process (i.e., <span class="math notranslate nohighlight">\(\alpha_t^2 + \sigma_t^2 = 1\)</span> ) 라는 가정을 하게 됩니다. 더 자세하게는 cosine schedule <span class="math notranslate nohighlight">\(\alpha_t = \cos(0.5\pi t)\)</span> 를 사용합니다.</p>
-<p>DDPM 을 비롯한 대다수의 논문에서 이미지 <span class="math notranslate nohighlight">\(x\)</span> 가 아닌 noise <span class="math notranslate nohighlight">\(\epsilon\)</span> 를 예측하는 denoising model <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z_t)\)</span> 를 정의합니다. <span class="math notranslate nohighlight">\(\epsilon\)</span>-space 에 정의된 손실함수에 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t) = \frac{1}{\alpha_t}(z_t - \sigma_t \hat{\epsilon}_{\theta}(z_t))\)</span> 식을 대입해보겠습니다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_10.png"><img alt="progressive_distillation_10" class="bg-primary mb-1" src="../../_images/progressive_distillation_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 409 </span><span class="caption-text">Training loss on <span class="math notranslate nohighlight">\(\epsilon\)</span>-space and <span class="math notranslate nohighlight">\(x\)</span>-space</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>따라서, 이는 이미지 <span class="math notranslate nohighlight">\(x\)</span> domain 에서 weighted reconstruction loss 를 적용하는 것과 동일하며 이때 weighting function <span class="math notranslate nohighlight">\(w(\lambda_t) = exp(\lambda_t), \lambda_t = \log[\alpha_t^2/\sigma_t^2]\)</span> 로 정의할 수 있습니다. 그러나 이러한 standard training procedure 는 progressive distillation 에 적합하지 않다고 주장합니다.</p>
-<p>Standard diffusion training 기법에서는 다양한 범위 내에서의 signal-to-noise ratio <span class="math notranslate nohighlight">\(\alpha_t^2/\sigma_t^2\)</span> 에서 모델이 학습되지만, distillation 이 진행될수록 이 signal-to-noise ratio 가 감소한다는 단점을 확인하게 됩니다. 더 자세히 설명하자면, <span class="math notranslate nohighlight">\(t\)</span> 가 증가할수록 signal-to-noise-ratio <span class="math notranslate nohighlight">\(\alpha_t^2/\sigma_t^2\)</span> 는 0 에 가까워지게 되고, <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t) = \frac{1}{\alpha_t}(z_t - \sigma_t \hat{\epsilon}_{\theta}(z_t))\)</span> 에서 <span class="math notranslate nohighlight">\(\alpha_t \rightarrow 0\)</span> 이므로 <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z_t)\)</span> 에 대한 <span class="math notranslate nohighlight">\(x\)</span>-prediction 변화량이 점차적으로 커지게 됩니다. 이는 여러번의 training step 을 거칠 때 상관없지만, sampling steps 가 줄어들수록 치명적이게 됩니다. 최종적으로 sampling steps=1 일 때까지 progressive distillation 을 적용하면 모델의 입력으로는 단순한 pure noise <span class="math notranslate nohighlight">\(\epsilon\)</span> (i.e., <span class="math notranslate nohighlight">\(\alpha_t = 0, \sigma_t = 1\)</span> ) 이 들어가게 되고, <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 과 <span class="math notranslate nohighlight">\(x\)</span>-prediction 의 상관관계가 완전히 사라지게 됩니다. 이는 위 loss function 에서 weighting function <span class="math notranslate nohighlight">\(w(\lambda_t) = 0\)</span> 인 부분에서 확인할 수 있습니다.</p>
-<p>그래서 논문에서는 다음과 같은 세가지 방법으로 stable 한 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t)\)</span> prediction 을 구할 수 있는 방법들을 제시합니다.</p>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_11.png"><img alt="progressive_distillation_11" class="bg-primary mb-1" src="../../_images/progressive_distillation_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 410 </span><span class="caption-text">Different parameterizations</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Weighting function <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 도 두 가지 방안으로 실험했습니다. 이는 signal-to-noise ratio 가 0 으로 수렴하는 현상을 방지하도록 설정되었다고 합니다.</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_12.png"><img alt="progressive_distillation_12" class="bg-primary mb-1" src="../../_images/progressive_distillation_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 411 </span><span class="caption-text">Different loss weighting functions</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_13.png"><img alt="progressive_distillation_13" class="bg-primary mb-1" src="../../_images/progressive_distillation_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 412 </span><span class="caption-text">Visualization of different loss weighting functions</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="experiments">
-<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>논문에서 32x32 부터 128x128 까지 다양한 resolution 에서 모델 성능을 확인했습니다. 또한, cosine schedule <span class="math notranslate nohighlight">\(\alpha_t = \cos(0.5 \pi t)\)</span> 그리고 DDPM 에서 소개한 U-Net 아키텍쳐를 사용했으며 부가적으로 Nichol &amp; Dhariwal (2021), Song et al. (2021c) 에서 사용된 BigGAN-style up/downsampling 기법을 활용했습니다.</p>
-<section id="model-parametrization-and-training-loss">
-<h3>5.1. Model Parametrization and Training Loss<a class="headerlink" href="#model-parametrization-and-training-loss" title="Permalink to this heading">#</a></h3>
-<p>아래 지표는 unconditional CIFAR-10 데이터셋에 앞서 소개드린 <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 외에 다른 세 가지 parametrization 기법들로 original diffusion model 의 FID 와 Inception Score 성능을 확인해본 결과입니다.</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_14.png"><img alt="progressive_distillation_14" class="bg-primary mb-1" src="../../_images/progressive_distillation_14.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 413 </span><span class="caption-text">Ablation Study on Parameterizations and Loss Weightings</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>성능을 비교해본 결과 <span class="math notranslate nohighlight">\(v\)</span>-prediction/<span class="math notranslate nohighlight">\(x\)</span>-prediction 과 Truncated SNR loss function 을 사용했을때 성능이 가장 좋은 부분을 확인할 수 있습니다. 또한, <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 과 Truncated SNR loss function 의 조합을 사용하여 학습 시, unstable 한 convergence 를 보이는 현상도 볼 수 있습니다.</p>
-<p>위 실험결과를 바탕으로 progressive distillation 진행시 CIFAR-10 데이터셋에는 <span class="math notranslate nohighlight">\(x\)</span>-prediction, 그 외 데이터셋에서는 <span class="math notranslate nohighlight">\((x,\epsilon)\)</span>-prediction 을 사용했다고 합니다. 더 자세한 hyperparameter setting 은 Appendix E 참조하시면 됩니다.</p>
-</section>
-<section id="id1">
-<h3>5.2. Progressive Distillation<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<p>논문에서 CIFAR-10, 64x64 downsampled ImageNet, 128 × 128 LSUN bedrooms, 그리고 128 × 128 LSUN Church-Outdoor 데이터셋에 progressive distillation 을 적용하여 모델 성능을 측정합니다. CIFAR-10 데이터셋 기준으로 teacher model 로부터 progressive distillation 진행 시 8192 steps 부터 시작하였고 batch size=128 로 설정하였습니다. 그 외 resolution 이 큰 데이터셋에 대해서는 1024 steps 부터 시작하고 batch size=2048 로 실험을 진행했습니다. 또한, 매 iteration 마다 <span class="math notranslate nohighlight">\(10^{-4}\)</span> 에서 <span class="math notranslate nohighlight">\(0\)</span> 으로 learning rate 를 linearly anneal 했다고 합니다.</p>
-<p>FID 성능을 확인해본 결과, 실험을 진행한 모든 4개의 데이터셋에 대해 progressive distillation 을 통해 4-8 sampling steps 만 진행해도 undistilled DDIM 그리고 stochastic sampler 에 준하는 성능을 보여주는 것을 확인할 수 있습니다. 4 sampling steps 까지 progressive distillation 진행하면서 발생하는 computational cost 가 baseline 모델 학습하는 것과 비슷한 부분을 생각했을때 엄청난 장점이라고 생각합니다.</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_15.png"><img alt="progressive_distillation_15" class="bg-primary mb-1" src="../../_images/progressive_distillation_15.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 414 </span><span class="caption-text">Comparison between Distilled, DDIM, and Stochastic Sampler</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>추가적으로 CIFAR-10 데이터셋에서 타 fast sampling method 들과 FID 성능을 비교해본 결과입니다.</p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_16.png"><img alt="progressive_distillation_16" class="bg-primary mb-1" src="../../_images/progressive_distillation_16.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 415 </span><span class="caption-text">Comparison of fast sampling results</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>그리고 64x64 ImageNet 데이터셋에 distilled 모델로 생성한 예시 이미지들입니다. 동일한 seed 에 대해서 input noise 로부터 output image 까지 mapping 이 잘되는 부분을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_17.png"><img alt="progressive_distillation_17" class="bg-primary mb-1" src="../../_images/progressive_distillation_17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 416 </span><span class="caption-text">Random samples from distilled 64 × 64 ImageNet models</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>마지막으로 distillation scheduling 에 대한 ablation study 도 논문에서 진행했습니다. 첫번째 ablation study 로는 매 distillation iteration 마다 parameter update 횟수를 <span class="math notranslate nohighlight">\(50k\)</span> 에서 <span class="math notranslate nohighlight">\(25k, 10k, 5k\)</span>  로 점차 줄이면서 FID 성능을 비교해보고, 두번째 ablation study 로는 매 distillation iteration 마다 sampling step 을 2배 대신에 4배씩 줄여가면서 student model 을 학습하여 성능을 비교합니다. 그 결과 parameter update 횟수를 현저히 줄임에도 불구하고 FID 성능이 크게 줄지 않는 반면, 각 iteration 마다 sampling step 을 4배씩 줄이는 학습방식으로는 모델 성능이 좋지 못한 부분을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id19">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_18.png"><img alt="progressive_distillation_18" class="bg-primary mb-1" src="../../_images/progressive_distillation_18.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 417 </span><span class="caption-text">Ablation study on fast sampling schedule</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>동일하게 CIFAR-10 외 ImageNet 그리고 LSUN 데이터셋에서 fast sampling schedule 을 적용한 성능 결과도 공유합니다.</p>
-<figure class="align-default" id="id20">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_19.png"><img alt="progressive_distillation_19" class="bg-primary mb-1" src="../../_images/progressive_distillation_19.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 418 </span><span class="caption-text">50k updates vs 10k updates on ImageNet/LSUN datasets</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Your Diffusion Model is Secretly a Zero-Shot Classifier</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="ConceptLab.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">ConceptLab</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-diffusion-model-in-continuous-time">2. Background - Diffusion model in continuous time</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#definition">2.1. Definition</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#objective">2.2. Objective</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling">2.3. Sampling</a><ul class="nav section-nav flex-column">
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ancestral-sampling-ddpm">2.3.1. Ancestral Sampling - DDPM</a></li>
-<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#probability-flow-ode">2.3.2. Probability Flow ODE</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#progressive-distillation">3. Progressive Distillation</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-parameterization-and-training-loss">4. Diffusion Model Parameterization and Training Loss</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-parametrization-and-training-loss">5.1. Model Parametrization and Training Loss</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">5.2. Progressive Distillation</a></li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Progressive Distillation for Fast Sampling of Diffusion Models &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/progressive_distillation';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="ConceptLab" href="ConceptLab.html" />
+    <link rel="prev" title="Your Diffusion Model is Secretly a Zero-Shot Classifier" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/progressive_distillation.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/progressive_distillation.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Progressive Distillation for Fast Sampling of Diffusion Models</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-diffusion-model-in-continuous-time">2. Background - Diffusion model in continuous time</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#definition">2.1. Definition</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#objective">2.2. Objective</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling">2.3. Sampling</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ancestral-sampling-ddpm">2.3.1. Ancestral Sampling - DDPM</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#probability-flow-ode">2.3.2. Probability Flow ODE</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#progressive-distillation">3. Progressive Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-parameterization-and-training-loss">4. Diffusion Model Parameterization and Training Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-parametrization-and-training-loss">5.1. Model Parametrization and Training Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">5.2. Progressive Distillation</a></li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Progressive Distillation for Fast Sampling of Diffusion Models (ICLR 2022)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2202.00512">https://arxiv.org/abs/2202.00512</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/google-research/google-research/tree/master/diffusion_distillation/diffusion_distillation">google-research/google-research</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Nov. 14, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="progressive-distillation-for-fast-sampling-of-diffusion-models">
+<h1>Progressive Distillation for Fast Sampling of Diffusion Models<a class="headerlink" href="#progressive-distillation-for-fast-sampling-of-diffusion-models" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>Diffusion model 이 ImageNet generation task 에서 기존 BigGAN-deep 그리고 VQ-VAE-2 모델보다 FID/CAS score 기준으로 더 좋은 성능을 보여주며 많은 각광을 받고 있습니다. 그러나 sampling 속도가 느리다는 치명적인 단점을 가지고 있습니다.</p>
+<p>이를 해결하기 위해, 논문에서는 Progressive Distillation 기법을 소개하게 됩니다. 간략히 설명하자면 사전학습된 <span class="math notranslate nohighlight">\(N\)</span>-step DDIM 모델을 <span class="math notranslate nohighlight">\(N/2\)</span>-step student 모델에 distillation 하는 과정을 반복하여, 최종적으로 4 steps 만으로도 state-of-the-art 모델이 수천 번의 sampling steps 를 거쳐 생성한 이미지들과 유사한 품질의 이미지를 생성할 수 있다고 합니다.</p>
+</section>
+<section id="background-diffusion-model-in-continuous-time">
+<h2>2. Background - Diffusion model in continuous time<a class="headerlink" href="#background-diffusion-model-in-continuous-time" title="Permalink to this heading">#</a></h2>
+<section id="definition">
+<h3>2.1. Definition<a class="headerlink" href="#definition" title="Permalink to this heading">#</a></h3>
+<p>Continuous 한 time domain 에서의 diffusion model 을 다음과 같은 요소들로 정의합니다.</p>
+<ul class="simple">
+<li><p>Training data <span class="math notranslate nohighlight">\(x \sim p(x)\)</span></p></li>
+<li><p>Latent variables <span class="math notranslate nohighlight">\(z = \{z_t | t \in [0,1]\}\)</span></p></li>
+</ul>
+<p>여기서 <span class="math notranslate nohighlight">\(z_t\)</span> 는 differentiable 한 noise schedule functions <span class="math notranslate nohighlight">\(\alpha_t, \sigma_t\)</span> 로 값이 정의되고, 이 함수들은 log <em>signal-to-noise-ratio</em> <span class="math notranslate nohighlight">\(\lambda_t = \log[\alpha_t^2/\sigma_t^2]\)</span> 가 monotonically decreasing 하도록 설정됩니다. 그리고 이들을 기반으로 다음과 같은 Markovian forward process 를 정의합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_01.png"><img alt="progressive_distillation_01" class="bg-primary mb-1" src="../../_images/progressive_distillation_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 400 </span><span class="caption-text">Markovian Forward Process</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>where <span class="math notranslate nohighlight">\(0 \leq s &lt; t \leq 1\)</span>  and <span class="math notranslate nohighlight">\(\sigma_{t|s}^2 = (1-e^{\lambda_t - \lambda_s}) \sigma_t^2\)</span></p>
+</section>
+<section id="objective">
+<h3>2.2. Objective<a class="headerlink" href="#objective" title="Permalink to this heading">#</a></h3>
+<p>Diffusion model 의 objective 는 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t)\)</span> 모델에서 <span class="math notranslate nohighlight">\(z_t \sim q(z_t | x)\)</span> 와 <span class="math notranslate nohighlight">\(\lambda_t\)</span> 를 입력받아 다음과 같이 Mean Squared Error Loss 를 최소화하는 방향으로 원본 이미지 <span class="math notranslate nohighlight">\(x\)</span> 를 예측하는 것입니다. 이때, <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 를 <em>weighting function</em> 이라 부릅니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_02.png"><img alt="progressive_distillation_02" class="bg-primary mb-1" src="../../_images/progressive_distillation_02.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 401 </span><span class="caption-text">Objective</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>where <span class="math notranslate nohighlight">\(t \sim U[0,1]\)</span></p>
+</section>
+<section id="sampling">
+<h3>2.3. Sampling<a class="headerlink" href="#sampling" title="Permalink to this heading">#</a></h3>
+<p>Diffusion model 에서 sampling 하는 방식은 다양하게 존재합니다.</p>
+<section id="ancestral-sampling-ddpm">
+<h4>2.3.1. Ancestral Sampling - DDPM<a class="headerlink" href="#ancestral-sampling-ddpm" title="Permalink to this heading">#</a></h4>
+<p>첫번째로는 DDPM 논문에서 소개하는 discrete time ancestral sampling 방식입니다. 위에 소개했던 notation 기준으로 reverse process 를 다음과 같이 수식적으로 표현 가능합니다.</p>
+<div class="math notranslate nohighlight">
+\[
+q(z_s | z_t,x) = N(z_s | \tilde{\mu}_{s|t}(z_t,x), \tilde{\sigma}_{s|t}^2I)
+\]</div>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_03.png"><img alt="progressive_distillation_03" class="bg-primary mb-1" src="../../_images/progressive_distillation_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 402 </span><span class="caption-text">Reverse Process</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이를 기반으로 <span class="math notranslate nohighlight">\(z_1 \sim N(0,I)\)</span> 로부터 다음과 같은 ancestral sampler 를 정의하게 됩니다. 이때, <span class="math notranslate nohighlight">\(\gamma\)</span> 는 sampling 시 얼마나 많은 noise 를 추가할지 설정하는 hyperparameter 입니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_04.png"><img alt="progressive_distillation_04" class="bg-primary mb-1" src="../../_images/progressive_distillation_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 403 </span><span class="caption-text">Ancestral Sampler</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="probability-flow-ode">
+<h4>2.3.2. Probability Flow ODE<a class="headerlink" href="#probability-flow-ode" title="Permalink to this heading">#</a></h4>
+<p>반면에, Song et al. (2021c) 에서 forward diffusion process 를 SDE 로 표현할 수 있고, 이를 통한 sampling process 를 <em>probability flow</em> ODE 로 표현해서 구할 수 있다고 제시합니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_05.png"><img alt="progressive_distillation_05" class="bg-primary mb-1" src="../../_images/progressive_distillation_05.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 404 </span><span class="caption-text">Probability flow ODE</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이때, <span class="math notranslate nohighlight">\(f(z_t,t) = \frac{d \log \alpha_t}{dt}z_t, g^2(t) = \frac{d\sigma_t^2}{dt} - 2 \frac{d\log \alpha_t}{dt}\sigma_t^2, \text{and}\)</span>  <span class="math notranslate nohighlight">\(\nabla_z \log \hat{p}_{\theta}(z_t) = \frac{\alpha_t\hat{x}_{\theta}(z_t) -z_t}{\sigma_t^2}\)</span> 로 정의합니다.</p>
+<p>다시 말해 <span class="math notranslate nohighlight">\(z_1 \sim N(0,I)\)</span> 로부터 이미지 <span class="math notranslate nohighlight">\(x\)</span> 를 생성하는 task 를 위와 같이 ODE solver 문제로 해석할 수 있고, Euler rule 이나 Runge-Kutta method 등의 전통적인 ODE integrator 보다 DDIM sampler 를 적용했을때 성능이 가장 좋다고 논문에서 제시합니다. 아래 사진은 다양한 Probability Flow ODE solver 들의 128x128 ImageNet 데이터셋 FID 성능을 비교한 결과입니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_06.png"><img alt="progressive_distillation_06" class="bg-primary mb-1" src="../../_images/progressive_distillation_06.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 405 </span><span class="caption-text">FID scores on 128 × 128 ImageNet for various probability flow ODE integrators</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>참고로 DDIM sampler 를 ODE solver 문제로 해석하면 다음과 같이 표현할 수 있고, 이 수식은 앞으로 자주 보게 될 예정입니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_07.png"><img alt="progressive_distillation_07" class="bg-primary mb-1" src="../../_images/progressive_distillation_07.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 406 </span><span class="caption-text">DDIM sampler</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+<section id="progressive-distillation">
+<h2>3. Progressive Distillation<a class="headerlink" href="#progressive-distillation" title="Permalink to this heading">#</a></h2>
+<p>Diffusion model 을 더 효율적으로 sampling 하기 위해 소개한 <em>progressive distillation</em> 기법은 다음과 같은 절차로 진행됩니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_08.png"><img alt="progressive_distillation_08" class="bg-primary mb-1" src="../../_images/progressive_distillation_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 407 </span><span class="caption-text">Progressive Distillation</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ol class="arabic simple">
+<li><p>Standard diffusion training 기법으로 Teacher Diffusion Model 학습</p></li>
+<li><p>Student Model 정의 - Teacher Model 로부터 모델 구조 및 parameter 복사</p></li>
+<li><p>Student Model 학습</p>
+<ol class="arabic simple">
+<li><p>이때, original data <span class="math notranslate nohighlight">\(x\)</span> 대신에 <span class="math notranslate nohighlight">\(\tilde{x}\)</span>  를 target 으로 student model 을 학습합니다. <span class="math notranslate nohighlight">\(\tilde{x}\)</span> 에 대한 공식은 아래 pseudocode 에 소개되는데, 이는 one-step student sample <span class="math notranslate nohighlight">\(\tilde{z}_{t''}\)</span> 과 two-step teacher sample <span class="math notranslate nohighlight">\(z_{t''}\)</span> 를 일치시키기 위해 나온 공식입니다.</p></li>
+<li><p>2 DDIM steps of teacher model 결과와 1 DDIM step of student model 결과를 일치시키는 것이 핵심입니다. 여기서 <span class="math notranslate nohighlight">\(z_t\)</span>  에서 <span class="math notranslate nohighlight">\(z_{t-1/N}\)</span> 로 넘어가는 과정을 1 DDIM step 이라 정의하고, <span class="math notranslate nohighlight">\(N\)</span> 은 총 진행되는 student sampling steps 입니다.</p></li>
+<li><p>기존 denoising model 학습 시, <span class="math notranslate nohighlight">\(x\)</span> 가 <span class="math notranslate nohighlight">\(z_t\)</span> 에 대해 deterministic 하지 않기 때문에 (다른 <span class="math notranslate nohighlight">\(x\)</span> 값들에 대해 동일한 <span class="math notranslate nohighlight">\(z_t\)</span> 생성 가능) 모델은 사실상 <span class="math notranslate nohighlight">\(x\)</span> 가 아닌 weighted average of possible <span class="math notranslate nohighlight">\(x\)</span> values 를 예측하는 모델이라고 합니다. 따라서, <span class="math notranslate nohighlight">\(z_t\)</span>에 대해 deterministic 한 <span class="math notranslate nohighlight">\(\tilde{x}(z_t)\)</span> 를 예측하도록 학습한 student model 은 teacher model 보다 더 sharp 한 prediction 을 할 수 있다고 주장합니다.</p></li>
+</ol>
+</li>
+<li><p>Student Model 이 새로운 Teacher Model 이 되고 sampling steps <span class="math notranslate nohighlight">\(N\)</span> → <span class="math notranslate nohighlight">\(N/2\)</span> 로 줄어드는 이 과정을 계속 반복</p></li>
+</ol>
+<p>이에 대한 pseudocode 도 확인해보겠습니다.</p>
+<ul>
+<li><p><strong>PseudoCode</strong></p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_09.png"><img alt="progressive_distillation_09" class="bg-primary mb-1" src="../../_images/progressive_distillation_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 408 </span><span class="caption-text">Pseudocode for Progressive Distillation</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+</section>
+<section id="diffusion-model-parameterization-and-training-loss">
+<h2>4. Diffusion Model Parameterization and Training Loss<a class="headerlink" href="#diffusion-model-parameterization-and-training-loss" title="Permalink to this heading">#</a></h2>
+<p>이제 denoising model <span class="math notranslate nohighlight">\(\hat{x}_{\theta}\)</span> 와 reconstruction loss weight <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 에 대한 설정값에 대해 자세히 알아보겠습니다. 우선, 논문에서는 일반성을 잃지 않고 (without loss of generality) <em>variance-preserving</em> diffusion process (i.e., <span class="math notranslate nohighlight">\(\alpha_t^2 + \sigma_t^2 = 1\)</span> ) 라는 가정을 하게 됩니다. 더 자세하게는 cosine schedule <span class="math notranslate nohighlight">\(\alpha_t = \cos(0.5\pi t)\)</span> 를 사용합니다.</p>
+<p>DDPM 을 비롯한 대다수의 논문에서 이미지 <span class="math notranslate nohighlight">\(x\)</span> 가 아닌 noise <span class="math notranslate nohighlight">\(\epsilon\)</span> 를 예측하는 denoising model <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z_t)\)</span> 를 정의합니다. <span class="math notranslate nohighlight">\(\epsilon\)</span>-space 에 정의된 손실함수에 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t) = \frac{1}{\alpha_t}(z_t - \sigma_t \hat{\epsilon}_{\theta}(z_t))\)</span> 식을 대입해보겠습니다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_10.png"><img alt="progressive_distillation_10" class="bg-primary mb-1" src="../../_images/progressive_distillation_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 409 </span><span class="caption-text">Training loss on <span class="math notranslate nohighlight">\(\epsilon\)</span>-space and <span class="math notranslate nohighlight">\(x\)</span>-space</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>따라서, 이는 이미지 <span class="math notranslate nohighlight">\(x\)</span> domain 에서 weighted reconstruction loss 를 적용하는 것과 동일하며 이때 weighting function <span class="math notranslate nohighlight">\(w(\lambda_t) = \exp(\lambda_t), \lambda_t = \log[\alpha_t^2/\sigma_t^2]\)</span> 로 정의할 수 있습니다. 그러나 이러한 standard training procedure 는 progressive distillation 에 적합하지 않다고 주장합니다.</p>
+<p>Standard diffusion training 기법에서는 다양한 범위 내에서의 signal-to-noise ratio <span class="math notranslate nohighlight">\(\alpha_t^2/\sigma_t^2\)</span> 에서 모델이 학습되지만, distillation 이 진행될수록 이 signal-to-noise ratio 가 감소한다는 단점을 확인하게 됩니다. 더 자세히 설명하자면, <span class="math notranslate nohighlight">\(t\)</span> 가 증가할수록 signal-to-noise-ratio <span class="math notranslate nohighlight">\(\alpha_t^2/\sigma_t^2\)</span> 는 0 에 가까워지게 되고, <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t) = \frac{1}{\alpha_t}(z_t - \sigma_t \hat{\epsilon}_{\theta}(z_t))\)</span> 에서 <span class="math notranslate nohighlight">\(\alpha_t \rightarrow 0\)</span> 이므로 <span class="math notranslate nohighlight">\(\hat{\epsilon}_{\theta}(z_t)\)</span> 에 대한 <span class="math notranslate nohighlight">\(x\)</span>-prediction 변화량이 점차적으로 커지게 됩니다. 이는 여러번의 sampling step 을 거칠 때 상관없지만, sampling steps 가 줄어들수록 치명적이게 됩니다. 최종적으로 sampling steps=1 일 때까지 progressive distillation 을 적용하면 모델의 입력으로는 단순한 pure noise <span class="math notranslate nohighlight">\(\epsilon\)</span> (i.e., <span class="math notranslate nohighlight">\(\alpha_t = 0, \sigma_t = 1\)</span> ) 이 들어가게 되고, <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 과 <span class="math notranslate nohighlight">\(x\)</span>-prediction 의 상관관계가 완전히 사라지게 됩니다. 이는 위 loss function 에서 weighting function <span class="math notranslate nohighlight">\(w(\lambda_t) = 0\)</span> 인 부분에서 확인할 수 있습니다.</p>
+<p>그래서 논문에서는 다음과 같은 세가지 방법으로 stable 한 <span class="math notranslate nohighlight">\(\hat{x}_{\theta}(z_t)\)</span> prediction 을 구할 수 있는 방법들을 제시합니다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_11.png"><img alt="progressive_distillation_11" class="bg-primary mb-1" src="../../_images/progressive_distillation_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 410 </span><span class="caption-text">Different parameterizations</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Weighting function <span class="math notranslate nohighlight">\(w(\lambda_t)\)</span> 도 두 가지 방안으로 실험했습니다. 이는 signal-to-noise ratio 가 0 으로 수렴하는 현상을 방지하도록 설정되었다고 합니다.</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_12.png"><img alt="progressive_distillation_12" class="bg-primary mb-1" src="../../_images/progressive_distillation_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 411 </span><span class="caption-text">Different loss weighting functions</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_13.png"><img alt="progressive_distillation_13" class="bg-primary mb-1" src="../../_images/progressive_distillation_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 412 </span><span class="caption-text">Visualization of different loss weighting functions</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="experiments">
+<h2>5. Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>논문에서 32x32 부터 128x128 까지 다양한 resolution 에서 모델 성능을 확인했습니다. 또한, cosine schedule <span class="math notranslate nohighlight">\(\alpha_t = \cos(0.5 \pi t)\)</span> 그리고 DDPM 에서 소개한 U-Net 아키텍쳐를 사용했으며 부가적으로 Nichol &amp; Dhariwal (2021), Song et al. (2021c) 에서 사용된 BigGAN-style up/downsampling 기법을 활용했습니다.</p>
+<section id="model-parametrization-and-training-loss">
+<h3>5.1. Model Parametrization and Training Loss<a class="headerlink" href="#model-parametrization-and-training-loss" title="Permalink to this heading">#</a></h3>
+<p>아래 지표는 unconditional CIFAR-10 데이터셋에 앞서 소개드린 <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 외에 다른 세 가지 parametrization 기법들로 original diffusion model 의 FID 와 Inception Score 성능을 확인해본 결과입니다.</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_14.png"><img alt="progressive_distillation_14" class="bg-primary mb-1" src="../../_images/progressive_distillation_14.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 413 </span><span class="caption-text">Ablation Study on Parameterizations and Loss Weightings</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>성능을 비교해본 결과 <span class="math notranslate nohighlight">\(v\)</span>-prediction/<span class="math notranslate nohighlight">\(x\)</span>-prediction 과 Truncated SNR loss function 을 사용했을때 성능이 가장 좋은 부분을 확인할 수 있습니다. 또한, <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 과 Truncated SNR loss function 의 조합을 사용하여 학습 시, unstable 한 convergence 를 보이는 현상도 볼 수 있습니다.</p>
+<p>위 실험결과를 바탕으로 progressive distillation 진행시 CIFAR-10 데이터셋에는 <span class="math notranslate nohighlight">\(x\)</span>-prediction, 그 외 데이터셋에서는 <span class="math notranslate nohighlight">\((x,\epsilon)\)</span>-prediction 을 사용했다고 합니다. 더 자세한 hyperparameter setting 은 Appendix E 참조하시면 됩니다.</p>
+</section>
+<section id="id1">
+<h3>5.2. Progressive Distillation<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
+<p>논문에서 CIFAR-10, 64x64 downsampled ImageNet, 128 × 128 LSUN bedrooms, 그리고 128 × 128 LSUN Church-Outdoor 데이터셋에 progressive distillation 을 적용하여 모델 성능을 측정합니다. CIFAR-10 데이터셋 기준으로 teacher model 로부터 progressive distillation 진행 시 8192 steps 부터 시작하였고 batch size=128 로 설정하였습니다. 그 외 resolution 이 큰 데이터셋에 대해서는 1024 steps 부터 시작하고 batch size=2048 로 실험을 진행했습니다. 또한, 매 iteration 마다 <span class="math notranslate nohighlight">\(10^{-4}\)</span> 에서 <span class="math notranslate nohighlight">\(0\)</span> 으로 learning rate 를 linearly anneal 했다고 합니다.</p>
+<p>FID 성능을 확인해본 결과, 실험을 진행한 모든 4개의 데이터셋에 대해 progressive distillation 을 통해 4-8 sampling steps 만 진행해도 undistilled DDIM 그리고 stochastic sampler 에 준하는 성능을 보여주는 것을 확인할 수 있습니다. 4 sampling steps 까지 progressive distillation 진행하면서 발생하는 computational cost 가 baseline 모델 학습하는 것과 비슷한 부분을 생각했을때 엄청난 장점이라고 생각합니다.</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_15.png"><img alt="progressive_distillation_15" class="bg-primary mb-1" src="../../_images/progressive_distillation_15.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 414 </span><span class="caption-text">Comparison between Distilled, DDIM, and Stochastic Sampler</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>추가적으로 CIFAR-10 데이터셋에서 타 fast sampling method 들과 FID 성능을 비교해본 결과입니다.</p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_16.png"><img alt="progressive_distillation_16" class="bg-primary mb-1" src="../../_images/progressive_distillation_16.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 415 </span><span class="caption-text">Comparison of fast sampling results</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>그리고 64x64 ImageNet 데이터셋에 distilled 모델로 생성한 예시 이미지들입니다. 동일한 seed 에 대해서 input noise 로부터 output image 까지 mapping 이 잘되는 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_17.png"><img alt="progressive_distillation_17" class="bg-primary mb-1" src="../../_images/progressive_distillation_17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 416 </span><span class="caption-text">Random samples from distilled 64 × 64 ImageNet models</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>마지막으로 distillation scheduling 에 대한 ablation study 도 논문에서 진행했습니다. 첫번째 ablation study 로는 매 distillation iteration 마다 parameter update 횟수를 <span class="math notranslate nohighlight">\(50k\)</span> 에서 <span class="math notranslate nohighlight">\(25k, 10k, 5k\)</span> 로 점차 줄이면서 FID 성능을 비교해보고, 두번째 ablation study 로는 매 distillation iteration 마다 sampling step 을 2배 대신에 4배씩 줄여가면서 student model 을 학습하여 성능을 비교합니다. 그 결과 parameter update 횟수를 현저히 줄임에도 불구하고 FID 성능이 크게 저하되지 않는 반면, 각 iteration 마다 sampling step 을 4배씩 줄이는 학습방식으로는 모델 성능이 좋지 못한 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id19">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_18.png"><img alt="progressive_distillation_18" class="bg-primary mb-1" src="../../_images/progressive_distillation_18.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 417 </span><span class="caption-text">Ablation study on fast sampling schedule</span><a class="headerlink" href="#id19" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>동일하게 CIFAR-10 외 ImageNet 그리고 LSUN 데이터셋에서 fast sampling schedule 을 적용한 성능 결과도 공유합니다.</p>
+<figure class="align-default" id="id20">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/progressive_distillation_19.png"><img alt="progressive_distillation_19" class="bg-primary mb-1" src="../../_images/progressive_distillation_19.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 418 </span><span class="caption-text">50k updates vs 10k updates on ImageNet/LSUN datasets</span><a class="headerlink" href="#id20" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Your Diffusion Model is Secretly a Zero-Shot Classifier</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="ConceptLab.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">ConceptLab</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#background-diffusion-model-in-continuous-time">2. Background - Diffusion model in continuous time</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#definition">2.1. Definition</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#objective">2.2. Objective</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#sampling">2.3. Sampling</a><ul class="nav section-nav flex-column">
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#ancestral-sampling-ddpm">2.3.1. Ancestral Sampling - DDPM</a></li>
+<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#probability-flow-ode">2.3.2. Probability Flow ODE</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#progressive-distillation">3. Progressive Distillation</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#diffusion-model-parameterization-and-training-loss">4. Diffusion Model Parameterization and Training Loss</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">5. Experiments</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-parametrization-and-training-loss">5.1. Model Parametrization and Training Loss</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1">5.2. Progressive Distillation</a></li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/t2i_adapter.html b/docs/review/t2i_adapter.html
old mode 100644
new mode 100755
index 37fac996..ae14aca1
--- a/docs/review/t2i_adapter.html
+++ b/docs/review/t2i_adapter.html
@@ -1,1054 +1,1074 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>T2I-Adapter &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/t2i_adapter';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="IP-Adapter" href="IP_Adapter.html" />
-    <link rel="prev" title="SDXL" href="SDXL.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/t2i_adapter.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/t2i_adapter.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>T2I-Adapter</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1. Preliminary: Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview-of-t2i-adapter">3.2. Overview of T2I-Adapter</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapter-design">3.3. Adapter Design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-optimization">3.4. Model Optimization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiment">Experiment</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1. Implementation Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison">4.2. Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">4.3. Applications</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4. Ablation Study</a></li>
-</ul>
-</li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> T2I-Adapter: Learning Adapters to Dig out More Controllable Ability for Text-to-Image Diffusion Models</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2302.08453">https://arxiv.org/abs/2302.08453</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/t2i_adapter">huggingface/diffusers</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Last updated on Oct. 03, 2023</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="t2i-adapter">
-<h1>T2I-Adapter<a class="headerlink" href="#t2i-adapter" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>이번 시간에는 Tencent ARC Lab 에서 소개하는 T2I-Adapter 모델에 대해 알아볼 예정입니다.</p>
-<p>Stable Diffusion 을 비롯한 기존의 T2I 모델들이 난해한 prompt (e.g., “A car with flying wings” &amp; “Iron Man with bunny ears”) 을 입력받을 시, 생성되는 이미지 퀄리티가 저하되는 부분을 확인할 수 있는데요. 논문에서는 T2I 모델이 low level (e.g., textures), middle level (e.g., edges), 그리고 high level (e.g., semantics) 에 대한 정보들을 implicit 하게 가지고 있지만, 이를 표현하기 위해서는 text prompt 만으로는 한계가 있고 보다 세밀한 controlling (e.g., color, structure) 이 필요하다고 서술합니다. 즉, T2I 모델의 internal knowledge 와 external guidance 의 alignment 에 대한 추가적인 학습이 필요하다고 주장합니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_01.png"><img alt="t2i_adapter_01" class="bg-primary mb-1" src="../../_images/t2i_adapter_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 250 </span><span class="caption-text">Effect of External Guidance</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>논문에서는 이를 해결하기 위해 T2I-Adapter 모델을 소개하고 다음과 같이 5가지 장점이 있다고 합니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_02.png"><img alt="t2i_adapter_02" class="bg-primary mb-1" src="../../_images/t2i_adapter_02.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 251 </span><span class="caption-text">Various Guidance of T2I-Adapter</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul>
-<li><p><em>Plug-and-play</em> : 기존의 T2I 모델의 generalization ability 유지</p></li>
-<li><p><em>Simple and small</em> : ~77M parameters and ~300M storage</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_03.png"><img alt="t2i_adapter_03" class="bg-primary mb-1" src="../../_images/t2i_adapter_03.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 252 </span><span class="caption-text">ControlNet vs T2I-Adapter</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p>ControlNet 같은 경우에 reverse diffusion process 에서 ControlNet 과 Unet 모두 연산작업이 실행됩니다. 이때 ControlNet 은 Unet Encoder 의 구조를 그대로 가져오기 때문에 parameter size 및 storage 용량이 크고, 이는 이미지 생성하는데 큰 bottleneck 이 됩니다.</p></li>
-</ul>
-</li>
-<li><p><em>Flexible</em> : 다양한 adapter (e.g., color, structure) 학습 가능</p></li>
-<li><p><em>Composable</em> : Multiple adapter 적용 가능</p></li>
-<li><p><em>Generalizable</em> : 동일한 구조를 가진 다른 T2I 모델에 동일한 adapter 적용 가능</p></li>
-</ul>
-</section>
-<section id="method">
-<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
-<section id="preliminary-stable-diffusion">
-<h3>3.1. Preliminary: Stable Diffusion<a class="headerlink" href="#preliminary-stable-diffusion" title="Permalink to this heading">#</a></h3>
-<p>T2I-Adapter 의 기반이 되는 T2I 모델 Stable Diffusion 모델은 기본적으로 two-stage model 이고, autoencoder 와 Unet denoiser 로 구성되어 있습니다. Autoencoder 를 통해 이미지를 latent space 로 바꾸고 다시 복원하는 역할을 하고, Unet denoiser 는 diffusion process 를 통해 다음과 같은 손실함수를 최소화하는 방향으로 학습하게 됩니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L = \mathbb{E}_{Z_{t}, C, \epsilon, t}(||\epsilon-\epsilon_{\theta}(Z_t, C)||_2^2)
-\]</div>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(Z_t = \sqrt{\bar{\alpha}_t}Z_0 + \sqrt{1-\bar{\alpha}_t}\epsilon, \epsilon \sim N(0,I)\)</span> := noised feature map at step t</p></li>
-<li><p><span class="math notranslate nohighlight">\(C\)</span> := conditional information</p></li>
-<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> := UNet denoiser</p></li>
-</ul>
-<p>Inference 시에는 random Gaussian distribution 을 따르는 <span class="math notranslate nohighlight">\(Z_T\)</span>, 그리고 text prompt 를 CLIP text encoder 에 입력함으로써 생성한 token <span class="math notranslate nohighlight">\(y\)</span> 를 cross attention 을 통해 Unet denoiser <span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> 에 입력합니다. 최종적으로, diffusion process 를 통해 denoise 된 latent feature 를 decoder 에 입력하여 최종 이미지를 생성하게 됩니다. Cross attention 의 자세한 방식은 다음과 같습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_04.png"><img alt="t2i_adapter_04" class="bg-primary mb-1" src="../../_images/t2i_adapter_04.png" style="width: 550px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 253 </span><span class="caption-text">Cross Attention</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<ul class="simple">
-<li><p><span class="math notranslate nohighlight">\(W_Q, W_K, W_V\)</span> := learnable projection matrices</p></li>
-<li><p><span class="math notranslate nohighlight">\(\phi(\cdot), \tau(\cdot)\)</span> := learnable embeddings</p></li>
-</ul>
-</section>
-<section id="overview-of-t2i-adapter">
-<h3>3.2. Overview of T2I-Adapter<a class="headerlink" href="#overview-of-t2i-adapter" title="Permalink to this heading">#</a></h3>
-<p>논문에서는 다음과 같은 형태로 pre-trained 된 Stable Diffusion 을 비롯한 T2I 모델에 Adapter 를 추가하는 방식을 소개합니다. Adapter 의 자세한 구조는 다음과 같습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_05.png"><img alt="t2i_adapter_05" class="bg-primary mb-1" src="../../_images/t2i_adapter_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 254 </span><span class="caption-text">Overview of T2I-Adapter</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="adapter-design">
-<h3>3.3. Adapter Design<a class="headerlink" href="#adapter-design" title="Permalink to this heading">#</a></h3>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_06.png"><img alt="t2i_adapter_06" class="bg-primary mb-1" src="../../_images/t2i_adapter_06.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 255 </span><span class="caption-text">Adapter Design</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Conditional input 은 512x512 의 크기를 가지며, 이는 <em>pixel unshuffle downsampling</em> 을 통해 64x64 이미지로 변환이 되어 1개의 convolution layer 와 2개의 residual block 으로 구성된 <em>scale</em> 을 4번 통과하게 됩니다. 이때, 각 <em>scale</em> 을 거치고 나온 condition feature 를 <span class="math notranslate nohighlight">\(F_c^k\)</span> 라 정의합니다.</p>
-<p>최종적으로 multi-scale condition feature <span class="math notranslate nohighlight">\(F_c = \{F_c^1, F_c^2, F_c^3, F_c^4\}\)</span> 가 생성되고, 이는 Unet encoder 에서의 intermediate feature <span class="math notranslate nohighlight">\(F_{enc} = \{F_{enc}^1, F_{enc}^2, F_{enc}^3, F_{enc}^4\}\)</span> 와 더해지게 됩니다. 이때, dimension 크기는 동일하도록 설정했기 때문에 덧셈 연산하는데 문제 없습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_07.png"><img alt="t2i_adapter_07" class="bg-primary mb-1" src="../../_images/t2i_adapter_07.png" style="width: 500px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 256 </span><span class="caption-text">Multi-Scale Condition Feature</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>해당 implementation code 도 살펴보겠습니다.</p>
-<ul>
-<li><p><strong>T2I-Adapter module code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">FullAdapter</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
-        <span class="bp">self</span><span class="p">,</span>
-        <span class="n">in_channels</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
-        <span class="n">channels</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="mi">320</span><span class="p">,</span> <span class="mi">640</span><span class="p">,</span> <span class="mi">1280</span><span class="p">,</span> <span class="mi">1280</span><span class="p">],</span>
-        <span class="n">num_res_blocks</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
-        <span class="n">downscale_factor</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
-    <span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="n">in_channels</span> <span class="o">=</span> <span class="n">in_channels</span> <span class="o">*</span> <span class="n">downscale_factor</span><span class="o">**</span><span class="mi">2</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">unshuffle</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">PixelUnshuffle</span><span class="p">(</span><span class="n">downscale_factor</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">conv_in</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">body</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">(</span>
-            <span class="p">[</span>
-                <span class="n">AdapterBlock</span><span class="p">(</span><span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">num_res_blocks</span><span class="p">),</span>
-                <span class="o">*</span><span class="p">[</span>
-                    <span class="n">AdapterBlock</span><span class="p">(</span><span class="n">channels</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">],</span> <span class="n">channels</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">num_res_blocks</span><span class="p">,</span> <span class="n">down</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
-                    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">channels</span><span class="p">))</span>
-                <span class="p">],</span>
-            <span class="p">]</span>
-        <span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">total_downscale_factor</span> <span class="o">=</span> <span class="n">downscale_factor</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">**</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">channels</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]:</span>
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">unshuffle</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv_in</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-        <span class="n">features</span> <span class="o">=</span> <span class="p">[]</span>
-
-        <span class="k">for</span> <span class="n">block</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">body</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="n">block</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-            <span class="n">features</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-        <span class="k">return</span> <span class="n">features</span>
-</pre></div>
-</div>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">AdapterBlock</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">,</span> <span class="n">num_res_blocks</span><span class="p">,</span> <span class="n">down</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="o">=</span> <span class="kc">None</span>
-        <span class="k">if</span> <span class="n">down</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="o">=</span> <span class="n">Downsample2D</span><span class="p">(</span><span class="n">in_channels</span><span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="o">=</span> <span class="kc">None</span>
-        <span class="k">if</span> <span class="n">in_channels</span> <span class="o">!=</span> <span class="n">out_channels</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
-
-        <span class="bp">self</span><span class="o">.</span><span class="n">resnets</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
-            <span class="o">*</span><span class="p">[</span><span class="n">AdapterResnetBlock</span><span class="p">(</span><span class="n">out_channels</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_res_blocks</span><span class="p">)],</span>
-        <span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">resnets</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-
-        <span class="k">return</span> <span class="n">x</span>
-
-<span class="k">class</span> <span class="nc">AdapterResnetBlock</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">block1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">act</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">block2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
-
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
-        <span class="n">h</span> <span class="o">=</span> <span class="n">x</span>
-        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">block1</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
-        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">act</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
-        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">block2</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
-
-        <span class="k">return</span> <span class="n">h</span> <span class="o">+</span> <span class="n">x</span>
-</pre></div>
-</div>
-</li>
-<li><p><strong>SD + T2I-Adapter implementation code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># 7. Denoising loop</span>
-<span class="n">adapter_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">adapter</span><span class="p">(</span><span class="n">adapter_input</span><span class="p">)</span>
-<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
-    <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span> <span class="o">*</span> <span class="n">adapter_conditioning_scale</span>
-<span class="k">if</span> <span class="n">num_images_per_prompt</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
-    <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
-        <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">num_images_per_prompt</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
-<span class="k">if</span> <span class="n">do_classifier_free_guidance</span><span class="p">:</span>
-    <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
-        <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">v</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
-
-<span class="n">num_warmup_steps</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">timesteps</span><span class="p">)</span> <span class="o">-</span> <span class="n">num_inference_steps</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">order</span>
-<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">progress_bar</span><span class="p">(</span><span class="n">total</span><span class="o">=</span><span class="n">num_inference_steps</span><span class="p">)</span> <span class="k">as</span> <span class="n">progress_bar</span><span class="p">:</span>
-    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">timesteps</span><span class="p">):</span>
-        <span class="c1"># expand the latents if we are doing classifier free guidance</span>
-        <span class="n">latent_model_input</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">latents</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span><span class="p">)</span> <span class="k">if</span> <span class="n">do_classifier_free_guidance</span> <span class="k">else</span> <span class="n">latents</span>
-        <span class="n">latent_model_input</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">scale_model_input</span><span class="p">(</span><span class="n">latent_model_input</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
-
-        <span class="c1"># predict the noise residual</span>
-        <span class="n">noise_pred</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">unet</span><span class="p">(</span>
-            <span class="n">latent_model_input</span><span class="p">,</span>
-            <span class="n">t</span><span class="p">,</span>
-            <span class="n">encoder_hidden_states</span><span class="o">=</span><span class="n">prompt_embeds</span><span class="p">,</span>
-            <span class="n">cross_attention_kwargs</span><span class="o">=</span><span class="n">cross_attention_kwargs</span><span class="p">,</span>
-            <span class="n">down_block_additional_residuals</span><span class="o">=</span><span class="p">[</span><span class="n">state</span><span class="o">.</span><span class="n">clone</span><span class="p">()</span> <span class="k">for</span> <span class="n">state</span> <span class="ow">in</span> <span class="n">adapter_state</span><span class="p">],</span>
-        <span class="p">)</span><span class="o">.</span><span class="n">sample</span>
-
-        <span class="c1"># perform guidance</span>
-        <span class="k">if</span> <span class="n">do_classifier_free_guidance</span><span class="p">:</span>
-            <span class="n">noise_pred_uncond</span><span class="p">,</span> <span class="n">noise_pred_text</span> <span class="o">=</span> <span class="n">noise_pred</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
-            <span class="n">noise_pred</span> <span class="o">=</span> <span class="n">noise_pred_uncond</span> <span class="o">+</span> <span class="n">guidance_scale</span> <span class="o">*</span> <span class="p">(</span><span class="n">noise_pred_text</span> <span class="o">-</span> <span class="n">noise_pred_uncond</span><span class="p">)</span>
-
-        <span class="c1"># compute the previous noisy sample x_t -&gt; x_t-1</span>
-        <span class="n">latents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">noise_pred</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">latents</span><span class="p">,</span> <span class="o">**</span><span class="n">extra_step_kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">prev_sample</span>
-</pre></div>
-</div>
-</li>
-</ul>
-<p>Adapter 종류로는 크게 structure 에 대한 conditioning 과 color 에 대한 conditioning 으로 분류할 수 있습니다. Structure controlling 으로는 대표적으로 sketch, depth map, semantic segmentation map, keypose 등이 있습니다. Color map 은 이미지를 우선적으로 <em>high bicubic downsampling</em> 을 통해 semantic 및 structural 한 정보를 제외시키고, <em>nearest upsampling</em> 기법으로 다시 원본 이미지 크기로 복원하는 작업을 통해 생성합니다.</p>
-<p>앞서 설명한 부분처럼 추가 학습 없이 여러 adapter 로 conditioning 할 수도 있습니다. Multi-adapter 로 controlling 할 시, 다음과 같이 각 adapter 로부터 나온 condition feature 에 weight <span class="math notranslate nohighlight">\(w_k\)</span> 를 부여해 최종 condition feature 를 정의하게 됩니다.</p>
-<figure class="align-default" id="id8">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_08.png"><img alt="t2i_adapter_08" class="bg-primary mb-1" src="../../_images/t2i_adapter_08.png" style="width: 350px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 257 </span><span class="caption-text">Multi-Adapter Conditioning</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="model-optimization">
-<h3>3.4. Model Optimization<a class="headerlink" href="#model-optimization" title="Permalink to this heading">#</a></h3>
-<p>모델 학습 시, SD 파라미터는 고정시킨 상태로 T2I-Adapter 파라미터만 학습합니다. 이때, T2I-Adapter 손실함수는 SD 학습 시와 유사하게 다음과 같이 정의합니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L_{AD} = \mathbb{E}_{Z_{0}, t, F_c, \epsilon \sim N(0,I)}[||\epsilon-\epsilon_{\theta}(Z_t, t, \tau(y), F_c)||_2^2]
-\]</div>
-<p>where <span class="math notranslate nohighlight">\(t \sim U(0,T)\)</span></p>
-<p><strong>Non-uniform time step sampling during training</strong></p>
-<p>Diffusion 모델 학습 시와 동일하게, time embedding 을 adapter 에 input 으로 넣으면서 성능 개선 효과가 있는 것을 확인했지만 매 time step <span class="math notranslate nohighlight">\(t\)</span> 마다 <span class="math notranslate nohighlight">\(F_c\)</span> 를 conditioning 하는 것은 computationally expensive 합니다.</p>
-<p>따라서, 논문에서는 DDIM inference sampling 을 크게 3가지 stage (i.e., beginning, middle, late stage) 로 분류하는 방법을 소개합니다. 실험해본 결과, middle 그리고 late stage 에 적용하는 것보다 beginning stage 에서 guidance 를 주는 효과가 더 크다고 합니다.</p>
-<figure class="align-default" id="id9">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_09.png"><img alt="t2i_adapter_09" class="bg-primary mb-1" src="../../_images/t2i_adapter_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 258 </span><span class="caption-text">DDIM Inference Sampling Stages</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>따라서, 최대한 time step <span class="math notranslate nohighlight">\(t\)</span> 가 early sampling stage 에 포함되도록 다음 수식처럼 non-uniformly 하게 sampling 작업을 진행했고, 이에 대한 결과도 공유합니다.</p>
-<div class="math notranslate nohighlight">
-\[
-t = (1-(t/T)^3) \times T, t \in U(0,T)
-\]</div>
-<figure class="align-default" id="id10">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_10.png"><img alt="t2i_adapter_10" class="bg-primary mb-1" src="../../_images/t2i_adapter_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 259 </span><span class="caption-text">Effect of Cubic Sampling</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-<section id="experiment">
-<h2>Experiment<a class="headerlink" href="#experiment" title="Permalink to this heading">#</a></h2>
-<section id="implementation-details">
-<h3>4.1. Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
-<p>T2I-Adapter 학습 시, hyperparameter 및 데이터셋 구축 상세사항은 다음과 같습니다.</p>
-<ul class="simple">
-<li><p>Hyperparameters</p>
-<ul>
-<li><p>10 epochs</p></li>
-<li><p>Batch size = 8</p></li>
-<li><p>Learning rate = <span class="math notranslate nohighlight">\(1 \times 10^{-5}\)</span></p></li>
-<li><p>Adam optimizer</p></li>
-<li><p>4X NVIDIA Tesla 32G-V100 GPUs (3 days)</p></li>
-</ul>
-</li>
-<li><p>실험별 데이터셋 구축</p>
-<ul>
-<li><p><em>Sketch Map</em></p>
-<ul>
-<li><p>COCO17 데이터셋 - 164K images</p></li>
-<li><p>PiDiNet 를 활용해 sketch map 생성</p></li>
-</ul>
-</li>
-<li><p><em>Semantic segmentation map</em></p>
-<ul>
-<li><p>COCO-Stuff 데이터셋 - 164K images</p></li>
-</ul>
-</li>
-<li><p><em>Keypoints &amp; Color &amp; Depth maps</em></p>
-<ul>
-<li><p>LAION-AESTHETICS 데이터셋으로부터 600K image-text pairs 추출</p></li>
-<li><p>MM-Pose, MiDaS 모델로 각각 Keypoint, Depth map 생성</p></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-</section>
-<section id="comparison">
-<h3>4.2. Comparison<a class="headerlink" href="#comparison" title="Permalink to this heading">#</a></h3>
-<p>기존 SOTA 모델들과 정량적인 수치로 비교하는데 FID 와 CLIP Score 를 사용하였고, 하단 사진처럼 기존 GAN-based 그리고 diffusion-based method 모델들보다 성능이 좋습니다.</p>
-<figure class="align-default" id="id11">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_11.png"><img alt="t2i_adapter_11" class="bg-primary mb-1" src="../../_images/t2i_adapter_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 260 </span><span class="caption-text">Qualitative Comparison</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<figure class="align-default" id="id12">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_12.png"><img alt="t2i_adapter_12" class="bg-primary mb-1" src="../../_images/t2i_adapter_12.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 261 </span><span class="caption-text">Quantitative Comparison</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="applications">
-<h3>4.3. Applications<a class="headerlink" href="#applications" title="Permalink to this heading">#</a></h3>
-<p>해당 예시들은 다양한 single adapter controlling 에 대한 결과들을 보여줍니다. 특히 인상적인 부분은 sketch 로 controlling 시, sketch 가 정확하지 않아도 이미지 생성에 robust 한 성능을 보여주는 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id13">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_13.png"><img alt="t2i_adapter_13" class="bg-primary mb-1" src="../../_images/t2i_adapter_13.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 262 </span><span class="caption-text">Visualization of Single-Adapter Controlling</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또한, image editing 도 가능합니다. SD inpainting mode 로 특정 지역을 masking 한 후, T2I-Adapter 를 통해 image editing 을 한 예시 사진입니다. Adapter 없이, SD inpainting 만으로는 성능이 좋지 못하다고 합니다.</p>
-<figure class="align-default" id="id14">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_14.png"><img alt="t2i_adapter_14" class="bg-primary mb-1" src="../../_images/t2i_adapter_14.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 263 </span><span class="caption-text">Image Editing with T2I-Adapter</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>아래 예시는 multiple adapter 를 적용한 것으로 위에서부터 아래로 각각 depth + keypose 그리고 sketch + color map 을 conditioning 한 결과입니다.</p>
-<figure class="align-default" id="id15">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_15.png"><img alt="t2i_adapter_15" class="bg-primary mb-1" src="../../_images/t2i_adapter_15.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 264 </span><span class="caption-text">Composable Controlling</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>마지막으로, 장점들 중 하나로 명시되었던 generalization ability 를 보여준 사례입니다. 학습 완료한 Adapter 를 동일한 구조를 가진 T2I 모델에 적용 가능한 것을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id16">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_16.png"><img alt="t2i_adapter_16" class="bg-primary mb-1" src="../../_images/t2i_adapter_16.png" style="width: 600px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 265 </span><span class="caption-text">Generalizable Controlling</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="ablation-study">
-<h3>4.4. Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
-<p>논문에서는 guidance mode, 그리고 complexity 에 대한 ablation study 를 진행했습니다.</p>
-<p>SD 모델은 encoder 그리고 decoder 에 각각 4개의 scale (i.e., 64×64, 32×32, 16×16, 8×8) 을 가지고 있는데, 하단 table 처럼 각각 다른 scale 에 adapter guidance 를 적용하면서 FID 성능을 비교했습니다. Scale Number 가 4보다 작을 경우, large scale 에 순차적으로 guidance 를 적용했습니다. 그 결과, Unet encoder 에만 4 scales 모두 guidance 를 적용하는 것이 성능이 제일 좋다고 합니다.</p>
-<figure class="align-default" id="id17">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_17.png"><img alt="t2i_adapter_17" class="bg-primary mb-1" src="../../_images/t2i_adapter_17.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 266 </span><span class="caption-text">Guidance Mode</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또한, condition map 은 비교적 sparse 하기 때문에 더 경량화된 adapter 를 사용해도 성능이 좋은 부분을 하단 예시처럼 확인할 수 있었다고 합니다. 더 자세하게는, adapter block 의 intermediate channel 숫자를 바꿔가며 adapter-small, adapter-tiny 모델을 각각 x4, x8 compression 작업을 진행했습니다.</p>
-<figure class="align-default" id="id18">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_18.png"><img alt="t2i_adapter_18" class="bg-primary mb-1" src="../../_images/t2i_adapter_18.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 267 </span><span class="caption-text">Complexity Ablation</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="SDXL.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">SDXL</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="IP_Adapter.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">IP-Adapter</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1. Preliminary: Stable Diffusion</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview-of-t2i-adapter">3.2. Overview of T2I-Adapter</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapter-design">3.3. Adapter Design</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-optimization">3.4. Model Optimization</a></li>
-</ul>
-</li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiment">Experiment</a><ul class="nav section-nav flex-column">
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1. Implementation Details</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison">4.2. Comparison</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">4.3. Applications</a></li>
-<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4. Ablation Study</a></li>
-</ul>
-</li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>T2I-Adapter &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/t2i_adapter';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="IP-Adapter" href="IP_Adapter.html" />
+    <link rel="prev" title="SDXL" href="SDXL.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/t2i_adapter.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/t2i_adapter.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>T2I-Adapter</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1. Preliminary: Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview-of-t2i-adapter">3.2. Overview of T2I-Adapter</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapter-design">3.3. Adapter Design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-optimization">3.4. Model Optimization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiment">Experiment</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1. Implementation Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison">4.2. Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">4.3. Applications</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4. Ablation Study</a></li>
+</ul>
+</li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> T2I-Adapter: Learning Adapters to Dig out More Controllable Ability for Text-to-Image Diffusion Models</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2302.08453">https://arxiv.org/abs/2302.08453</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/t2i_adapter">huggingface/diffusers</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Oct. 03, 2023</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="t2i-adapter">
+<h1>T2I-Adapter<a class="headerlink" href="#t2i-adapter" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>이번 시간에는 Tencent ARC Lab 에서 소개하는 T2I-Adapter 모델에 대해 알아볼 예정입니다.</p>
+<p>Stable Diffusion 을 비롯한 기존의 T2I 모델들이 난해한 prompt (e.g., “A car with flying wings” &amp; “Iron Man with bunny ears”) 을 입력받을 시, 생성되는 이미지 퀄리티가 저하되는 부분을 확인할 수 있는데요. 논문에서는 T2I 모델이 low level (e.g., textures), middle level (e.g., edges), 그리고 high level (e.g., semantics) 에 대한 정보들을 implicit 하게 가지고 있지만, 이를 표현하기 위해서는 text prompt 만으로는 한계가 있고 보다 세밀한 controlling (e.g., color, structure) 이 필요하다고 서술합니다. 즉, T2I 모델의 internal knowledge 와 external guidance 의 alignment 에 대한 추가적인 학습이 필요하다고 주장합니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_01.png"><img alt="t2i_adapter_01" class="bg-primary mb-1" src="../../_images/t2i_adapter_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 250 </span><span class="caption-text">Effect of External Guidance</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>논문에서는 이를 해결하기 위해 T2I-Adapter 모델을 소개하고 다음과 같이 5가지 장점이 있다고 합니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_02.png"><img alt="t2i_adapter_02" class="bg-primary mb-1" src="../../_images/t2i_adapter_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 251 </span><span class="caption-text">Various Guidance of T2I-Adapter</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul>
+<li><p><em>Plug-and-play</em> : 기존의 T2I 모델의 generalization ability 유지</p></li>
+<li><p><em>Simple and small</em> : ~77M parameters and ~300M storage</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_03.png"><img alt="t2i_adapter_03" class="bg-primary mb-1" src="../../_images/t2i_adapter_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 252 </span><span class="caption-text">ControlNet vs T2I-Adapter</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p>ControlNet 같은 경우에 reverse diffusion process 에서 ControlNet 과 Unet 모두 연산작업이 실행됩니다. 이때 ControlNet 은 Unet Encoder 의 구조를 그대로 가져오기 때문에 parameter size 및 storage 용량이 크고, 이는 이미지 생성하는데 큰 bottleneck 이 됩니다.</p></li>
+</ul>
+</li>
+<li><p><em>Flexible</em> : 다양한 adapter (e.g., color, structure) 학습 가능</p></li>
+<li><p><em>Composable</em> : Multiple adapter 적용 가능</p></li>
+<li><p><em>Generalizable</em> : 동일한 구조를 가진 다른 T2I 모델에 동일한 adapter 적용 가능</p></li>
+</ul>
+</section>
+<section id="method">
+<h2>Method<a class="headerlink" href="#method" title="Permalink to this heading">#</a></h2>
+<section id="preliminary-stable-diffusion">
+<h3>3.1. Preliminary: Stable Diffusion<a class="headerlink" href="#preliminary-stable-diffusion" title="Permalink to this heading">#</a></h3>
+<p>T2I-Adapter 의 기반이 되는 T2I 모델인 Stable Diffusion 은 기본적으로 two-stage model 이고, autoencoder 와 Unet denoiser 로 구성되어 있습니다. Autoencoder 는 이미지를 latent space 로 바꾸고 다시 복원하는 역할을 하고, Unet denoiser 는 diffusion process 를 통해 다음과 같은 손실함수를 최소화하는 방향으로 학습하게 됩니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L = \mathbb{E}_{Z_{t}, C, \epsilon, t}(||\epsilon-\epsilon_{\theta}(Z_t, C)||_2^2)
+\]</div>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(Z_t = \sqrt{\bar{\alpha}_t}Z_0 + \sqrt{1-\bar{\alpha}_t}\epsilon, \epsilon \sim N(0,I)\)</span> := noised feature map at step t</p></li>
+<li><p><span class="math notranslate nohighlight">\(C\)</span> := conditional information</p></li>
+<li><p><span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> := UNet denoiser</p></li>
+</ul>
+<p>Inference 시에는 random Gaussian distribution 을 따르는 <span class="math notranslate nohighlight">\(Z_T\)</span>, 그리고 text prompt 를 CLIP text encoder 에 입력함으로써 생성한 token <span class="math notranslate nohighlight">\(y\)</span> 를 cross attention 을 통해 Unet denoiser <span class="math notranslate nohighlight">\(\epsilon_{\theta}\)</span> 에 입력합니다. 최종적으로, diffusion process 를 통해 denoise 된 latent feature 를 decoder 에 통과시켜 최종 이미지를 생성하게 됩니다. Cross attention 의 자세한 방식은 다음과 같습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_04.png"><img alt="t2i_adapter_04" class="bg-primary mb-1" src="../../_images/t2i_adapter_04.png" style="width: 550px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 253 </span><span class="caption-text">Cross Attention</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<ul class="simple">
+<li><p><span class="math notranslate nohighlight">\(W_Q, W_K, W_V\)</span> := learnable projection matrices</p></li>
+<li><p><span class="math notranslate nohighlight">\(\phi(\cdot), \tau(\cdot)\)</span> := learnable embeddings</p></li>
+</ul>
+</section>
+<section id="overview-of-t2i-adapter">
+<h3>3.2. Overview of T2I-Adapter<a class="headerlink" href="#overview-of-t2i-adapter" title="Permalink to this heading">#</a></h3>
+<p>논문에서는 다음과 같은 형태로 pre-trained 된 Stable Diffusion 을 비롯한 T2I 모델에 Adapter 를 추가하는 방식을 소개합니다. Adapter 의 자세한 구조는 다음과 같습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_05.png"><img alt="t2i_adapter_05" class="bg-primary mb-1" src="../../_images/t2i_adapter_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 254 </span><span class="caption-text">Overview of T2I-Adapter</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="adapter-design">
+<h3>3.3. Adapter Design<a class="headerlink" href="#adapter-design" title="Permalink to this heading">#</a></h3>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_06.png"><img alt="t2i_adapter_06" class="bg-primary mb-1" src="../../_images/t2i_adapter_06.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 255 </span><span class="caption-text">Adapter Design</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Conditional input 은 512x512 의 크기를 가지며, 이는 <em>pixel unshuffle downsampling</em> 을 통해 64x64 이미지로 변환이 되어 1개의 convolution layer 와 2개의 residual block 으로 구성된 <em>scale</em> 을 4번 통과하게 됩니다. 이때, 각 <em>scale</em> 을 거치고 나온 condition feature 를 <span class="math notranslate nohighlight">\(F_c^k\)</span> 라 정의합니다.</p>
+<p>최종적으로 multi-scale condition feature <span class="math notranslate nohighlight">\(F_c = \{F_c^1, F_c^2, F_c^3, F_c^4\}\)</span> 가 생성되고, 이는 Unet encoder 에서의 intermediate feature <span class="math notranslate nohighlight">\(F_{enc} = \{F_{enc}^1, F_{enc}^2, F_{enc}^3, F_{enc}^4\}\)</span> 와 더해지게 됩니다. 이때, dimension 크기는 동일하도록 설정했기 때문에 덧셈 연산하는데 문제 없습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_07.png"><img alt="t2i_adapter_07" class="bg-primary mb-1" src="../../_images/t2i_adapter_07.png" style="width: 500px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 256 </span><span class="caption-text">Multi-Scale Condition Feature</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>해당 implementation code 도 살펴보겠습니다.</p>
+<ul>
+<li><p><strong>T2I-Adapter module code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">FullAdapter</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
+        <span class="bp">self</span><span class="p">,</span>
+        <span class="n">in_channels</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
+        <span class="n">channels</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="mi">320</span><span class="p">,</span> <span class="mi">640</span><span class="p">,</span> <span class="mi">1280</span><span class="p">,</span> <span class="mi">1280</span><span class="p">],</span>
+        <span class="n">num_res_blocks</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
+        <span class="n">downscale_factor</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
+    <span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="n">in_channels</span> <span class="o">=</span> <span class="n">in_channels</span> <span class="o">*</span> <span class="n">downscale_factor</span><span class="o">**</span><span class="mi">2</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">unshuffle</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">PixelUnshuffle</span><span class="p">(</span><span class="n">downscale_factor</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">conv_in</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">body</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">(</span>
+            <span class="p">[</span>
+                <span class="n">AdapterBlock</span><span class="p">(</span><span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">channels</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">num_res_blocks</span><span class="p">),</span>
+                <span class="o">*</span><span class="p">[</span>
+                    <span class="n">AdapterBlock</span><span class="p">(</span><span class="n">channels</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">],</span> <span class="n">channels</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">num_res_blocks</span><span class="p">,</span> <span class="n">down</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
+                    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">channels</span><span class="p">))</span>
+                <span class="p">],</span>
+            <span class="p">]</span>
+        <span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">total_downscale_factor</span> <span class="o">=</span> <span class="n">downscale_factor</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">**</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">channels</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">]:</span>
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">unshuffle</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv_in</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+        <span class="n">features</span> <span class="o">=</span> <span class="p">[]</span>
+
+        <span class="k">for</span> <span class="n">block</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">body</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="n">block</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+            <span class="n">features</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+        <span class="k">return</span> <span class="n">features</span>
+</pre></div>
+</div>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">AdapterBlock</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">,</span> <span class="n">num_res_blocks</span><span class="p">,</span> <span class="n">down</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="o">=</span> <span class="kc">None</span>
+        <span class="k">if</span> <span class="n">down</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="o">=</span> <span class="n">Downsample2D</span><span class="p">(</span><span class="n">in_channels</span><span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="o">=</span> <span class="kc">None</span>
+        <span class="k">if</span> <span class="n">in_channels</span> <span class="o">!=</span> <span class="n">out_channels</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">out_channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
+
+        <span class="bp">self</span><span class="o">.</span><span class="n">resnets</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sequential</span><span class="p">(</span>
+            <span class="o">*</span><span class="p">[</span><span class="n">AdapterResnetBlock</span><span class="p">(</span><span class="n">out_channels</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_res_blocks</span><span class="p">)],</span>
+        <span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">downsample</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
+            <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">in_conv</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+        <span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">resnets</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+
+        <span class="k">return</span> <span class="n">x</span>
+
+<span class="k">class</span><span class="w"> </span><span class="nc">AdapterResnetBlock</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">channels</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">block1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">act</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">block2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">channels</span><span class="p">,</span> <span class="n">kernel_size</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
+
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
+        <span class="n">h</span> <span class="o">=</span> <span class="n">x</span>
+        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">block1</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
+        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">act</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
+        <span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">block2</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
+
+        <span class="k">return</span> <span class="n">h</span> <span class="o">+</span> <span class="n">x</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>SD + T2I-Adapter implementation code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># 7. Denoising loop</span>
+<span class="n">adapter_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">adapter</span><span class="p">(</span><span class="n">adapter_input</span><span class="p">)</span>
+<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
+    <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span> <span class="o">*</span> <span class="n">adapter_conditioning_scale</span>
+<span class="k">if</span> <span class="n">num_images_per_prompt</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
+    <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
+        <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">v</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">num_images_per_prompt</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+<span class="k">if</span> <span class="n">do_classifier_free_guidance</span><span class="p">:</span>
+    <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">adapter_state</span><span class="p">):</span>
+        <span class="n">adapter_state</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">v</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+<span class="n">num_warmup_steps</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">timesteps</span><span class="p">)</span> <span class="o">-</span> <span class="n">num_inference_steps</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">order</span>
+<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">progress_bar</span><span class="p">(</span><span class="n">total</span><span class="o">=</span><span class="n">num_inference_steps</span><span class="p">)</span> <span class="k">as</span> <span class="n">progress_bar</span><span class="p">:</span>
+    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">timesteps</span><span class="p">):</span>
+        <span class="c1"># expand the latents if we are doing classifier free guidance</span>
+        <span class="n">latent_model_input</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">([</span><span class="n">latents</span><span class="p">]</span> <span class="o">*</span> <span class="mi">2</span><span class="p">)</span> <span class="k">if</span> <span class="n">do_classifier_free_guidance</span> <span class="k">else</span> <span class="n">latents</span>
+        <span class="n">latent_model_input</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">scale_model_input</span><span class="p">(</span><span class="n">latent_model_input</span><span class="p">,</span> <span class="n">t</span><span class="p">)</span>
+
+        <span class="c1"># predict the noise residual</span>
+        <span class="n">noise_pred</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">unet</span><span class="p">(</span>
+            <span class="n">latent_model_input</span><span class="p">,</span>
+            <span class="n">t</span><span class="p">,</span>
+            <span class="n">encoder_hidden_states</span><span class="o">=</span><span class="n">prompt_embeds</span><span class="p">,</span>
+            <span class="n">cross_attention_kwargs</span><span class="o">=</span><span class="n">cross_attention_kwargs</span><span class="p">,</span>
+            <span class="n">down_block_additional_residuals</span><span class="o">=</span><span class="p">[</span><span class="n">state</span><span class="o">.</span><span class="n">clone</span><span class="p">()</span> <span class="k">for</span> <span class="n">state</span> <span class="ow">in</span> <span class="n">adapter_state</span><span class="p">],</span>
+        <span class="p">)</span><span class="o">.</span><span class="n">sample</span>
+
+        <span class="c1"># perform guidance</span>
+        <span class="k">if</span> <span class="n">do_classifier_free_guidance</span><span class="p">:</span>
+            <span class="n">noise_pred_uncond</span><span class="p">,</span> <span class="n">noise_pred_text</span> <span class="o">=</span> <span class="n">noise_pred</span><span class="o">.</span><span class="n">chunk</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
+            <span class="n">noise_pred</span> <span class="o">=</span> <span class="n">noise_pred_uncond</span> <span class="o">+</span> <span class="n">guidance_scale</span> <span class="o">*</span> <span class="p">(</span><span class="n">noise_pred_text</span> <span class="o">-</span> <span class="n">noise_pred_uncond</span><span class="p">)</span>
+
+        <span class="c1"># compute the previous noisy sample x_t -&gt; x_t-1</span>
+        <span class="n">latents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">scheduler</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">noise_pred</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="n">latents</span><span class="p">,</span> <span class="o">**</span><span class="n">extra_step_kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">prev_sample</span>
+</pre></div>
+</div>
+</li>
+</ul>
+<p>Adapter 종류로는 크게 structure 에 대한 conditioning 과 color 에 대한 conditioning 으로 분류할 수 있습니다. Structure controlling 으로는 대표적으로 sketch, depth map, semantic segmentation map, keypose 등이 있습니다. Color map 은 이미지를 우선적으로 <em>high bicubic downsampling</em> 을 통해 semantic 및 structural 한 정보를 제외시키고, <em>nearest upsampling</em> 기법으로 다시 원본 이미지 크기로 복원하는 작업을 통해 생성합니다.</p>
+<p>앞서 설명한 부분처럼 추가 학습 없이 여러 adapter 로 conditioning 할 수도 있습니다. Multi-adapter 로 controlling 할 시, 다음과 같이 각 adapter 로부터 나온 condition feature 에 weight <span class="math notranslate nohighlight">\(w_k\)</span> 를 부여해 최종 condition feature 를 정의하게 됩니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_08.png"><img alt="t2i_adapter_08" class="bg-primary mb-1" src="../../_images/t2i_adapter_08.png" style="width: 350px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 257 </span><span class="caption-text">Multi-Adapter Conditioning</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="model-optimization">
+<h3>3.4. Model Optimization<a class="headerlink" href="#model-optimization" title="Permalink to this heading">#</a></h3>
+<p>모델 학습 시, SD 파라미터는 고정시킨 상태로 T2I-Adapter 파라미터만 학습합니다. 이때, T2I-Adapter 손실함수는 SD 학습 시와 유사하게 다음과 같이 정의합니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L_{AD} = \mathbb{E}_{Z_{0}, t, F_c, \epsilon \sim N(0,I)}[||\epsilon-\epsilon_{\theta}(Z_t, t, \tau(y), F_c)||_2^2]
+\]</div>
+<p>where <span class="math notranslate nohighlight">\(t \sim U(0,T)\)</span></p>
+<p><strong>Non-uniform time step sampling during training</strong></p>
+<p>Diffusion 모델 학습 시와 동일하게, time embedding 을 adapter 에 input 으로 넣으면서 성능 개선 효과가 있는 것을 확인했지만 매 time step <span class="math notranslate nohighlight">\(t\)</span> 마다 <span class="math notranslate nohighlight">\(F_c\)</span> 를 conditioning 하는 것은 computationally expensive 합니다.</p>
+<p>따라서, 논문에서는 DDIM inference sampling 을 크게 3가지 stage (i.e., beginning, middle, late stage) 로 분류하는 방법을 소개합니다. 실험해본 결과, middle 그리고 late stage 에 적용하는 것보다 beginning stage 에서 guidance 를 주는 효과가 더 크다고 합니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_09.png"><img alt="t2i_adapter_09" class="bg-primary mb-1" src="../../_images/t2i_adapter_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 258 </span><span class="caption-text">DDIM Inference Sampling Stages</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>따라서, 최대한 time step <span class="math notranslate nohighlight">\(t\)</span> 가 early sampling stage 에 포함되도록 다음 수식처럼 non-uniformly 하게 sampling 작업을 진행했고, 이에 대한 결과도 공유합니다.</p>
+<div class="math notranslate nohighlight">
+\[
+t = (1-(t/T)^3) \times T, t \in U(0,T)
+\]</div>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_10.png"><img alt="t2i_adapter_10" class="bg-primary mb-1" src="../../_images/t2i_adapter_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 259 </span><span class="caption-text">Effect of Cubic Sampling</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="experiment">
+<h2>Experiment<a class="headerlink" href="#experiment" title="Permalink to this heading">#</a></h2>
+<section id="implementation-details">
+<h3>4.1. Implementation Details<a class="headerlink" href="#implementation-details" title="Permalink to this heading">#</a></h3>
+<p>T2I-Adapter 학습 시, hyperparameter 및 데이터셋 구축 상세사항은 다음과 같습니다.</p>
+<ul class="simple">
+<li><p>Hyperparameters</p>
+<ul>
+<li><p>10 epochs</p></li>
+<li><p>Batch size = 8</p></li>
+<li><p>Learning rate = <span class="math notranslate nohighlight">\(1 \times 10^{-5}\)</span></p></li>
+<li><p>Adam optimizer</p></li>
+<li><p>4X NVIDIA Tesla 32G-V100 GPUs (3 days)</p></li>
+</ul>
+</li>
+<li><p>실험별 데이터셋 구축</p>
+<ul>
+<li><p><em>Sketch Map</em></p>
+<ul>
+<li><p>COCO17 데이터셋 - 164K images</p></li>
+<li><p>PiDiNet 를 활용해 sketch map 생성</p></li>
+</ul>
+</li>
+<li><p><em>Semantic segmentation map</em></p>
+<ul>
+<li><p>COCO-Stuff 데이터셋 - 164K images</p></li>
+</ul>
+</li>
+<li><p><em>Keypoints &amp; Color &amp; Depth maps</em></p>
+<ul>
+<li><p>LAION-AESTHETICS 데이터셋으로부터 600K image-text pairs 추출</p></li>
+<li><p>MM-Pose, MiDaS 모델로 각각 Keypoint, Depth map 생성</p></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</section>
+<section id="comparison">
+<h3>4.2. Comparison<a class="headerlink" href="#comparison" title="Permalink to this heading">#</a></h3>
+<p>기존 SOTA 모델들과 정량적인 수치로 비교하는데 FID 와 CLIP Score 를 사용하였고, 하단 사진처럼 기존 GAN-based 그리고 diffusion-based method 모델들보다 성능이 좋습니다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_11.png"><img alt="t2i_adapter_11" class="bg-primary mb-1" src="../../_images/t2i_adapter_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 260 </span><span class="caption-text">Qualitative Comparison</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_12.png"><img alt="t2i_adapter_12" class="bg-primary mb-1" src="../../_images/t2i_adapter_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 261 </span><span class="caption-text">Quantitative Comparison</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="applications">
+<h3>4.3. Applications<a class="headerlink" href="#applications" title="Permalink to this heading">#</a></h3>
+<p>해당 예시들은 다양한 single adapter controlling 에 대한 결과들을 보여줍니다. 특히 인상적인 부분은 sketch 로 controlling 시, sketch 가 정확하지 않아도 이미지 생성에 robust 한 성능을 보여주는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id13">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_13.png"><img alt="t2i_adapter_13" class="bg-primary mb-1" src="../../_images/t2i_adapter_13.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 262 </span><span class="caption-text">Visualization of Single-Adapter Controlling</span><a class="headerlink" href="#id13" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, image editing 도 가능합니다. SD inpainting mode 로 특정 지역을 masking 한 후, T2I-Adapter 를 통해 image editing 을 한 예시 사진입니다. Adapter 없이, SD inpainting 만으로는 성능이 좋지 못하다고 합니다.</p>
+<figure class="align-default" id="id14">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_14.png"><img alt="t2i_adapter_14" class="bg-primary mb-1" src="../../_images/t2i_adapter_14.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 263 </span><span class="caption-text">Image Editing with T2I-Adapter</span><a class="headerlink" href="#id14" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>아래 예시는 multiple adapter 를 적용한 것으로 위에서부터 아래로 각각 depth + keypose 그리고 sketch + color map 을 conditioning 한 결과입니다.</p>
+<figure class="align-default" id="id15">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_15.png"><img alt="t2i_adapter_15" class="bg-primary mb-1" src="../../_images/t2i_adapter_15.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 264 </span><span class="caption-text">Composable Controlling</span><a class="headerlink" href="#id15" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>마지막으로, 장점들 중 하나로 명시되었던 generalization ability 를 보여준 사례입니다. 학습 완료한 Adapter 를 동일한 구조를 가진 T2I 모델에 적용 가능한 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id16">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_16.png"><img alt="t2i_adapter_16" class="bg-primary mb-1" src="../../_images/t2i_adapter_16.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 265 </span><span class="caption-text">Generalizable Controlling</span><a class="headerlink" href="#id16" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="ablation-study">
+<h3>4.4. Ablation Study<a class="headerlink" href="#ablation-study" title="Permalink to this heading">#</a></h3>
+<p>논문에서는 guidance mode, 그리고 complexity 에 대한 ablation study 를 진행했습니다.</p>
+<p>SD 모델은 encoder 그리고 decoder 에 각각 4개의 scale (i.e., 64×64, 32×32, 16×16, 8×8) 을 가지고 있는데, 하단 table 처럼 각각 다른 scale 에 adapter guidance 를 적용하면서 FID 성능을 비교했습니다. Scale Number 가 4보다 작을 경우, large scale 에 순차적으로 guidance 를 적용했습니다. 그 결과, Unet encoder 에만 4 scales 모두 guidance 를 적용하는 것이 성능이 제일 좋다고 합니다.</p>
+<figure class="align-default" id="id17">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_17.png"><img alt="t2i_adapter_17" class="bg-primary mb-1" src="../../_images/t2i_adapter_17.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 266 </span><span class="caption-text">Guidance Mode</span><a class="headerlink" href="#id17" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, condition map 은 비교적 sparse 하기 때문에 더 경량화된 adapter 를 사용해도 성능이 좋은 부분을 하단 예시처럼 확인할 수 있었다고 합니다. 더 자세하게는, adapter block 의 intermediate channel 숫자를 바꿔가며 adapter-small, adapter-tiny 모델을 각각 x4, x8 compression 작업을 진행했습니다.</p>
+<figure class="align-default" id="id18">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/t2i_adapter_18.png"><img alt="t2i_adapter_18" class="bg-primary mb-1" src="../../_images/t2i_adapter_18.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 267 </span><span class="caption-text">Complexity Ablation</span><a class="headerlink" href="#id18" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="SDXL.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">SDXL</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="IP_Adapter.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">IP-Adapter</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#method">Method</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#preliminary-stable-diffusion">3.1. Preliminary: Stable Diffusion</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#overview-of-t2i-adapter">3.2. Overview of T2I-Adapter</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#adapter-design">3.3. Adapter Design</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-optimization">3.4. Model Optimization</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiment">Experiment</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-details">4.1. Implementation Details</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison">4.2. Comparison</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#applications">4.3. Applications</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ablation-study">4.4. Ablation Study</a></li>
+</ul>
+</li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/vae.html b/docs/review/vae.html
old mode 100644
new mode 100755
index 8699df68..99fff034
--- a/docs/review/vae.html
+++ b/docs/review/vae.html
@@ -1,830 +1,850 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>VAE &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
-    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
-    <script src="../../_static/jquery.js"></script>
-    <script src="../../_static/underscore.js"></script>
-    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="../../_static/doctools.js"></script>
-    <script src="../../_static/clipboard.min.js"></script>
-    <script src="../../_static/copybutton.js"></script>
-    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="../../_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="../../_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="../../_static/sphinx-thebe.js"></script>
-    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
-    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/vae';</script>
-    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="../../genindex.html" />
-    <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="GAN" href="gan.html" />
-    <link rel="prev" title="Welcome to PseudoDiffusers!!" href="../../intro.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="../../search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="../../intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="../../intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="current nav bd-sidenav">
-<li class="toctree-l1 current active"><a class="current reference internal" href="#">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/vae.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="../../_sources/docs/review/vae.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>VAE</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intractability">Intractability</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sgvb-stochastic-gradient-variational-bayes">SGVB(Stochastic Gradient Variational Bayes)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reparameterization-trick">Reparameterization Trick</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-algorithm">Training Algorithm</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <div class="admonition-information admonition">
-<p class="admonition-title">Information</p>
-<ul class="simple">
-<li><p><strong>Title:</strong> Auto-Encoding Variational Bayes (ICLR 2014)</p></li>
-<li><p><strong>Reference</strong></p>
-<ul>
-<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1312.6114">https://arxiv.org/abs/1312.6114</a></p></li>
-<li><p>Code: <a class="github reference external" href="https://github.com/GunhoChoi/PyTorch-FastCampus">GunhoChoi/PyTorch-FastCampus</a></p></li>
-<li><p><a class="reference external" href="https://www.youtube.com/watch?v=GbCAwVVKaHY&amp;t=95s">Smart Design Lab &#64;KAIST | 딥러닝 Ch.3.3 VAE</a></p></li>
-</ul>
-</li>
-<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
-<li><p><strong>Editor:</strong> Changhwan Lee</p></li>
-<li><p><strong>Last updated on Apr. 26, 2024</strong></p></li>
-</ul>
-</div>
-<section class="tex2jax_ignore mathjax_ignore" id="vae">
-<h1>VAE<a class="headerlink" href="#vae" title="Permalink to this heading">#</a></h1>
-<section id="introduction">
-<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
-<p>논문의 Introduction 에 다음과 같은 문구가 적혀있는데요.</p>
-<blockquote>
-<div><p>“Variational Bayesian (VB) approach involves the optimization of an approximation to the intractable posterior”</p>
-</div></blockquote>
-<p>이처럼 Variational Autoencoder 는 논문에서 제시하는 Auto-Encoding Variational Bayes(AEVB) 알고리즘 중 하나로, intractable 한 posterior 분포를 다루기 쉬운 뉴럴 네트워크로 근사함으로써 Variational Inference 를 하게 됩니다.</p>
-<p>이것이 의미하는 바가 무엇인지 한번 살펴보도록 하겠습니다.</p>
-</section>
-<section id="intractability">
-<h2>Intractability<a class="headerlink" href="#intractability" title="Permalink to this heading">#</a></h2>
-<p>Variational Autoencoder(VAE) 는 크게 Encoder 와 Decoder 부분으로 이루어져 있습니다. 더 자세하게는, Encoder는 입력 데이터 <span class="math notranslate nohighlight">\(x\)</span> 를 받아서 잠재변수(Latent Variable) <span class="math notranslate nohighlight">\(z\)</span> 를 만들어내고, Decoder 는 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 활용해서 다시 <span class="math notranslate nohighlight">\(x\)</span> 를 복원하게 됩니다.</p>
-<figure class="align-default" id="id1">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_01.png"><img alt="vae_01" class="bg-primary mb-1" src="../../_images/vae_01.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 1 </span><span class="caption-text">Variational Autoencoder(VAE) Architecture</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>Variational Autoencoder (VAE) 는 AutoEncoder 와 달리 확률 분포를 이용해 어떤 새로운 데이터를 생성하는 Decoder 부분에 초점을 둡니다. 이때 논문에서 다음과 같은 assumption 들을 내립니다. 첫번째로 <span class="math notranslate nohighlight">\(p_{\theta}(z)\)</span> 와 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 는 parametric 한 distribution 을 가지고 있고, 이는 <span class="math notranslate nohighlight">\(\theta\)</span> 와 <span class="math notranslate nohighlight">\(z\)</span> 에 대해 differentiable 하다는 가정을 내립니다. 이 때, 대표적으로 <span class="math notranslate nohighlight">\(p_{\theta}(z)\)</span> 는 Gaussian distribution 을 따르고 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 는 생성하고자 하는 데이터 성질에 따라 Bernoulli 혹은 Gaussian distribution 을 따르도록 정의합니다. 그리고 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 의 파라미터 <span class="math notranslate nohighlight">\(p\)</span> 혹은 <span class="math notranslate nohighlight">\((\mu, \sigma)\)</span> 는 아래 그림과 같이 뉴럴 네트워크로 구성된 Decoder 로부터 계산이 됩니다.</p>
-<figure class="align-default" id="id2">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_07.png"><img alt="vae_07" class="bg-primary mb-1" src="../../_images/vae_07.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 2 </span><span class="caption-text">Overview of Bernoulli(left) and Gaussian(right) Decoder</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>이를 기반으로 우리는 ML/MAP estimation 을 통해 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 최대화시키는 파라미터 <span class="math notranslate nohighlight">\(\theta\)</span> 를 구하는 것이 목적입니다. 하지만, <span class="math notranslate nohighlight">\(p_{\theta}(x) = \int p_{\theta}(z)p_{\theta}(x|z) \ dz\)</span>  는 intractable 하기 때문에 <span class="math notranslate nohighlight">\(p_{\theta}(z|x)\)</span> 를 계산하기 위한 Encoder 가 등장하게 됩니다.</p>
-<div class="math notranslate nohighlight">
-\[
-p_{\theta}(x) = p_{\theta}(x|z)p_{\theta}(z)/p_{\theta}(z|x)
-\]</div>
-<p>여기서 <span class="math notranslate nohighlight">\(p_{\theta}(z|x)\)</span> 역시 intractable 하기 때문에 이를 잘 근사화하는 뉴럴 네트워크 <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 를 정의하게 되고, 이러한 과정을 변분추론(Variational Inference) 라고 합니다. 아래는 Encoder 와 Decoder 를 함께 도식화한 그림입니다. 정리하자면, MLP Encoder 를 통해 계산된 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 로 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 생성하게 되고, 이를 기반으로 Decoder 는 원본 이미지와 유사한 데이터를 생성하게 됩니다.</p>
-<figure class="align-default" id="id3">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_08.png"><img alt="vae_08" class="bg-primary mb-1" src="../../_images/vae_08.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 3 </span><span class="caption-text">Overview of Gaussian Encoder and Decoder</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>해당 implementation code 도 확인해보겠습니다.</p>
-<ul>
-<li><p><strong>Encoder 구현 code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>
-<span class="k">class</span> <span class="nc">Encoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">(</span><span class="n">Encoder</span><span class="p">,</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">fc1_1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">fc1_2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">relu</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">()</span>
-                        
-    <span class="k">def</span> <span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
-        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
-        <span class="n">mu</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1_1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
-        <span class="n">log_var</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1_2</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
-                
-        <span class="k">return</span> <span class="n">mu</span><span class="p">,</span><span class="n">log_var</span>
-    
-    <span class="k">def</span> <span class="nf">reparametrize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">):</span>
-        <span class="n">std</span> <span class="o">=</span> <span class="n">logvar</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">exp_</span><span class="p">()</span>
-        
-        <span class="n">eps</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">FloatTensor</span><span class="p">(</span><span class="n">std</span><span class="o">.</span><span class="n">size</span><span class="p">())</span><span class="o">.</span><span class="n">normal_</span><span class="p">()</span>
-        <span class="n">eps</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
-        
-        <span class="k">return</span> <span class="n">eps</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">std</span><span class="p">)</span><span class="o">.</span><span class="n">add_</span><span class="p">(</span><span class="n">mu</span><span class="p">)</span>
-    
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
-        <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-        <span class="n">reparam</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reparametrize</span><span class="p">(</span><span class="n">mu</span><span class="p">,</span><span class="n">logvar</span><span class="p">)</span>
-        
-        <span class="k">return</span> <span class="n">mu</span><span class="p">,</span><span class="n">logvar</span><span class="p">,</span><span class="n">reparam</span>
-</pre></div>
-</div>
-</li>
-<li><p><strong>Decoder 구현 code</strong></p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Decoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="nb">super</span><span class="p">(</span><span class="n">Decoder</span><span class="p">,</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">fc1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="mi">784</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">sigmoid</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sigmoid</span><span class="p">()</span>
-    
-    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
-        <span class="n">out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fc1</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
-        <span class="n">out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
-        <span class="n">out</span> <span class="o">=</span> <span class="n">out</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span><span class="mi">28</span><span class="p">,</span><span class="mi">28</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span>
-        
-        <span class="k">return</span> <span class="n">out</span>
-</pre></div>
-</div>
-</li>
-</ul>
-</section>
-<section id="sgvb-stochastic-gradient-variational-bayes">
-<h2>SGVB(Stochastic Gradient Variational Bayes)<a class="headerlink" href="#sgvb-stochastic-gradient-variational-bayes" title="Permalink to this heading">#</a></h2>
-<p>이로써 우리는 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 최대화시키는 파라미터 <span class="math notranslate nohighlight">\((\theta, \phi)\)</span> 를 찾으면 되고, 이에 대한 lower bound 를 수식적으로 표현하면 손실함수(loss function) 를 다음과 같이 Reconstruction Error 와 Regularization term 로 분할할 수 있습니다.</p>
-<div class="math notranslate nohighlight">
-\[
-L(\theta, \phi;x_i) = \arg \min_{\theta, \phi} \sum_{i} -\mathbb{E}_{q_{\phi}(z|x_i)}[\log(p(x_i|g_{\theta}(z)))] + KL(q_{\phi}(z|x_i)||p(z))
-\]</div>
-<p>Reconstruction Error 는 Decoder 에서 생성하는 데이터가 최대한 원본 데이터와 유사하도록 하는 term 이고, Regularization 은 Encoder 에서 만드는 잠재변수의 분포가 저희가 부여한 prior distribution 이랑 가깝도록 설정하는 term 입니다. 이때, Reconstruction Error 는 다음과 같은 Monte Carlo Estimator <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{B}\)</span> 로 근사값을 구할 수 있고, 하나의 sample 을 계산하는 것도 연산량이 많으므로 논문에서는 sample size <span class="math notranslate nohighlight">\(L\)</span> 을 1 로 설정합니다.</p>
-<div class="math notranslate nohighlight">
-\[ \tilde{\mathcal{L}}^{B}(\theta, \phi; x^{(i)})=-D_{KL}(q_{\phi}(z|x^{(i)})||p_{\theta}(z))+\frac{1}{L}\sum^{L}_{l=1} \log p_{\theta}(x^{(i)}|z^{(i,l)}) \]</div>
-<p>Lower bound 에 대한 수식을 변환하여 수식 전체에 대한 Monte Carlo Estimator <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{A}\)</span> 로도 근사값을 구할 수 있는데 이는 평균적으로 <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{B}\)</span> 에 비해 variance 가 높다고 합니다.</p>
-<div class="math notranslate nohighlight">
-\[ \tilde{\mathcal{L}}^{A}(\theta,\phi;x^{(i)})=\frac{1}{L}\sum^{L}_{l=1}\log p_{\theta}(x^{(i)},z^{(i,l)})-\log q_{\phi}(z^{(i,l)}|x^{(i)}) \]</div>
-</section>
-<section id="reparameterization-trick">
-<h2>Reparameterization Trick<a class="headerlink" href="#reparameterization-trick" title="Permalink to this heading">#</a></h2>
-<p>논문에서는 모델 학습 시 backpropagation 을 원활히 사용할 수 있도록 reparameterization trick 을 소개합니다. 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 Encoder 에서 나온 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 로 직접 샘플링하지 않고, backpropagation 이 가능하도록 Gaussian noise 를 우선적으로 샘플링하고 해당 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 를 각각 더하고 곱하게 됩니다. 이는 <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 이 Gaussian distribution 을 따른다고 설정했을 때이고, <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 에 대해 다른 분포를 가정하여 그에 따른 다른 reparameterization trick 을 시도할 수 있다고 논문에 명시되어 있습니다.</p>
-<p>하단에는 Gaussian 분포에 대한 reparameterization trick 를 도식화한 그림입니다. 왼쪽에는 reparameterization trick이 적용되지 않은 경우로 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 값이 고정되어 있어도 <span class="math notranslate nohighlight">\(\mathcal{N}(\mu,\sigma^2)\)</span> 에서 샘플링하므로 <span class="math notranslate nohighlight">\(z\)</span> 는 랜덤한 값이 되고 loss function 을 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 에 대해 미분할 수 없어 backpropagation 을 적용하기가 어렵습니다. 반면에, 오른쪽처럼 reparameterization trick 을 적용하면, <span class="math notranslate nohighlight">\(z\)</span> 가 deterministic 한 형태로 정의되고 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 에 대한 변수로도 미분이 가능해지기 때문에 backpropagation 을 활용하여 모델을 학습시킬 수 있습니다.</p>
-<figure class="align-default" id="id4">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_05.png"><img alt="vae_05" class="bg-primary mb-1" src="../../_images/vae_05.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 4 </span><span class="caption-text">Overview of Reparameterization Trick</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="training-algorithm">
-<h2>Training Algorithm<a class="headerlink" href="#training-algorithm" title="Permalink to this heading">#</a></h2>
-<p>모델을 학습하는 전체적인 과정은 아래와 같습니다.</p>
-<figure class="align-default" id="id5">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_09.png"><img alt="vae_09" class="bg-primary mb-1" src="../../_images/vae_09.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 5 </span><span class="caption-text">Overview of Training Algorithm</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-</section>
-<section id="experiments">
-<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
-<p>논문에서는 MNIST 와 Frey Face 데이터셋에 대해 AEVB(Auto-Encoding Variational Bayes)와 wake-sleep 알고리즘을 적용해서 비교합니다. 여기서 Frey Face 데이터셋은 continuous 하므로 Gaussian Decoder 를 사용합니다. 실험결과는 아래 그림과 같습니다.</p>
-<figure class="align-default" id="id6">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_10.png"><img alt="vae_10" class="bg-primary mb-1" src="../../_images/vae_10.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 6 </span><span class="caption-text">Experimental Results - Likelihood lower bound</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>해당 그림처럼 lower bound 를 최적화하는데 AEVB 알고리즘이 더 빠르게 수렴하며 모든 실험에서 성능적으로도 더 나은 부분을 확인할 수 있습니다.</p>
-<figure class="align-default" id="id7">
-<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_11.png"><img alt="vae_11" class="bg-primary mb-1" src="../../_images/vae_11.png" style="width: 700px;" /></a>
-<figcaption>
-<p><span class="caption-number">Fig. 7 </span><span class="caption-text">Experimental Results - Marginal likelihood</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
-</figcaption>
-</figure>
-<p>또한, latent variable <span class="math notranslate nohighlight">\(z\)</span> 의 차원이 작으면 marginal likelihood 를 직접 Monte Carlo EM 을 활용하여 구할 수 있는데, 이에 대한 결과도 논문에서 보여줍니다. Monte Carlo EM 의 경우 학습 데이터가 많으면 수렴이 되지 않는 부분을 확인할 수 있습니다.</p>
-</section>
-<section id="summary">
-<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
-<p>AutoEncoder 는 latent space 에 하나의 값으로 지정해줬다면, VAE 는 평균 그리고 분산 파라미터들과 Gaussian 분포를 가진 샘플을 통해 잠재변수를 생성합니다. 그리고 VAE 를 실제로 사용해보면 생성된 데이터 image quality 가 낮다는 단점을 가지고 있다고 합니다.</p>
-</section>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./docs/review"
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="left-prev"
-       href="../../intro.html"
-       title="previous page">
-      <i class="fa-solid fa-angle-left"></i>
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">previous</p>
-        <p class="prev-next-title">Welcome to PseudoDiffusers!!</p>
-      </div>
-    </a>
-    <a class="right-next"
-       href="gan.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">GAN</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intractability">Intractability</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sgvb-stochastic-gradient-variational-bayes">SGVB(Stochastic Gradient Variational Bayes)</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reparameterization-trick">Reparameterization Trick</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-algorithm">Training Algorithm</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>VAE &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/vae';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="GAN" href="gan.html" />
+    <link rel="prev" title="Welcome to PseudoDiffusers!!" href="../../intro.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/vae.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/vae.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="pst-secondary-sidebar-checkbox" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>VAE</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intractability">Intractability</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sgvb-stochastic-gradient-variational-bayes">SGVB(Stochastic Gradient Variational Bayes)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reparameterization-trick">Reparameterization Trick</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-algorithm">Training Algorithm</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Auto-Encoding Variational Bayes (ICLR 2014)</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/1312.6114">https://arxiv.org/abs/1312.6114</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/GunhoChoi/PyTorch-FastCampus">GunhoChoi/PyTorch-FastCampus</a></p></li>
+<li><p><a class="reference external" href="https://www.youtube.com/watch?v=GbCAwVVKaHY&amp;t=95s">Smart Design Lab &#64;KAIST | 딥러닝 Ch.3.3 VAE</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Editor:</strong> Changhwan Lee</p></li>
+<li><p><strong>Last updated on Apr. 26, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="vae">
+<h1>VAE<a class="headerlink" href="#vae" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<p>논문의 Introduction 에 다음과 같은 문구가 적혀있는데요.</p>
+<blockquote>
+<div><p>“Variational Bayesian (VB) approach involves the optimization of an approximation to the intractable posterior”</p>
+</div></blockquote>
+<p>이처럼 Variational Autoencoder 는 논문에서 제시하는 Auto-Encoding Variational Bayes(AEVB) 알고리즘 중 하나로, intractable 한 posterior 분포를 다루기 쉬운 뉴럴 네트워크로 근사함으로써 Variational Inference 를 하게 됩니다.</p>
+<p>이것이 의미하는 바가 무엇인지 한번 살펴보도록 하겠습니다.</p>
+</section>
+<section id="intractability">
+<h2>Intractability<a class="headerlink" href="#intractability" title="Permalink to this heading">#</a></h2>
+<p>Variational Autoencoder(VAE) 는 크게 Encoder 와 Decoder 부분으로 이루어져 있습니다. 더 자세하게는, Encoder는 입력 데이터 <span class="math notranslate nohighlight">\(x\)</span> 를 받아서 잠재변수(Latent Variable) <span class="math notranslate nohighlight">\(z\)</span> 를 만들어내고, Decoder 는 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 활용해서 다시 <span class="math notranslate nohighlight">\(x\)</span> 를 복원하게 됩니다.</p>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_01.png"><img alt="vae_01" class="bg-primary mb-1" src="../../_images/vae_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 1 </span><span class="caption-text">Variational Autoencoder(VAE) Architecture</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Variational Autoencoder (VAE) 는 AutoEncoder 와 달리 확률 분포를 이용해 어떤 새로운 데이터를 생성하는 Decoder 부분에 초점을 둡니다. 이때 논문에서 다음과 같은 assumption 들을 내립니다. 첫번째로 <span class="math notranslate nohighlight">\(p_{\theta}(z)\)</span> 와 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 는 parametric 한 distribution 을 가지고 있고, 이는 <span class="math notranslate nohighlight">\(\theta\)</span> 와 <span class="math notranslate nohighlight">\(z\)</span> 에 대해 differentiable 하다는 가정을 내립니다. 이 때, 대표적으로 <span class="math notranslate nohighlight">\(p_{\theta}(z)\)</span> 는 Gaussian distribution 을 따르고 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 는 생성하고자 하는 데이터 성질에 따라 Bernoulli 혹은 Gaussian distribution 을 따르도록 정의합니다. 그리고 <span class="math notranslate nohighlight">\(p_{\theta}(x|z)\)</span> 의 파라미터 <span class="math notranslate nohighlight">\(p\)</span> 혹은 <span class="math notranslate nohighlight">\((\mu, \sigma)\)</span> 는 아래 그림과 같이 뉴럴 네트워크로 구성된 Decoder 로부터 계산이 됩니다.</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_07.png"><img alt="vae_07" class="bg-primary mb-1" src="../../_images/vae_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 2 </span><span class="caption-text">Overview of Bernoulli(left) and Gaussian(right) Decoder</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>이를 기반으로 우리는 ML/MAP estimation 을 통해 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 최대화시키는 파라미터 <span class="math notranslate nohighlight">\(\theta\)</span> 를 구하는 것이 목적입니다. 하지만, <span class="math notranslate nohighlight">\(p_{\theta}(x) = \int p_{\theta}(z)p_{\theta}(x|z) \ dz\)</span>  는 intractable 하기 때문에 <span class="math notranslate nohighlight">\(p_{\theta}(z|x)\)</span> 를 계산하기 위한 Encoder 가 등장하게 됩니다.</p>
+<div class="math notranslate nohighlight">
+\[
+p_{\theta}(x) = p_{\theta}(x|z)p_{\theta}(z)/p_{\theta}(z|x)
+\]</div>
+<p>여기서 <span class="math notranslate nohighlight">\(p_{\theta}(z|x)\)</span> 역시 intractable 하기 때문에 이를 잘 근사화하는 뉴럴 네트워크 <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 를 정의하게 되고, 이러한 과정을 변분추론(Variational Inference) 이라고 합니다. 아래는 Encoder 와 Decoder 를 함께 도식화한 그림입니다. 정리하자면, MLP Encoder 를 통해 계산된 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 로 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 생성하게 되고, 이를 기반으로 Decoder 는 원본 이미지와 유사한 데이터를 생성하게 됩니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_08.png"><img alt="vae_08" class="bg-primary mb-1" src="../../_images/vae_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 3 </span><span class="caption-text">Overview of Gaussian Encoder and Decoder</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>해당 implementation code 도 확인해보겠습니다.</p>
+<ul>
+<li><p><strong>Encoder 구현 code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>
+<span class="k">class</span><span class="w"> </span><span class="nc">Encoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">Encoder</span><span class="p">,</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">fc1_1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">fc1_2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">relu</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">ReLU</span><span class="p">()</span>
+                        
+    <span class="k">def</span><span class="w"> </span><span class="nf">encode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
+        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
+        <span class="n">mu</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1_1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
+        <span class="n">log_var</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1_2</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
+                
+        <span class="k">return</span> <span class="n">mu</span><span class="p">,</span><span class="n">log_var</span>
+    
+    <span class="k">def</span><span class="w"> </span><span class="nf">reparametrize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span><span class="p">):</span>
+        <span class="n">std</span> <span class="o">=</span> <span class="n">logvar</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">exp_</span><span class="p">()</span>
+        
+        <span class="n">eps</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">FloatTensor</span><span class="p">(</span><span class="n">std</span><span class="o">.</span><span class="n">size</span><span class="p">())</span><span class="o">.</span><span class="n">normal_</span><span class="p">()</span>
+        <span class="n">eps</span> <span class="o">=</span> <span class="n">Variable</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
+        
+        <span class="k">return</span> <span class="n">eps</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">std</span><span class="p">)</span><span class="o">.</span><span class="n">add_</span><span class="p">(</span><span class="n">mu</span><span class="p">)</span>
+    
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
+        <span class="n">mu</span><span class="p">,</span> <span class="n">logvar</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+        <span class="n">reparam</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reparametrize</span><span class="p">(</span><span class="n">mu</span><span class="p">,</span><span class="n">logvar</span><span class="p">)</span>
+        
+        <span class="k">return</span> <span class="n">mu</span><span class="p">,</span><span class="n">logvar</span><span class="p">,</span><span class="n">reparam</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>Decoder 구현 code</strong></p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">Decoder</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
+    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">Decoder</span><span class="p">,</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">fc1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="mi">784</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">sigmoid</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Sigmoid</span><span class="p">()</span>
+    
+    <span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">x</span><span class="p">):</span>
+        <span class="n">out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fc1</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
+        <span class="n">out</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
+        <span class="n">out</span> <span class="o">=</span> <span class="n">out</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="n">batch_size</span><span class="p">,</span><span class="mi">28</span><span class="p">,</span><span class="mi">28</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span>
+        
+        <span class="k">return</span> <span class="n">out</span>
+</pre></div>
+</div>
+</li>
+</ul>
+</section>
+<section id="sgvb-stochastic-gradient-variational-bayes">
+<h2>SGVB(Stochastic Gradient Variational Bayes)<a class="headerlink" href="#sgvb-stochastic-gradient-variational-bayes" title="Permalink to this heading">#</a></h2>
+<p>이로써 우리는 marginal likelihood <span class="math notranslate nohighlight">\(p_{\theta}(x)\)</span> 를 최대화시키는 파라미터 <span class="math notranslate nohighlight">\((\theta, \phi)\)</span> 를 찾으면 되고, 이에 대한 lower bound 를 수식적으로 표현하면 손실함수(loss function) 를 다음과 같이 Reconstruction Error 와 Regularization term 으로 분할할 수 있습니다.</p>
+<div class="math notranslate nohighlight">
+\[
+L(\theta, \phi;x_i) = \arg \min_{\theta, \phi} \sum_{i} -\mathbb{E}_{q_{\phi}(z|x_i)}[\log(p(x_i|g_{\theta}(z)))] + KL(q_{\phi}(z|x_i)||p(z))
+\]</div>
+<p>Reconstruction Error 는 Decoder 에서 생성하는 데이터가 최대한 원본 데이터와 유사하도록 하는 term 이고, Regularization 은 Encoder 에서 만드는 잠재변수의 분포가 저희가 부여한 prior distribution 이랑 가깝도록 설정하는 term 입니다. 이때, Reconstruction Error 는 다음과 같은 Monte Carlo Estimator <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{B}\)</span> 로 근사값을 구할 수 있고, 하나의 sample 을 계산하는 것도 연산량이 많으므로 논문에서는 sample size <span class="math notranslate nohighlight">\(L\)</span> 을 1 로 설정합니다.</p>
+<div class="math notranslate nohighlight">
+\[ \tilde{\mathcal{L}}^{B}(\theta, \phi; x^{(i)})=-D_{KL}(q_{\phi}(z|x^{(i)})||p_{\theta}(z))+\frac{1}{L}\sum^{L}_{l=1} \log p_{\theta}(x^{(i)}|z^{(i,l)}) \]</div>
+<p>Lower bound 에 대한 수식을 변환하여 수식 전체에 대한 Monte Carlo Estimator <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{A}\)</span> 로도 근사값을 구할 수 있는데 이는 평균적으로 <span class="math notranslate nohighlight">\(\tilde{\mathcal{L}}^{B}\)</span> 에 비해 variance 가 높다고 합니다.</p>
+<div class="math notranslate nohighlight">
+\[ \tilde{\mathcal{L}}^{A}(\theta,\phi;x^{(i)})=\frac{1}{L}\sum^{L}_{l=1}\log p_{\theta}(x^{(i)},z^{(i,l)})-\log q_{\phi}(z^{(i,l)}|x^{(i)}) \]</div>
+</section>
+<section id="reparameterization-trick">
+<h2>Reparameterization Trick<a class="headerlink" href="#reparameterization-trick" title="Permalink to this heading">#</a></h2>
+<p>논문에서는 모델 학습 시 backpropagation 을 원활히 사용할 수 있도록 reparameterization trick 을 소개합니다. 잠재변수 <span class="math notranslate nohighlight">\(z\)</span> 를 Encoder 에서 나온 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 로 직접 샘플링하지 않고, backpropagation 이 가능하도록 Gaussian noise 를 우선적으로 샘플링하고 해당 <span class="math notranslate nohighlight">\(\mu\)</span> 와 <span class="math notranslate nohighlight">\(\sigma\)</span> 를 각각 더하고 곱하게 됩니다. 이는 <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 이 Gaussian distribution 을 따른다고 설정했을 때이고, <span class="math notranslate nohighlight">\(q_{\phi}(z|x)\)</span> 에 대해 다른 분포를 가정하여 그에 따른 다른 reparameterization trick 을 시도할 수 있다고 논문에 명시되어 있습니다.</p>
+<p>하단은 Gaussian 분포에 대한 reparameterization trick 을 도식화한 그림입니다. 왼쪽은 reparameterization trick 이 적용되지 않은 경우로 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 값이 고정되어 있어도 <span class="math notranslate nohighlight">\(\mathcal{N}(\mu,\sigma^2)\)</span> 에서 샘플링하므로 <span class="math notranslate nohighlight">\(z\)</span> 는 랜덤한 값이 되고 loss function 을 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 에 대해 미분할 수 없어 backpropagation 을 적용하기가 어렵습니다. 반면에, 오른쪽처럼 reparameterization trick 을 적용하면, <span class="math notranslate nohighlight">\(z\)</span> 가 deterministic 한 형태로 정의되고 <span class="math notranslate nohighlight">\(\mu\)</span> 나 <span class="math notranslate nohighlight">\(\sigma\)</span> 에 대해서도 미분이 가능해지기 때문에 backpropagation 을 활용하여 모델을 학습시킬 수 있습니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_05.png"><img alt="vae_05" class="bg-primary mb-1" src="../../_images/vae_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 4 </span><span class="caption-text">Overview of Reparameterization Trick</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="training-algorithm">
+<h2>Training Algorithm<a class="headerlink" href="#training-algorithm" title="Permalink to this heading">#</a></h2>
+<p>모델을 학습하는 전체적인 과정은 아래와 같습니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_09.png"><img alt="vae_09" class="bg-primary mb-1" src="../../_images/vae_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 5 </span><span class="caption-text">Overview of Training Algorithm</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="experiments">
+<h2>Experiments<a class="headerlink" href="#experiments" title="Permalink to this heading">#</a></h2>
+<p>논문에서는 MNIST 와 Frey Face 데이터셋에 대해 AEVB(Auto-Encoding Variational Bayes)와 wake-sleep 알고리즘을 적용해서 비교합니다. 여기서 Frey Face 데이터셋은 continuous 하므로 Gaussian Decoder 를 사용합니다. 실험결과는 아래 그림과 같습니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_10.png"><img alt="vae_10" class="bg-primary mb-1" src="../../_images/vae_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 6 </span><span class="caption-text">Experimental Results - Likelihood lower bound</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>해당 그림처럼 lower bound 를 최적화하는데 AEVB 알고리즘이 더 빠르게 수렴하며 모든 실험에서 성능적으로도 더 나은 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/vae_11.png"><img alt="vae_11" class="bg-primary mb-1" src="../../_images/vae_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 7 </span><span class="caption-text">Experimental Results - Marginal likelihood</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, latent variable <span class="math notranslate nohighlight">\(z\)</span> 의 차원이 작으면 marginal likelihood 를 직접 Monte Carlo EM 을 활용하여 구할 수 있는데, 이에 대한 결과도 논문에서 보여줍니다. Monte Carlo EM 의 경우 학습 데이터가 많으면 수렴이 되지 않는 부분을 확인할 수 있습니다.</p>
+</section>
+<section id="summary">
+<h2>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h2>
+<p>AutoEncoder 는 latent space 에 하나의 값으로 지정해줬다면, VAE 는 평균 그리고 분산 파라미터들과 Gaussian 분포를 가진 샘플을 통해 잠재변수를 생성합니다. 그리고 VAE 를 실제로 사용해보면 생성된 데이터 image quality 가 낮다는 단점을 가지고 있다고 합니다.</p>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="../../intro.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Welcome to PseudoDiffusers!!</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="gan.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">GAN</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#intractability">Intractability</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sgvb-stochastic-gradient-variational-bayes">SGVB(Stochastic Gradient Variational Bayes)</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#reparameterization-trick">Reparameterization Trick</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#training-algorithm">Training Algorithm</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#experiments">Experiments</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#summary">Summary</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/docs/review/zero123plus.html b/docs/review/zero123plus.html
new file mode 100755
index 00000000..910a3fa9
--- /dev/null
+++ b/docs/review/zero123plus.html
@@ -0,0 +1,852 @@
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Zero123++ &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="../../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="../../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="../../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="../../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
+    <link rel="stylesheet" href="../../_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="../../_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+    <script src="../../_static/jquery.js"></script>
+    <script src="../../_static/underscore.js"></script>
+    <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/clipboard.min.js"></script>
+    <script src="../../_static/copybutton.js"></script>
+    <script src="../../_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="../../_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="../../_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="../../_static/sphinx-thebe.js"></script>
+    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
+    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'docs/review/zero123plus';</script>
+    <link rel="shortcut icon" href="../../_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation" href="ProlificDreamer.html" />
+    <link rel="prev" title="Dream Booth 3D" href="DreamBooth3D.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="../../search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="../../intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="../../_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="../../_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="../../intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="current nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1 current active"><a class="current reference internal" href="#">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="../experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fdocs/review/zero123plus.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="../../_sources/docs/review/zero123plus.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Zero123++</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-consistency-and-conditioning">2. Improving Consistency and Conditioning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-view-generation">2.1. Multi-view Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-and-stability-noise-schedule">2.2. Consistency and Stability: Noise Schedule</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#local-condition-scaled-reference-attention">2.3. Local Condition: Scaled Reference Attention</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#global-condition-flexdiffuse">2.4. Global Condition: FlexDiffuse</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5. Putting Everything Together</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-the-state-of-the-art">3. Comparison to the State of the Art</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-multi-view">3.1. Image to Multi-view</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-multi-view">3.2. Text to Multi-view</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#depth-controlnet-for-zero123">4. Depth ControlNet for Zero123++</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <div class="admonition-information admonition">
+<p class="admonition-title">Information</p>
+<ul class="simple">
+<li><p><strong>Title:</strong> Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model</p></li>
+<li><p><strong>Reference</strong></p>
+<ul>
+<li><p>Paper: <a class="reference external" href="https://arxiv.org/abs/2310.15110">https://arxiv.org/abs/2310.15110</a></p></li>
+<li><p>Code: <a class="github reference external" href="https://github.com/SUDO-AI-3D/zero123plus">SUDO-AI-3D/zero123plus</a></p></li>
+</ul>
+</li>
+<li><p><strong>Author:</strong> Sangwoo Jo</p></li>
+<li><p><strong>Last updated on Dec. 16, 2024</strong></p></li>
+</ul>
+</div>
+<section class="tex2jax_ignore mathjax_ignore" id="zero123">
+<h1>Zero123++<a class="headerlink" href="#zero123" title="Permalink to this heading">#</a></h1>
+<section id="introduction">
+<h2>1. Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading">#</a></h2>
+<figure class="align-default" id="id1">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_01.png"><img alt="zero123plus_01" class="bg-primary mb-1" src="../../_images/zero123plus_01.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 742 </span><span class="caption-text">High-quality, consistent multi-view 3D images from Zero123++</span><a class="headerlink" href="#id1" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Zero-1-to-3(Zero123) 논문이 zero-shot 형태로 single-image-to-3D conversion 을 하는 기법을 처음으로 소개하였습니다. 하지만 해당 방식으로는 주어진 view 에 대해서 독립적으로 객체를 생성하게 됨으로써 multi-view consistency 에서 부족한 부분을 보여주게 된다고 설명합니다. Zero123++ 논문에서 이를 해결하기 위해 여섯개의 view 로부터 하나의 이미지를 생성하여 multi-view 에 대한 joint distribution 을 학습할 수 있도록 설정합니다.</p>
+<p>또한, Zero-1-to-3 논문에서 다음과 같은 한계점이 있다고 제시합니다.</p>
+<p>a) 첫번째로 global 및 local conditioning mechanism 을 비롯한 Stable Diffusion model prior 를 효율적으로 사용하지 않았고,</p>
+<p>b) 두번째로 Zero-1-to-3 논문에서 512x512 이미지 해상도로 학습 시 불안정하게 수렴하게 되어 256x256 해상도로 줄인 부분에 대해 논문 저자는 원인을 분석하며 새로운 scheduling 기법을 소개합니다.</p>
+</section>
+<section id="improving-consistency-and-conditioning">
+<h2>2. Improving Consistency and Conditioning<a class="headerlink" href="#improving-consistency-and-conditioning" title="Permalink to this heading">#</a></h2>
+<section id="multi-view-generation">
+<h3>2.1. Multi-view Generation<a class="headerlink" href="#multi-view-generation" title="Permalink to this heading">#</a></h3>
+<p>Zero-1-to-3 모델은 단일 이미지를 독립적으로 생성하며 multi-view 이미지에 대한 상관관계를 학습 혹은 생성 시에 고려하지 않습니다. 따라서, Zero123++에서는 3×2 layout 의 6개 이미지를 단일 프레임으로 tiling 하여 multiple image 에 대한 joint distribution 을 학습하게 됩니다.</p>
+<p>Objaverse 데이터셋은 기본적으로 gravity axis 는 동일하지만 객체들이 일관된 canonical pose 를 가지고 있지 않습니다. 따라서 절대적인 camera pose 를 기반으로 해당 데이터셋을 학습하게 되면 객체의 orientation 을 학습하는데 어려움이 있다고 주장합니다.</p>
+<p>반면에 Zero-1-to-3 는 input view 에 대한 상대적인 camera pose(elevation/azimuth angle) 을 입력받아 학습하였습니다. 그러나 해당 방식을 활용한다면 novel view 에 대한 relative pose 를 구하기 위해서는 input view 에 대한 elevation angle 을 사전에 알아야 한다는 단점이 있습니다. 후속적으로 One-2-3-45 그리고 DreamGaussian 논문에서 elevation angle 을 추가적으로 예측하는 모듈을 정의하고, 이에 따라 오차율도 증가하게 됩니다.</p>
+<ul>
+<li><p>Elevation/Azimuth angle 이란?</p>
+<figure class="align-default" id="id2">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_02.png"><img alt="zero123plus_02" class="bg-primary mb-1" src="../../_images/zero123plus_02.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 743 </span><span class="caption-text">Elevation/Azimuth angle</span><a class="headerlink" href="#id2" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</li>
+</ul>
+<p>이러한 문제를 해결하기 위해 elevation angle 을 고정시킨 상태에서 상대적인 azimuth angle 을 통한 novel view pose 를 정의합니다. 더 자세하게는 6개의 pose 를 아래 사진과 같이 정의하게 됩니다.</p>
+<figure class="align-default" id="id3">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_03.png"><img alt="zero123plus_03" class="bg-primary mb-1" src="../../_images/zero123plus_03.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 744 </span><span class="caption-text">3x2 layout of Zero123++ prediction</span><a class="headerlink" href="#id3" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="consistency-and-stability-noise-schedule">
+<h3>2.2. Consistency and Stability: Noise Schedule<a class="headerlink" href="#consistency-and-stability-noise-schedule" title="Permalink to this heading">#</a></h3>
+<p>Stable Diffusion 모델에서 사용되었던 scaled-linear schedule 은 local detail 을 학습하는데 초점을 두고 Signal-to-Noise Ratio (SNR) 가 낮은 timestep 이 극히 드뭅니다. SNR 이 낮은 구간에서 global low frequency 정보들을 학습하게 되며 해당 단계에서 step 수가 적으면 구조적인 변형이 클 수가 있습니다. 따라서, 이러한 scheduling 은 단일 이미지를 생성하는데는 유용하지만 multi-view consistent 한 이미지를 생성하는데 한계가 있다고 주장합니다.</p>
+<figure class="align-default" id="id4">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_04.png"><img alt="zero123plus_04" class="bg-primary mb-1" src="../../_images/zero123plus_04.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 745 </span><span class="caption-text">Linear vs Scaled linear schedule</span><a class="headerlink" href="#id4" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>또한, 동일한 noise 가 주입되었을때 고해상도 이미지가 저해상도 이미지에 비해 noise level 이 적기 때문에, Zero-1-to-3 모델에서 고해상도 이미지를 학습하였을 때 불안정한 모습을 보여주었던 것도 동일한 원인 때문이라고 설명합니다.</p>
+<p>Zero123++ 에서는 scaled-linear schedule 대신에 linear schedule 를 사용하게 되고, 변화된 schedule 에 따라 <span class="math notranslate nohighlight">\(x\)</span>-prediction, <span class="math notranslate nohighlight">\(\epsilon\)</span>-prediction 모델보다 <span class="math notranslate nohighlight">\(v\)</span>-prediction 모델이 더 안정적으로 학습되었다고 합니다. 따라서, Stable Diffusion 2 <span class="math notranslate nohighlight">\(v\)</span>-prediction 모델로 fine-tuning 을 진행하였다고 합니다.</p>
+</section>
+<section id="local-condition-scaled-reference-attention">
+<h3>2.3. Local Condition: Scaled Reference Attention<a class="headerlink" href="#local-condition-scaled-reference-attention" title="Permalink to this heading">#</a></h3>
+<p>기존에 Zero-1-to-3 논문에서 noisy input 과 conditioned image(single-view input) 를 feature-wise concatenate 하는데 해당 방식으로는 pixel-wise spatial correspondence 가 정확하지 않다고 합니다.</p>
+<p>Zero123++ 에서는 이 부분을 보완하여 Reference Attention 이라는 기법을 소개합니다. Reference Attention 이란, 아래 그림과 같이 noisy latent 와 conditioned latent 간에 self-attention 모듈에서의 key, value 값을 추가하여 연산 작업을 진행합니다. 이때, noisy input 에 주입된 noise 를 동일하게 conditioned image 에 적용하였다고 합니다.</p>
+<figure class="align-default" id="id5">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_05.png"><img alt="zero123plus_05" class="bg-primary mb-1" src="../../_images/zero123plus_05.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 746 </span><span class="caption-text">Reference Attention</span><a class="headerlink" href="#id5" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>Reference Attention 기법을 적용한 결과, fine-tuning 작업을 진행하지 않아도 reference image 에서의 semantic content 와 texture 가 잘 반영되었습니다. 또한, fine-tuning 을 하였을때 reference latent 을 5x scaling 하였을때 reference image 와의 일관성을 가장 잘 보여주었다고 합니다.</p>
+<figure class="align-default" id="id6">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_06.png"><img alt="zero123plus_06" class="bg-primary mb-1" src="../../_images/zero123plus_06.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 747 </span><span class="caption-text">Comparison on local conditioning</span><a class="headerlink" href="#id6" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="global-condition-flexdiffuse">
+<h3>2.4. Global Condition: FlexDiffuse<a class="headerlink" href="#global-condition-flexdiffuse" title="Permalink to this heading">#</a></h3>
+<p>Zero123++ 논문에서 추가적으로 FlexDiffuse 에서 소개한 linear guidance mechanism 을 활용하여 fine-tuning 범위를 최소화하는 선에서 global image conditioning 하였습니다.</p>
+<p>더 자세하게는, <span class="math notranslate nohighlight">\(L \times D\)</span> 차원의 prompt embedding <span class="math notranslate nohighlight">\(T\)</span> 와 <span class="math notranslate nohighlight">\(D\)</span> 차원의 CLIP global image embedding <span class="math notranslate nohighlight">\(I\)</span> 에 global weight <span class="math notranslate nohighlight">\(w_i\)</span> 를 곱한 값을 더하여 모델에 입력합니다. 이때, <span class="math notranslate nohighlight">\(L\)</span> 은 token length 이고 <span class="math notranslate nohighlight">\(D\)</span> 는 token embedding 의 차원 크기입니다. 이때, <span class="math notranslate nohighlight">\(w_i = \frac{i}{L}\)</span> 로 초기 가중치 값을 설정하였습니다. Text condition 이 없을 경우에는 empty prompt 를 encoding 하여 <span class="math notranslate nohighlight">\(T\)</span> 를 얻게 됩니다.</p>
+<figure class="align-default" id="id7">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_07.png"><img alt="zero123plus_07" class="bg-primary mb-1" src="../../_images/zero123plus_07.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 748 </span><span class="caption-text">FlexDiffuse’s linear guidance</span><a class="headerlink" href="#id7" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p>위와 같은 global conditioning 을 하였을때, 보이지 않은 unseen region 에서도 semantic 한 정보들을 유지한채 이미지를 잘 생성하는 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id8">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_08.png"><img alt="zero123plus_08" class="bg-primary mb-1" src="../../_images/zero123plus_08.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 749 </span><span class="caption-text">Ablation on global conditioning</span><a class="headerlink" href="#id8" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="putting-everything-together">
+<h3>2.5. Putting Everything Together<a class="headerlink" href="#putting-everything-together" title="Permalink to this heading">#</a></h3>
+<p>정리하자면 해당 논문은 Stable Diffusion 2 <span class="math notranslate nohighlight">\(v\)</span>-model 을 사용하였고, Objaverse 데이터를 random HDRI environment lighting 를 적용하여 렌더링한 데이터에 학습하였습니다. 그리고 Stable Diffusion Image Variations model 의 학습 방식을 도입하여 two-stage 로 학습을 진행하였습니다.</p>
+<p>첫번째 phase 에서는 self-attention layer 와 cross-attention layer 의 KV 행렬만 fine-tuning 을 하였고, AdamW optimizer 와 cosine annealing schedule 을 사용하였습니다. 두번째 phase 에서는 UNet 모델 전체를 학습하고 <span class="math notranslate nohighlight">\(5 \times 10^{-6}\)</span> 값의 constant learning rate 를 사용하였습니다. 그리고 학습 과정을 더 효율적으로 하기 위해 Min-SNR weighting 기법도 활용하였습니다.</p>
+</section>
+</section>
+<section id="comparison-to-the-state-of-the-art">
+<h2>3. Comparison to the State of the Art<a class="headerlink" href="#comparison-to-the-state-of-the-art" title="Permalink to this heading">#</a></h2>
+<section id="image-to-multi-view">
+<h3>3.1. Image to Multi-view<a class="headerlink" href="#image-to-multi-view" title="Permalink to this heading">#</a></h3>
+<p><strong>Qualitative Comparison</strong></p>
+<p>논문에서 Zero-1-to-3 XL 그리고 SyncDreamer 모델과의 성능을 비교합니다. Zero123++ 모델이 unseen view 에 대해서 가장 월등하게 이미지를 생성하는 것을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id9">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_09.png"><img alt="zero123plus_09" class="bg-primary mb-1" src="../../_images/zero123plus_09.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 750 </span><span class="caption-text">Qualitative comparison on image to multi-view task</span><a class="headerlink" href="#id9" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+<p><strong>Quantitative Comparison</strong></p>
+<p>정량적으로 LPIPS 지표를 기준으로 비교하였을 때에도 Zero123++ 모델이 가장 좋은 성능을 보여주고 있습니다. 이때, 모델이 생성한 6개의 이미지와 Objaverse 데이터셋을 렌더링한 6개의 이미지를 각각 결합하여 LPIPS 를 측정하였다고 합니다.</p>
+<figure class="align-default" id="id10">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_10.png"><img alt="zero123plus_10" class="bg-primary mb-1" src="../../_images/zero123plus_10.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 751 </span><span class="caption-text">Quantitative Comparison on image to multi-view task</span><a class="headerlink" href="#id10" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+<section id="text-to-multi-view">
+<h3>3.2. Text to Multi-view<a class="headerlink" href="#text-to-multi-view" title="Permalink to this heading">#</a></h3>
+<p>Text 를 입력받아 우선적으로 SDXL 모델을 통해 단일 이미지를 생성한 후, Zero123++ 모델을 적용한 결과입니다. MVDream 과 Zero-1-to-3 XL 모델과 비교하였을 때, Zero123++ 모델이 가장 realistic 하고 multi-view consistent 한 이미지를 생성하는 부분을 확인할 수 있습니다.</p>
+<figure class="align-default" id="id11">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_11.png"><img alt="zero123plus_11" class="bg-primary mb-1" src="../../_images/zero123plus_11.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 752 </span><span class="caption-text">Qualitative comparison on text to multi-view task</span><a class="headerlink" href="#id11" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+<section id="depth-controlnet-for-zero123">
+<h2>4. Depth ControlNet for Zero123++<a class="headerlink" href="#depth-controlnet-for-zero123" title="Permalink to this heading">#</a></h2>
+<p>아래 사진은 추가적으로 렌더링한 depth map 을 기반으로 ControlNet 을 학습한 결과입니다.</p>
+<figure class="align-default" id="id12">
+<a class="bg-primary mb-1 reference internal image-reference" href="../../_images/zero123plus_12.png"><img alt="zero123plus_12" class="bg-primary mb-1" src="../../_images/zero123plus_12.png" style="width: 700px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 753 </span><span class="caption-text">Depth-controlled Zero123++</span><a class="headerlink" href="#id12" title="Permalink to this image">#</a></p>
+</figcaption>
+</figure>
+</section>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./docs/review"
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="left-prev"
+       href="DreamBooth3D.html"
+       title="previous page">
+      <i class="fa-solid fa-angle-left"></i>
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">previous</p>
+        <p class="prev-next-title">Dream Booth 3D</p>
+      </div>
+    </a>
+    <a class="right-next"
+       href="ProlificDreamer.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#introduction">1. Introduction</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#improving-consistency-and-conditioning">2. Improving Consistency and Conditioning</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#multi-view-generation">2.1. Multi-view Generation</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#consistency-and-stability-noise-schedule">2.2. Consistency and Stability: Noise Schedule</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#local-condition-scaled-reference-attention">2.3. Local Condition: Scaled Reference Attention</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#global-condition-flexdiffuse">2.4. Global Condition: FlexDiffuse</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#putting-everything-together">2.5. Putting Everything Together</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#comparison-to-the-state-of-the-art">3. Comparison to the State of the Art</a><ul class="nav section-nav flex-column">
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#image-to-multi-view">3.1. Image to Multi-view</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#text-to-multi-view">3.2. Text to Multi-view</a></li>
+</ul>
+</li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#depth-controlnet-for-zero123">4. Depth ControlNet for Zero123++</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
+</html>
\ No newline at end of file
diff --git a/genindex.html b/genindex.html
old mode 100644
new mode 100755
index 38d21169..a4438284
--- a/genindex.html
+++ b/genindex.html
@@ -1,546 +1,566 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Index &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
-    <script src="_static/jquery.js"></script>
-    <script src="_static/underscore.js"></script>
-    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="_static/doctools.js"></script>
-    <script src="_static/clipboard.min.js"></script>
-    <script src="_static/copybutton.js"></script>
-    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'genindex';</script>
-    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="#" />
-    <link rel="search" title="Search" href="search.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-        
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fgenindex.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1></h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-
-<h1 id="index">Index</h1>
-
-<div class="genindex-jumpbox">
- 
-</div>
-
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Index &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script src="_static/jquery.js"></script>
+    <script src="_static/underscore.js"></script>
+    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="_static/doctools.js"></script>
+    <script src="_static/clipboard.min.js"></script>
+    <script src="_static/copybutton.js"></script>
+    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'genindex';</script>
+    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="#" />
+    <link rel="search" title="Search" href="search.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+        
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="pst-primary-sidebar-checkbox" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fgenindex.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1></h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+
+<h1 id="index">Index</h1>
+
+<div class="genindex-jumpbox">
+ 
+</div>
+
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/index.html b/index.html
old mode 100644
new mode 100755
diff --git a/intro.html b/intro.html
old mode 100644
new mode 100755
index fb047f8a..a689a612
--- a/intro.html
+++ b/intro.html
@@ -1,709 +1,729 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
-
-    <title>Welcome to PseudoDiffusers!! &#8212; Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
-    <script src="_static/jquery.js"></script>
-    <script src="_static/underscore.js"></script>
-    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="_static/doctools.js"></script>
-    <script src="_static/clipboard.min.js"></script>
-    <script src="_static/copybutton.js"></script>
-    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'intro';</script>
-    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="genindex.html" />
-    <link rel="search" title="Search" href="search.html" />
-    <link rel="next" title="VAE" href="docs/review/vae.html" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="search.html"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-        
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="#">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1 current active">
-                <a class="reference internal" href="#">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fintro.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-
-
-<div class="dropdown dropdown-download-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
-    <i class="fas fa-download"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="_sources/intro.md" target="_blank"
-   class="btn btn-sm btn-download-source-button dropdown-item"
-   title="Download source file"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file"></i>
-  </span>
-<span class="btn__text-container">.md</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li>
-<button onclick="window.print()"
-  class="btn btn-sm btn-download-pdf-button dropdown-item"
-  title="Print to PDF"
-  data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-file-pdf"></i>
-  </span>
-<span class="btn__text-container">.pdf</span>
-</button>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary"title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <span class="fa-solid fa-list"></span>
-</label>
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-
-<div id="jb-print-docs-body" class="onlyprint">
-    <h1>Welcome to PseudoDiffusers!!</h1>
-    <!-- Table of contents -->
-    <div id="print-main-content">
-        <div id="jb-print-toc">
-            
-            <div>
-                <h2> Contents </h2>
-            </div>
-            <nav aria-label="Page">
-                <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#about-us">About Us</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#publications">Publications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tech-blog-contents">Tech Blog Contents</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributors">Contributors</a></li>
-</ul>
-            </nav>
-        </div>
-    </div>
-</div>
-
-              
-                
-<div id="searchbox"></div>
-                <article class="bd-article">
-                  
-  <section class="tex2jax_ignore mathjax_ignore" id="welcome-to-pseudodiffusers">
-<h1>Welcome to PseudoDiffusers!!<a class="headerlink" href="#welcome-to-pseudodiffusers" title="Permalink to this heading">#</a></h1>
-<section id="about-us">
-<h2>About Us<a class="headerlink" href="#about-us" title="Permalink to this heading">#</a></h2>
-<p>This is the repository of PseudoDiffusers team.</p>
-<p>Our aim is to review papers and code related to computer vision generation models, approach them theoretically, and conduct various experiments by fine-tuning diffusion based models.</p>
-<p><a class="reference external" href="https://www.linkedin.com/company/pseudolab/">About Us - PseudoLab</a></p>
-<p><a class="reference external" href="https://chanrankim.notion.site/PseudoDiffusers-b666d39ea1924b4692796e442bebcd44">About Us - PseudoDiffusers</a></p>
-<p>참여 방법: 매주 수요일 오후 9시, 가짜연구소 Discord Room-DH 로 입장!</p>
-</section>
-<section id="publications">
-<h2>Publications<a class="headerlink" href="#publications" title="Permalink to this heading">#</a></h2>
-<p><strong>DiffInject: Revisiting Debias via Synthetic Data Generation using Diffusion-based Style Injection</strong><br />
-Donggeun Ko*, Sangwoo Jo*, Dongjun Lee, Namjun Park, Jaekwang KIM<br />
-CVPR 2024 Workshop<br />
-<a class="reference external" href="https://openreview.net/pdf?id=jSB5wlUU3p">PDF</a></p>
-</section>
-<section id="tech-blog-contents">
-<h2>Tech Blog Contents<a class="headerlink" href="#tech-blog-contents" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Preliminary Works</p></li>
-<li><p>Image Generation</p></li>
-<li><p>Video Generation</p></li>
-<li><p>3D Generation</p></li>
-<li><p>Experiments</p></li>
-</ul>
-</section>
-<section id="contributors">
-<h2>Contributors<a class="headerlink" href="#contributors" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>조상우 [Sangwoo Jo] | <a class="reference external" href="https://github.com/jasonjo97">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/sangwoojo/">Linkedin</a> |</p></li>
-<li><p>문광수 [Kwangsu Mun] | <a class="reference external" href="https://github.com/mksoo">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/%EA%B4%91%EC%88%98-%EB%AC%B8-95681b229/">Linkedin</a> |</p></li>
-<li><p>김지수 [Jisu Kim] | Github |  <a class="reference external" href="https://www.linkedin.com/in/%EC%A7%80%EC%88%98-%EA%B9%80-5a0b2320a/">Linkedin</a> |</p></li>
-<li><p>박범수 [Beomsoo Park] | <a class="reference external" href="https://github.com/hanlyang0522">Github</a> | Linkedin |</p></li>
-<li><p>지승환 [Seunghwan Ji] | <a class="reference external" href="https://github.com/hwansnaa">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/%EC%8A%B9%ED%99%98-%EC%A7%80-0169b425a/">Linkedin</a> |</p></li>
-<li><p>고동근 [Donggeun Sean Ko] | <a class="reference external" href="https://github.com/seanko29">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/sangwoojo/">Linkedin</a> |</p></li>
-<li><p>조남경 [Namkyeong Cho] | Github | Linkedin |</p></li>
-<li><p>김선훈 [SeonHoon Kim] | <a class="reference external" href="https://github.com/egshkim">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/seonhoonkim/">Linkedin</a> |</p></li>
-<li><p>이준형 [Junhyoung Lee] | <a class="reference external" href="https://github.com/jjuun0">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/jjuun0">Linkedin</a> |</p></li>
-<li><p>조형서 [Hyoungseo Cho] | <a class="reference external" href="https://github.com/ChoHyoungSeo">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/hyoungseo-cho/">Linkedin</a> |</p></li>
-<li><p>유정화 [Jeonghwa Yoo] | <a class="reference external" href="https://github.com/jeongHwarr">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b/">Linkedin</a> |</p></li>
-<li><p>박세환 [Sehwan Park] | <a class="reference external" href="https://github.com/shp216">Github</a> | Linkedin |</p></li>
-<li><p>송건학 [Geonhak Song] | <a class="reference external" href="https://github.com/geonhak904">Github</a> | Linkedin |</p></li>
-<li><p>한동현 [Donghyun Han] | <a class="reference external" href="https://github.com/donghyun99">GitHub</a> | <a class="reference external" href="https://www.linkedin.com/in/donghyun99/">Linkedin</a> |</p></li>
-<li><p>이창환 [ChangHwan Lee] | <a class="reference external" href="https://github.com/Hwan-I">Github</a> | Linkedin</p></li>
-<li><p>유경민 [Kyeongmin Yu] | <a class="reference external" href="https://github.com/yukyeongmin">Github</a> | Linkdedin |</p></li>
-<li><p>이정인 [Jeongin Lee] | Github | Linkdedin |</p></li>
-</ul>
-</section>
-<div class="toctree-wrapper compound">
-</div>
-<div class="toctree-wrapper compound">
-</div>
-<div class="toctree-wrapper compound">
-</div>
-<div class="toctree-wrapper compound">
-</div>
-<div class="toctree-wrapper compound">
-</div>
-</section>
-
-    <script type="text/x-thebe-config">
-    {
-        requestKernel: true,
-        binderOptions: {
-            repo: "binder-examples/jupyter-stacks-datascience",
-            ref: "master",
-        },
-        codeMirrorConfig: {
-            theme: "abcdef",
-            mode: "python"
-        },
-        kernelOptions: {
-            name: "python3",
-            path: "./."
-        },
-        predefinedOutput: true
-    }
-    </script>
-    <script>kernelName = 'python3'</script>
-
-                </article>
-              
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-    <a class="right-next"
-       href="docs/review/vae.html"
-       title="next page">
-      <div class="prev-next-info">
-        <p class="prev-next-subtitle">next</p>
-        <p class="prev-next-title">VAE</p>
-      </div>
-      <i class="fa-solid fa-angle-right"></i>
-    </a>
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
-
-
-  <div class="sidebar-secondary-item">
-  <div class="page-toc tocsection onthispage">
-    <i class="fa-solid fa-list"></i> Contents
-  </div>
-  <nav class="bd-toc-nav page-toc">
-    <ul class="visible nav section-nav flex-column">
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#about-us">About Us</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#publications">Publications</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tech-blog-contents">Tech Blog Contents</a></li>
-<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributors">Contributors</a></li>
-</ul>
-  </nav></div>
-
-</div></div>
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+    <title>Welcome to PseudoDiffusers!! &#8212; Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script src="_static/jquery.js"></script>
+    <script src="_static/underscore.js"></script>
+    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="_static/doctools.js"></script>
+    <script src="_static/clipboard.min.js"></script>
+    <script src="_static/copybutton.js"></script>
+    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'intro';</script>
+    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="VAE" href="docs/review/vae.html" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="search.html"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+        
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="#">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1 current active">
+                <a class="reference internal" href="#">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fintro.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+
+
+<div class="dropdown dropdown-download-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
+    <i class="fas fa-download"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="_sources/intro.md" target="_blank"
+   class="btn btn-sm btn-download-source-button dropdown-item"
+   title="Download source file"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file"></i>
+  </span>
+<span class="btn__text-container">.md</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li>
+<button onclick="window.print()"
+  class="btn btn-sm btn-download-pdf-button dropdown-item"
+  title="Print to PDF"
+  data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-file-pdf"></i>
+  </span>
+<span class="btn__text-container">.pdf</span>
+</button>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <span class="fa-solid fa-list"></span>
+</label>
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+
+<div id="jb-print-docs-body" class="onlyprint">
+    <h1>Welcome to PseudoDiffusers!!</h1>
+    <!-- Table of contents -->
+    <div id="print-main-content">
+        <div id="jb-print-toc">
+            
+            <div>
+                <h2> Contents </h2>
+            </div>
+            <nav aria-label="Page">
+                <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#about-us">About Us</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#publications">Publications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tech-blog-contents">Tech Blog Contents</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributors">Contributors</a></li>
+</ul>
+            </nav>
+        </div>
+    </div>
+</div>
+
+              
+                
+<div id="searchbox"></div>
+                <article class="bd-article">
+                  
+  <section class="tex2jax_ignore mathjax_ignore" id="welcome-to-pseudodiffusers">
+<h1>Welcome to PseudoDiffusers!!<a class="headerlink" href="#welcome-to-pseudodiffusers" title="Permalink to this heading">#</a></h1>
+<section id="about-us">
+<h2>About Us<a class="headerlink" href="#about-us" title="Permalink to this heading">#</a></h2>
+<p>This is the repository of the PseudoDiffusers team.</p>
+<p>Our aim is to review papers and code related to generative models in computer vision, approach them theoretically, and conduct various experiments by fine-tuning diffusion-based models.</p>
+<p><a class="reference external" href="https://www.linkedin.com/company/pseudolab/">About Us - PseudoLab</a></p>
+<p><a class="reference external" href="https://chanrankim.notion.site/PseudoDiffusers-b666d39ea1924b4692796e442bebcd44">About Us - PseudoDiffusers</a></p>
+<p>참여 방법: 매주 수요일 오후 9시, 가짜연구소 Discord Room-DH 로 입장!</p>
+</section>
+<section id="publications">
+<h2>Publications<a class="headerlink" href="#publications" title="Permalink to this heading">#</a></h2>
+<p><strong>DiffInject: Revisiting Debias via Synthetic Data Generation using Diffusion-based Style Injection</strong><br />
+Donggeun Ko*, Sangwoo Jo*, Dongjun Lee, Namjun Park, Jaekwang KIM<br />
+CVPR 2024 Workshop<br />
+<a class="reference external" href="https://openreview.net/pdf?id=jSB5wlUU3p">PDF</a></p>
+</section>
+<section id="tech-blog-contents">
+<h2>Tech Blog Contents<a class="headerlink" href="#tech-blog-contents" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>Preliminary Works</p></li>
+<li><p>Image Generation</p></li>
+<li><p>Video Generation</p></li>
+<li><p>3D Generation</p></li>
+<li><p>Experiments</p></li>
+</ul>
+</section>
+<section id="contributors">
+<h2>Contributors<a class="headerlink" href="#contributors" title="Permalink to this heading">#</a></h2>
+<ul class="simple">
+<li><p>조상우 [Sangwoo Jo] | <a class="reference external" href="https://github.com/jasonjo97">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/sangwoojo/">Linkedin</a> |</p></li>
+<li><p>문광수 [Kwangsu Mun] | <a class="reference external" href="https://github.com/mksoo">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/%EA%B4%91%EC%88%98-%EB%AC%B8-95681b229/">Linkedin</a> |</p></li>
+<li><p>김지수 [Jisu Kim] | Github |  <a class="reference external" href="https://www.linkedin.com/in/%EC%A7%80%EC%88%98-%EA%B9%80-5a0b2320a/">Linkedin</a> |</p></li>
+<li><p>박범수 [Beomsoo Park] | <a class="reference external" href="https://github.com/hanlyang0522">Github</a> | Linkedin |</p></li>
+<li><p>지승환 [Seunghwan Ji] | <a class="reference external" href="https://github.com/hwansnaa">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/%EC%8A%B9%ED%99%98-%EC%A7%80-0169b425a/">Linkedin</a> |</p></li>
+<li><p>고동근 [Donggeun Sean Ko] | <a class="reference external" href="https://github.com/seanko29">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/sangwoojo/">Linkedin</a> |</p></li>
+<li><p>조남경 [Namkyeong Cho] | Github | Linkedin |</p></li>
+<li><p>김선훈 [SeonHoon Kim] | <a class="reference external" href="https://github.com/egshkim">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/seonhoonkim/">Linkedin</a> |</p></li>
+<li><p>이준형 [Junhyoung Lee] | <a class="reference external" href="https://github.com/jjuun0">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/jjuun0">Linkedin</a> |</p></li>
+<li><p>조형서 [Hyoungseo Cho] | <a class="reference external" href="https://github.com/ChoHyoungSeo">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/hyoungseo-cho/">Linkedin</a> |</p></li>
+<li><p>유정화 [Jeonghwa Yoo] | <a class="reference external" href="https://github.com/jeongHwarr">Github</a> | <a class="reference external" href="https://www.linkedin.com/in/jeonghwa-yoo-8403a716b/">Linkedin</a> |</p></li>
+<li><p>박세환 [Sehwan Park] | <a class="reference external" href="https://github.com/shp216">Github</a> | Linkedin |</p></li>
+<li><p>송건학 [Geonhak Song] | <a class="reference external" href="https://github.com/geonhak904">Github</a> | Linkedin |</p></li>
+<li><p>한동현 [Donghyun Han] | <a class="reference external" href="https://github.com/donghyun99">GitHub</a> | <a class="reference external" href="https://www.linkedin.com/in/donghyun99/">Linkedin</a> |</p></li>
+<li><p>이창환 [ChangHwan Lee] | <a class="reference external" href="https://github.com/Hwan-I">Github</a> | Linkedin |</p></li>
+<li><p>유경민 [Kyeongmin Yu] | <a class="reference external" href="https://github.com/yukyeongmin">Github</a> | Linkedin |</p></li>
+<li><p>이정인 [Jeongin Lee] | Github | Linkedin |</p></li>
+</ul>
+</section>
+<div class="toctree-wrapper compound">
+</div>
+<div class="toctree-wrapper compound">
+</div>
+<div class="toctree-wrapper compound">
+</div>
+<div class="toctree-wrapper compound">
+</div>
+<div class="toctree-wrapper compound">
+</div>
+</section>
+
+    <script type="text/x-thebe-config">
+    {
+        requestKernel: true,
+        binderOptions: {
+            repo: "binder-examples/jupyter-stacks-datascience",
+            ref: "master",
+        },
+        codeMirrorConfig: {
+            theme: "abcdef",
+            mode: "python"
+        },
+        kernelOptions: {
+            name: "python3",
+            path: "./."
+        },
+        predefinedOutput: true
+    }
+    </script>
+    <script>kernelName = 'python3'</script>
+
+                </article>
+              
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+    <a class="right-next"
+       href="docs/review/vae.html"
+       title="next page">
+      <div class="prev-next-info">
+        <p class="prev-next-subtitle">next</p>
+        <p class="prev-next-title">VAE</p>
+      </div>
+      <i class="fa-solid fa-angle-right"></i>
+    </a>
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
+
+
+  <div class="sidebar-secondary-item">
+  <div class="page-toc tocsection onthispage">
+    <i class="fa-solid fa-list"></i> Contents
+  </div>
+  <nav class="bd-toc-nav page-toc">
+    <ul class="visible nav section-nav flex-column">
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#about-us">About Us</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#publications">Publications</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tech-blog-contents">Tech Blog Contents</a></li>
+<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#contributors">Contributors</a></li>
+</ul>
+  </nav></div>
+
+</div></div>
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/objects.inv b/objects.inv
old mode 100644
new mode 100755
index 2cfce88d..3bf2d539
Binary files a/objects.inv and b/objects.inv differ
diff --git a/search.html b/search.html
old mode 100644
new mode 100755
index 12a3b76a..0505a173
--- a/search.html
+++ b/search.html
@@ -1,558 +1,578 @@
-
-
-<!DOCTYPE html>
-
-
-<html lang="en" data-content_root="" >
-
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><title>Search - Text-to-Image Generation-feat-Diffusion</title>
-  
-  
-  
-  <script data-cfasync="false">
-    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
-    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
-  </script>
-  
-  <!-- Loaded before other Sphinx assets -->
-  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-
-  
-  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
-  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
-<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
-
-    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
-    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
-    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
-    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
-    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
-  
-  <!-- Pre-loaded scripts that we'll load fully later -->
-  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
-<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
-  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
-    <script src="_static/jquery.js"></script>
-    <script src="_static/underscore.js"></script>
-    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
-    <script src="_static/doctools.js"></script>
-    <script src="_static/clipboard.min.js"></script>
-    <script src="_static/copybutton.js"></script>
-    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
-    <script>let toggleHintShow = 'Click to show';</script>
-    <script>let toggleHintHide = 'Click to hide';</script>
-    <script>let toggleOpenOnPrint = 'true';</script>
-    <script src="_static/togglebutton.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
-    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
-    <script src="_static/design-tabs.js"></script>
-    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
-const thebe_selector = ".thebe,.cell"
-const thebe_selector_input = "pre"
-const thebe_selector_output = ".output, .cell_output"
-</script>
-    <script async="async" src="_static/sphinx-thebe.js"></script>
-    <script>DOCUMENTATION_OPTIONS.pagename = 'search';</script>
-  <script src="_static/searchtools.js"></script>
-  <script src="_static/language_data.js"></script>
-  <script src="searchindex.js"></script>
-    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
-    <link rel="index" title="Index" href="genindex.html" />
-    <link rel="search" title="Search" href="#" />
-  <meta name="viewport" content="width=device-width, initial-scale=1"/>
-  <meta name="docsearch:language" content="en"/>
-  </head>
-  
-  
-  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
-
-  
-  
-  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
-  
-  <div id="pst-scroll-pixel-helper"></div>
-  
-  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
-    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
-
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-primary-sidebar-checkbox"/>
-  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
-  
-  <input type="checkbox"
-          class="sidebar-toggle"
-          id="pst-secondary-sidebar-checkbox"/>
-  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
-  
-  <div class="search-button__wrapper">
-    <div class="search-button__overlay"></div>
-    <div class="search-button__search-container">
-<form class="bd-search d-flex align-items-center"
-      action="#"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form></div>
-  </div>
-
-  <div class="pst-async-banner-revealer d-none">
-  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
-</div>
-
-  
-    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
-    </header>
-  
-
-  <div class="bd-container">
-    <div class="bd-container__inner bd-page-width">
-      
-      
-      
-        
-      
-      <div class="bd-sidebar-primary bd-sidebar">
-        
-
-  
-  <div class="sidebar-header-items sidebar-primary__section">
-    
-    
-    
-    
-  </div>
-  
-    <div class="sidebar-primary-items__start sidebar-primary__section">
-        <div class="sidebar-primary-item">
-
-  
-    
-  
-
-<a class="navbar-brand logo" href="intro.html">
-  
-  
-  
-  
-  
-    
-    
-      
-    
-    
-    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
-    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
-  
-  
-</a></div>
-        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
-    <div class="bd-toc-item navbar-nav active">
-        
-        <ul class="nav bd-sidenav bd-sidenav__home-link">
-            <li class="toctree-l1">
-                <a class="reference internal" href="intro.html">
-                    Welcome to PseudoDiffusers!!
-                </a>
-            </li>
-        </ul>
-        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
-
-
-
-
-
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
-
-
-
-
-
-
-
-
-
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
-
-
-
-</ul>
-<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
-<ul class="nav bd-sidenav">
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
-<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
-</ul>
-
-    </div>
-</nav></div>
-    </div>
-  
-  
-  <div class="sidebar-primary-items__end sidebar-primary__section">
-  </div>
-  
-  <div id="rtd-footer-container"></div>
-
-
-      </div>
-      
-      <main id="main-content" class="bd-main" role="main">
-        
-        
-
-<div class="sbt-scroll-pixel-helper"></div>
-
-          <div class="bd-content">
-            <div class="bd-article-container">
-              
-              <div class="bd-header-article d-print-none">
-<div class="header-article-items header-article__inner">
-  
-    <div class="header-article-items__start">
-      
-        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
-  <span class="fa-solid fa-bars"></span>
-</label></div>
-      
-    </div>
-  
-  
-    <div class="header-article-items__end">
-      
-        <div class="header-article-item">
-
-<div class="article-header-buttons">
-
-
-
-
-
-<div class="dropdown dropdown-source-buttons">
-  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
-    <i class="fab fa-github"></i>
-  </button>
-  <ul class="dropdown-menu">
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
-   class="btn btn-sm btn-source-repository-button dropdown-item"
-   title="Source repository"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fab fa-github"></i>
-  </span>
-<span class="btn__text-container">Repository</span>
-</a>
-</li>
-      
-      
-      
-      
-      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fsearch.html&body=Your%20issue%20content%20here." target="_blank"
-   class="btn btn-sm btn-source-issues-button dropdown-item"
-   title="Open an issue"
-   data-bs-placement="left" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-lightbulb"></i>
-  </span>
-<span class="btn__text-container">Open issue</span>
-</a>
-</li>
-      
-  </ul>
-</div>
-
-
-
-
-<button onclick="toggleFullScreen()"
-  class="btn btn-sm btn-fullscreen-button"
-  title="Fullscreen mode"
-  data-bs-placement="bottom" data-bs-toggle="tooltip"
->
-  
-
-<span class="btn__icon-container">
-  <i class="fas fa-expand"></i>
-  </span>
-
-</button>
-
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
-    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
-    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
-  </button>
-`);
-</script>
-
-
-<script>
-document.write(`
-  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
-    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
-  </button>
-`);
-</script>
-
-</div></div>
-      
-    </div>
-  
-</div>
-</div>
-              
-              
-  <div class="bd-search-container">
-    <h1>Search</h1>
-    <noscript>
-      <div class="admonition error">
-        <p class="admonition-title">Error</p>
-        <p>Please activate JavaScript to enable the search functionality.</p>
-      </div>
-    </noscript>
-    
-<form class="bd-search d-flex align-items-center"
-      action="#"
-      method="get">
-  <i class="fa-solid fa-magnifying-glass"></i>
-  <input type="search"
-         class="form-control"
-         name="q"
-         id="search-input"
-         placeholder="Search this book..."
-         aria-label="Search this book..."
-         autocomplete="off"
-         autocorrect="off"
-         autocapitalize="off"
-         spellcheck="false"/>
-  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
-</form>
-    <div id="search-results"></div>
-  </div>
-  <script>
-    // Activate the search field on page load
-    let searchInput = document.querySelector("form.bd-search input");
-    if (searchInput) {
-        searchInput.focus();
-        searchInput.select();
-        console.log("[PST]: Set focus on search field.");
-    }
-  </script>
-
-              
-              
-              
-              
-                <footer class="prev-next-footer d-print-none">
-                  
-<div class="prev-next-area">
-</div>
-                </footer>
-              
-            </div>
-            
-            
-              
-            
-          </div>
-          <footer class="bd-footer-content">
-            
-<div class="bd-footer-content__inner container">
-  
-  <div class="footer-item">
-    
-<p class="component-author">
-By PseudoLab
-</p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-
-  <p class="copyright">
-    
-      © Copyright 2022.
-      <br/>
-    
-  </p>
-
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-  <div class="footer-item">
-    
-  </div>
-  
-</div>
-          </footer>
-        
-
-      </main>
-    </div>
-  </div>
-  
-  <!-- Scripts loaded after <body> so the DOM is not blocked -->
-  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
-<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
-
-  <footer class="bd-footer">
-  </footer>
-  </body>
+
+
+<!DOCTYPE html>
+
+
+<html lang="en" data-content_root="" >
+
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><title>Search - Text-to-Image Generation-feat-Diffusion</title>
+  
+  
+  
+  <script data-cfasync="false">
+    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
+    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
+  </script>
+  
+  <!-- Loaded before other Sphinx assets -->
+  <link href="_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+<link href="_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+
+  
+  <link href="_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
+  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
+<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
+
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
+    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
+    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
+    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
+    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
+  
+  <!-- Pre-loaded scripts that we'll load fully later -->
+  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
+<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
+  <script src="_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script src="_static/jquery.js"></script>
+    <script src="_static/underscore.js"></script>
+    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
+    <script src="_static/doctools.js"></script>
+    <script src="_static/clipboard.min.js"></script>
+    <script src="_static/copybutton.js"></script>
+    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
+    <script>let toggleHintShow = 'Click to show';</script>
+    <script>let toggleHintHide = 'Click to hide';</script>
+    <script>let toggleOpenOnPrint = 'true';</script>
+    <script src="_static/togglebutton.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
+    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
+    <script src="_static/design-tabs.js"></script>
+    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
+const thebe_selector = ".thebe,.cell"
+const thebe_selector_input = "pre"
+const thebe_selector_output = ".output, .cell_output"
+</script>
+    <script async="async" src="_static/sphinx-thebe.js"></script>
+    <script>DOCUMENTATION_OPTIONS.pagename = 'search';</script>
+  <script src="_static/searchtools.js"></script>
+  <script src="_static/language_data.js"></script>
+  <script src="searchindex.js"></script>
+    <link rel="shortcut icon" href="_static/PseudoLab_logo.png"/>
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="#" />
+  <meta name="viewport" content="width=device-width, initial-scale=1"/>
+  <meta name="docsearch:language" content="en"/>
+  </head>
+  
+  
+  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
+
+  
+  
+  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
+  
+  <div id="pst-scroll-pixel-helper"></div>
+  
+  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
+    <i class="fa-solid fa-arrow-up"></i>Back to top</button>
+
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-primary-sidebar-checkbox"/>
+  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
+  
+  <input type="checkbox"
+          class="sidebar-toggle"
+          id="pst-secondary-sidebar-checkbox"/>
+  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
+  
+  <div class="search-button__wrapper">
+    <div class="search-button__overlay"></div>
+    <div class="search-button__search-container">
+<form class="bd-search d-flex align-items-center"
+      action="#"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form></div>
+  </div>
+
+  <div class="pst-async-banner-revealer d-none">
+  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
+</div>
+
+  
+    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
+    </header>
+  
+
+  <div class="bd-container">
+    <div class="bd-container__inner bd-page-width">
+      
+      
+      
+        
+      
+      <div class="bd-sidebar-primary bd-sidebar">
+        
+
+  
+  <div class="sidebar-header-items sidebar-primary__section">
+    
+    
+    
+    
+  </div>
+  
+    <div class="sidebar-primary-items__start sidebar-primary__section">
+        <div class="sidebar-primary-item">
+
+  
+    
+  
+
+<a class="navbar-brand logo" href="intro.html">
+  
+  
+  
+  
+  
+    
+    
+      
+    
+    
+    <img src="_static/PseudoLab_logo.png" class="logo__image only-light" alt="Text-to-Image Generation-feat-Diffusion - Home"/>
+    <script>document.write(`<img src="_static/PseudoLab_logo.png" class="logo__image only-dark" alt="Text-to-Image Generation-feat-Diffusion - Home"/>`);</script>
+  
+  
+</a></div>
+        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
+    <div class="bd-toc-item navbar-nav active">
+        
+        <ul class="nav bd-sidenav bd-sidenav__home-link">
+            <li class="toctree-l1">
+                <a class="reference internal" href="intro.html">
+                    Welcome to PseudoDiffusers!!
+                </a>
+            </li>
+        </ul>
+        <p aria-level="2" class="caption" role="heading"><span class="caption-text">Preliminary Works</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/vae.html">VAE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/gan.html">GAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDPM.html">DDPM</a></li>
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DDIM.html">DDIM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/A_Study_on_the_Evaluation_of_Generative_Models.html">A Study on the Evaluation of Generative Models</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Image Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/cycleGAN.html">CycleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyleGAN.html">StyleGAN</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/diffusion_beats_GANs.html">Diffusion Models Beat GANs on Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dalle.html">DALL-E</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DALLE2.html">DALL-E 2</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/dreambooth.html">DreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ControlNet.html">ControlNet</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Latent_Diffusion_Model.html">Introduction</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Textual_Inversion.html">Textual Inversion</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CustomDiffusion.html">Custom Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LoRA.html">LoRA</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/I-DDPM.html">I-DDPM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/StyO.html">StyO</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen.html">Imagen</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/imagen_editor.html">Imagen Editor</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDEdit.html">SDEdit</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/SDXL.html">SDXL</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/t2i_adapter.html">T2I-Adapter</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/IP_Adapter.html">IP-Adapter</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/HyperDreamBooth.html">HyperDreamBooth</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/CM3leon.html">CM3leon</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.html">Synthetic Data from Diffusion Models Improves ImageNet Classification</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GLIDE.html">GLIDE</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/BBDM.html">BBDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.html">Your Diffusion Model is Secretly a Zero-Shot Classifier</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/progressive_distillation.html">Progressive Distillation for Fast Sampling of Diffusion Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ConceptLab.html">ConceptLab</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.html">Diffusion Models already have a Semantic Latent Space</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Muse.html">Muse</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/GIGAGAN.html">Scaling up GANs for Text-to-Image Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/consistency_models.html">Consistency Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/latent_consistency_models.html">Latent Consistency Models</a></li>
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LLM_grounded_Diffusion.html">LLM Grounded Diffusion</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DiT.html">DiT</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one-step-image-translation.html">One-Step Image Translation with Text-to-Image Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/LCM-LoRA.html">LCM-LoRA: A Universal Stable-Diffusion Acceleration Module</a></li>
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/MimicBrush.html">MimicBrush: Zero-shot Image Editing with Reference Imitation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/one_step_diffusion_with_distribution_matching_distillation.html">One-step Diffusion with Distribution Matching Distillation</a></li>
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Video Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Make_A_Video.html">Make A Video</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/VideoLDM.html">VideoLDM</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/AnimateDiff.html">AnimateDiff</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Animate_Anyone.html">Animate Anyone</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreaMoving.html">DreaMoving</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamPose.html">DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion</a></li>
+
+
+
+
+
+
+
+
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">3D Generation</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/review/NeRF.html">NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/3DGS.html">3D Gaussian Splatting for Real-Time Radiance Field Rendering</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Point_E.html">Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)</a></li>
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Shap-E.html">Shap-E</a></li>
+
+
+
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamFusion.html"><strong>DreamFusion</strong></a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/magic-3d.html">Magic3D</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamBooth3D.html">Dream Booth 3D</a></li>
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/zero123plus.html">Zero123++</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/review/ProlificDreamer.html">ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation</a></li>
+
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/DreamGaussian.html">DreamGaussian</a></li>
+
+
+
+
+
+<li class="toctree-l1"><a class="reference internal" href="docs/review/Coin3D.html">Coin3D</a></li>
+</ul>
+<p aria-level="2" class="caption" role="heading"><span class="caption-text">Experiments</span></p>
+<ul class="nav bd-sidenav">
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/js_exp.html">Synthetic Data with Stable Diffusion for Foliar Disease Classification</a></li>
+<li class="toctree-l1"><a class="reference internal" href="docs/experiments/swjo_exp.html">Training DreamBooth on Naver Webtoon Face Dataset</a></li>
+</ul>
+
+    </div>
+</nav></div>
+    </div>
+  
+  
+  <div class="sidebar-primary-items__end sidebar-primary__section">
+  </div>
+  
+  <div id="rtd-footer-container"></div>
+
+
+      </div>
+      
+      <main id="main-content" class="bd-main" role="main">
+        
+        
+
+<div class="sbt-scroll-pixel-helper"></div>
+
+          <div class="bd-content">
+            <div class="bd-article-container">
+              
+              <div class="bd-header-article d-print-none">
+<div class="header-article-items header-article__inner">
+  
+    <div class="header-article-items__start">
+      
+        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
+  <span class="fa-solid fa-bars"></span>
+</label></div>
+      
+    </div>
+  
+  
+    <div class="header-article-items__end">
+      
+        <div class="header-article-item">
+
+<div class="article-header-buttons">
+
+
+
+
+
+<div class="dropdown dropdown-source-buttons">
+  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
+    <i class="fab fa-github"></i>
+  </button>
+  <ul class="dropdown-menu">
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion" target="_blank"
+   class="btn btn-sm btn-source-repository-button dropdown-item"
+   title="Source repository"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fab fa-github"></i>
+  </span>
+<span class="btn__text-container">Repository</span>
+</a>
+</li>
+      
+      
+      
+      
+      <li><a href="https://github.com/pseudo-lab/text-to-image-generation-feat-diffusion/issues/new?title=Issue%20on%20page%20%2Fsearch.html&body=Your%20issue%20content%20here." target="_blank"
+   class="btn btn-sm btn-source-issues-button dropdown-item"
+   title="Open an issue"
+   data-bs-placement="left" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-lightbulb"></i>
+  </span>
+<span class="btn__text-container">Open issue</span>
+</a>
+</li>
+      
+  </ul>
+</div>
+
+
+
+
+<button onclick="toggleFullScreen()"
+  class="btn btn-sm btn-fullscreen-button"
+  title="Fullscreen mode"
+  data-bs-placement="bottom" data-bs-toggle="tooltip"
+>
+  
+
+<span class="btn__icon-container">
+  <i class="fas fa-expand"></i>
+  </span>
+
+</button>
+
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
+    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
+    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
+  </button>
+`);
+</script>
+
+
+<script>
+document.write(`
+  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
+    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
+  </button>
+`);
+</script>
+
+</div></div>
+      
+    </div>
+  
+</div>
+</div>
+              
+              
+  <div class="bd-search-container">
+    <h1>Search</h1>
+    <noscript>
+      <div class="admonition error">
+        <p class="admonition-title">Error</p>
+        <p>Please activate JavaScript to enable the search functionality.</p>
+      </div>
+    </noscript>
+    
+<form class="bd-search d-flex align-items-center"
+      action="#"
+      method="get">
+  <i class="fa-solid fa-magnifying-glass"></i>
+  <input type="search"
+         class="form-control"
+         name="q"
+         id="search-input"
+         placeholder="Search this book..."
+         aria-label="Search this book..."
+         autocomplete="off"
+         autocorrect="off"
+         autocapitalize="off"
+         spellcheck="false"/>
+  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
+</form>
+    <div id="search-results"></div>
+  </div>
+  <script>
+    // Activate the search field on page load
+    let searchInput = document.querySelector("form.bd-search input");
+    if (searchInput) {
+        searchInput.focus();
+        searchInput.select();
+        console.log("[PST]: Set focus on search field.");
+    }
+  </script>
+
+              
+              
+              
+              
+                <footer class="prev-next-footer d-print-none">
+                  
+<div class="prev-next-area">
+</div>
+                </footer>
+              
+            </div>
+            
+            
+              
+            
+          </div>
+          <footer class="bd-footer-content">
+            
+<div class="bd-footer-content__inner container">
+  
+  <div class="footer-item">
+    
+<p class="component-author">
+By PseudoLab
+</p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+
+  <p class="copyright">
+    
+      © Copyright 2022.
+      <br/>
+    
+  </p>
+
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+  <div class="footer-item">
+    
+  </div>
+  
+</div>
+          </footer>
+        
+
+      </main>
+    </div>
+  </div>
+  
+  <!-- Scripts loaded after <body> so the DOM is not blocked -->
+  <script src="_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
+<script src="_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
+
+  <footer class="bd-footer">
+  </footer>
+  </body>
 </html>
\ No newline at end of file
diff --git a/searchindex.js b/searchindex.js
old mode 100644
new mode 100755
index 2d0d3831..e0ee27dd
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["docs/experiments/js_exp", "docs/experiments/swjo_exp", "docs/review/3DGS", "docs/review/A_Study_on_the_Evaluation_of_Generative_Models", "docs/review/AnimateDiff", "docs/review/Animate_Anyone", "docs/review/BBDM", "docs/review/CM3leon", "docs/review/ConceptLab", "docs/review/ControlNet", "docs/review/CustomDiffusion", "docs/review/DALLE2", "docs/review/DDIM", "docs/review/DDPM", "docs/review/DiT", "docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space", "docs/review/DreaMoving", "docs/review/DreamBooth3D", "docs/review/DreamFusion", "docs/review/DreamPose", "docs/review/GIGAGAN", "docs/review/GLIDE", "docs/review/HyperDreamBooth", "docs/review/I-DDPM", "docs/review/IP_Adapter", "docs/review/LCM-LoRA", "docs/review/LLM_grounded_Diffusion", "docs/review/Latent_Diffusion_Model", "docs/review/LoRA", "docs/review/Make_A_Video", "docs/review/MimicBrush", "docs/review/Muse", "docs/review/NeRF", "docs/review/Point_E", "docs/review/SDEdit", "docs/review/SDXL", "docs/review/Shap-E", "docs/review/StyO", "docs/review/StyleGAN", "docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification", "docs/review/Textual_Inversion", "docs/review/VideoLDM", "docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier", "docs/review/consistency_models", "docs/review/cycleGAN", "docs/review/dalle", "docs/review/diffusion_beats_GANs", "docs/review/dreambooth", "docs/review/gan", "docs/review/imagen", "docs/review/imagen_editor", "docs/review/latent_consistency_models", "docs/review/magic-3d", "docs/review/one-step-image-translation", "docs/review/progressive_distillation", "docs/review/t2i_adapter", "docs/review/vae", "intro"], "filenames": ["docs/experiments/js_exp.md", "docs/experiments/swjo_exp.md", "docs/review/3DGS.md", "docs/review/A_Study_on_the_Evaluation_of_Generative_Models.md", "docs/review/AnimateDiff.md", "docs/review/Animate_Anyone.md", "docs/review/BBDM.md", "docs/review/CM3leon.md", 
"docs/review/ConceptLab.md", "docs/review/ControlNet.md", "docs/review/CustomDiffusion.md", "docs/review/DALLE2.md", "docs/review/DDIM.md", "docs/review/DDPM.md", "docs/review/DiT.md", "docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space.md", "docs/review/DreaMoving.md", "docs/review/DreamBooth3D.md", "docs/review/DreamFusion.md", "docs/review/DreamPose.md", "docs/review/GIGAGAN.md", "docs/review/GLIDE.md", "docs/review/HyperDreamBooth.md", "docs/review/I-DDPM.md", "docs/review/IP_Adapter.md", "docs/review/LCM-LoRA.md", "docs/review/LLM_grounded_Diffusion.md", "docs/review/Latent_Diffusion_Model.md", "docs/review/LoRA.md", "docs/review/Make_A_Video.md", "docs/review/MimicBrush.md", "docs/review/Muse.md", "docs/review/NeRF.md", "docs/review/Point_E.md", "docs/review/SDEdit.md", "docs/review/SDXL.md", "docs/review/Shap-E.md", "docs/review/StyO.md", "docs/review/StyleGAN.md", "docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md", "docs/review/Textual_Inversion.md", "docs/review/VideoLDM.md", "docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md", "docs/review/consistency_models.md", "docs/review/cycleGAN.md", "docs/review/dalle.md", "docs/review/diffusion_beats_GANs.md", "docs/review/dreambooth.md", "docs/review/gan.md", "docs/review/imagen.md", "docs/review/imagen_editor.md", "docs/review/latent_consistency_models.md", "docs/review/magic-3d.md", "docs/review/one-step-image-translation.md", "docs/review/progressive_distillation.md", "docs/review/t2i_adapter.md", "docs/review/vae.md", "intro.md"], "titles": ["Synthetic Data with Stable Diffusion for Foliar Disease Classification", "Training DreamBooth on Naver Webtoon Face Dataset", "3D Gaussian Splatting for Real-Time Radiance Field Rendering", "A Study on the Evaluation of Generative Models", "AnimateDiff", "Animate Anyone", "BBDM", "CM3leon", "ConceptLab", "ControlNet", "Custom Diffusion", "DALL-E 2", "DDIM", "DDPM", "DiT", "Diffusion Models already 
have a Semantic Latent Space", "DreaMoving", "Dream Booth 3D", "<strong>DreamFusion</strong>", "DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion", "Scaling up GANs for Text-to-Image Synthesis", "GLIDE", "HyperDreamBooth", "I-DDPM", "IP-Adapter", "LCM-LoRA: A Universal Stable-Diffusion Acceleration Module", "LLM Grounded Diffusion", "Introduction", "LoRA", "Make A Video", "MimicBrush: Zero-shot Image Editing with Reference Imitation", "Muse", "NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis", "Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)", "SDEdit", "SDXL", "Shap-E", "StyO", "StyleGAN", "Synthetic Data from Diffusion Models Improves ImageNet Classification", "Textual Inversion", "VideoLDM", "Your Diffusion Model is Secretly a Zero-Shot Classifier", "Consistency Models", "CycleGAN", "DALL-E", "Diffusion Models Beat GANs on Image Synthesis", "DreamBooth", "GAN", "Imagen", "Imagen Editor", "Latent Consistency Models", "Magic3D", "One-Step Image Translation with Text-to-Image Models", "Progressive Distillation for Fast Sampling of Diffusion Models", "T2I-Adapter", "VAE", "Welcome to PseudoDiffusers!!"], "terms": {"titl": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "author": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "jisu": [0, 9, 38, 57], "kim": [0, 2, 6, 9, 11, 38, 42, 57], "last": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "updat": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "jul": [0, 1], "05": [0, 3, 24, 30, 35], "2023": [0, 1, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 22, 23, 27, 28, 29, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49, 50, 52, 54, 55], "\uc0ac\uacfc": 0, "\ub098\ubb34\uc758": 0, "\uc78e\uc5d0": 0, "\uc0dd\uae30\ub294": [0, 39, 53], "\uc9c8\ubcd1\uc744": 0, "\uc774\ubbf8\uc9c0\ub85c": [0, 1, 4, 8, 10, 14, 17, 22, 32, 35, 37, 49, 50, 55], "\ud310\ubcc4\ud558\ub294": 0, "kaggl": 0, "competit": [0, 42, 43, 46], "\ub9c1\ud06c": [0, 9, 14], "\uc5d0\uc11c": [0, 2, 3, 4, 6, 8, 9, 11, 13, 14, 15, 16, 17, 19, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 39, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\uc544\uc774\ub514\uc5b4\ub97c": 0, "\uc5bb\uc5b4\uc11c": 0, "\uc9c4\ud589\ud55c": [0, 14, 21, 54], "\ud504\ub85c\uc81d\ud2b8\uc785\ub2c8\ub2e4": 0, "\ud574\ub2f9": [0, 3, 4, 5, 8, 10, 13, 17, 18, 19, 21, 22, 24, 26, 29, 30, 31, 32, 33, 34, 36, 39, 40, 42, 43, 44, 47, 53, 55, 56], "competition\uc740": 0, "\uc0ac\uacfc\ub098\ubb34": 0, "\uac78\ub9b0": 0, "\uc9c8\ubcd1\uc5d0": 0, "\ub530\ub77c": [0, 2, 3, 4, 5, 6, 7, 8, 11, 14, 15, 17, 18, 19, 21, 22, 23, 25, 26, 28, 30, 31, 32, 33, 35, 36, 39, 40, 42, 43, 44, 45, 46, 47, 51, 53, 56], "\uc78e": 0, "\uc774\ubbf8\uc9c0\ub97c": [0, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 49, 51, 53, 55], "4\uac1c\uc758": [0, 11, 14, 18, 29, 32, 40, 53, 54, 55], "class\ub85c": 0, "\ubd84\ub958\ud558\ub294": [0, 21, 55], "task\uc785\ub2c8\ub2e4": 0, "class": [0, 9, 10, 12, 13, 17, 20, 21, 23, 27, 29, 38, 39, 42, 44, 46, 47, 48, 49, 53, 55, 56], "leav": [0, 42], "competition\uc744": 0, "\uc124\uba85\ud55c": [0, 29, 55], "articl": 0, "\uc804\uccb4\uc801\uc778": [0, 2, 4, 11, 15, 26, 38, 43, 53, 56], 
"accuracy\ub294": 0, "97": [0, 42], "\uc774\uc9c0\ub9cc": [0, 32], "multipl": [0, 20, 30, 31, 53, 55], "class\uc758": [0, 3, 46], "\uacbd\uc6b0": [0, 1, 4, 6, 8, 9, 10, 14, 15, 17, 19, 22, 23, 24, 25, 26, 28, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 48, 51, 55, 56], "accuracy\uac00": 0, "51": 0, "\uc5d0": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\ubd88\uacfc\ud588\ub2e4\uace0": 0, "\uc5b8\uae09\ud569\ub2c8\ub2e4": [0, 24], "\uc774\ubbf8\uc9c0": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, 24, 26, 27, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55], "\uac1c\uc218\uac00": [0, 4], "\ub2e4\ub978": [0, 2, 3, 4, 5, 6, 8, 10, 12, 13, 14, 15, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 49, 50, 54, 55, 56], "class\uc5d0": [0, 15, 21], "\ube44\ud574": [0, 4, 6, 7, 9, 10, 12, 14, 15, 17, 18, 19, 21, 22, 23, 26, 27, 31, 33, 37, 39, 43, 46, 50, 51, 56], "\uc801\uc740": [0, 2, 3, 4, 7, 9, 10, 12, 13, 14, 18, 21, 23, 24, 28, 29, 40, 41, 42, 46, 51], "\uc810\uc5d0": [0, 19, 33], "\uc8fc\ubaa9\ud588\uace0": 0, "diffusion\uc744": [0, 5, 8, 19, 25, 34, 36, 51], "\uc0ac\uc6a9\ud558\uc5ec": [0, 4, 8, 13, 14, 15, 17, 18, 19, 22, 24, 26, 27, 29, 32, 33, 36, 38, 39, 43, 44, 45, 47, 48, 49, 51, 52, 53, 54], "\ud074\ub798\uc2a4\uc758": [0, 8, 39], "\ub370\uc774\ud130": [0, 3, 4, 5, 14, 17, 26, 27, 29, 32, 33, 34, 35, 36, 39, 40, 41, 42, 44, 45, 48, 49, 51, 56], "\uac1c\uc218\ub97c": [0, 4, 13, 36], "\ub298\ub824\uc11c": 0, "classifi": [0, 4, 14, 15, 24, 25, 33, 36, 39, 41, 48, 50, 51, 53, 55], "\ud559\uc2b5\uc5d0": [0, 7, 14, 19, 20, 23, 28, 30, 36, 39, 41, 51, 53], "\uc0ac\uc6a9\ud558\uba74": [0, 13, 23, 31, 40, 41, 45], "\ub354": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56], "\uc88b\uc740": [0, 1, 2, 3, 4, 6, 10, 15, 18, 19, 20, 21, 25, 26, 27, 29, 33, 34, 35, 37, 39, 43, 44, 46, 47, 48, 49, 51, 52, 53, 54, 55], "\uc131\ub2a5\uc758": [0, 14, 39], "classifier\ub97c": [0, 15, 19, 21, 24], "\uc5bb\uc744": [0, 2, 4, 8, 15, 24, 25, 26, 35, 36, 39, 40, 42, 43, 44, 51], "\uc218": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\uc788\uc744": [0, 1, 4, 5, 6, 9, 13, 24, 25, 26, 28, 30, 31, 32, 33, 38, 39, 42, 44], "\uac83\uc73c\ub85c": [0, 4, 14, 19, 21, 26, 28, 29, 32, 33, 36, 39, 40, 41, 42], "\uae30\ub300\ud588\uc2b5\ub2c8\ub2e4": 0, "\ubb38\uc81c": [0, 5, 17, 18, 33, 55], "\uc0c1\ud669\uc744": [0, 43], "\uc7ac\ud604\ud558\uae30": 0, "\uc704\ud574": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 43, 44, 45, 47, 50, 51, 52, 53, 54, 55], "\uae30\uc874": [0, 2, 4, 7, 8, 9, 10, 14, 15, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36, 37, 38, 39, 40, 41, 43, 46, 49, 51, 53, 54, 55], "\ub370\uc774\ud130\ub85c": [0, 1, 3, 9, 18, 33, 35, 39, 41, 42, 44, 51], "imag": [0, 1, 2, 3, 6, 8, 11, 12, 16, 17, 18, 22, 25, 26, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 45, 48, 49, 50, 52, 54, 55, 56, 57], "\ud559\uc2b5\ud558\uc5ec": [0, 4, 24, 25, 33, 45, 46, 51, 54], "baseline\uc73c\ub85c": 0, "\uc7a1\uc558\uc2b5\ub2c8\ub2e4": 0, "\ubaa8\ub378\uc740": [0, 4, 5, 6, 7, 8, 11, 14, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 38, 39, 40, 44, 48, 51, 54, 55], "pretrained\ub41c": 0, "resnet18\uc5d0": 0, "linear": [0, 3, 6, 13, 14, 15, 21, 23, 24, 28, 33, 38, 46, 48, 56], "layer\ub97c": [0, 4, 5, 20, 24, 28, 29, 31, 36, 38, 41], 
"\ubd99\uc5ec\uc11c": 0, "\uc0ac\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [0, 11, 22, 44, 50], "\uc804\uccb4": [0, 4, 5, 6, 8, 9, 10, 14, 15, 17, 19, 21, 22, 23, 26, 27, 29, 30, 33, 41, 51], "7": [0, 1, 3, 5, 6, 7, 12, 13, 18, 23, 24, 25, 26, 32, 34, 43, 49, 51, 55], "class\ubcc4": 0, "healthi": 0, "99": 0, "73": [0, 40], "rust": 0, "scab": 0, "98": [0, 18], "class\ub294": [0, 20], "\uac1c\uc218": [0, 36], "91\uac1c\ub85c": 0, "\ud074\ub798\uc2a4\ub4e4\uc5d0": 0, "\ube44\ud574\uc11c": [0, 11], "\uc801\uc2b5\ub2c8\ub2e4": 0, "imbalance\uac00": 0, "\uc131\ub2a5\uc744": [0, 2, 3, 6, 7, 8, 9, 10, 12, 14, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55], "\ub0ae\ucd94\ub294": [0, 24], "\uc6d0\uc778\uc77c": [0, 39], "\uac83\uc774\ub77c": [0, 28], "\uac00\uc815\ud558\uace0": [0, 25, 43], "diffusion\uc73c\ub85c": [0, 39], "data\ub97c": [0, 10, 24, 27], "\ucd94\uac00\ub85c": [0, 7, 17, 19, 23, 25, 27, 29, 30, 33, 35, 36, 37, 44], "\uc0dd\uc131\ud574\ubcf4\uae30\ub85c": 0, "\ud588\uc2b5\ub2c8\ub2e4": [0, 1, 11, 38, 39, 48], "\uc608\uc2dc": [0, 5, 7, 19, 29, 32, 33, 44, 49, 50, 53, 54, 55], "pretran": 0, "diffusion\uc758": [0, 20, 24, 26], "\ub300\ud55c": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 31, 32, 33, 35, 36, 37, 39, 40, 41, 43, 44, 47, 48, 51, 52, 53, 54, 55, 56], "\uc815\ubcf4\uac00": [0, 11, 19, 22, 37, 41, 47], "\uc5c6\uc5b4\uc11c": [0, 19], "\uc0dd\uc131\ud560": [0, 1, 2, 4, 7, 8, 9, 11, 17, 18, 20, 22, 24, 25, 26, 29, 31, 33, 34, 35, 36, 39, 41, 43, 44, 47, 51, 52, 53], "\uc544\ub798\uc640": [0, 2, 4, 6, 8, 9, 15, 19, 24, 26, 27, 36, 38, 39, 44, 46, 56], "\uac19\uc774": [0, 4, 6, 8, 9, 10, 11, 13, 15, 18, 19, 20, 22, 24, 25, 26, 27, 29, 30, 31, 36, 38, 40, 43, 44, 45, 46, 47, 48, 51, 52, 53, 54, 55, 56], "\uad00\ub828\uc5c6\ub294": 0, "\uc774\ubbf8\uc9c0\uac00": [0, 9, 11, 13, 17, 19, 20, 22, 23, 24, 26, 27, 29, 31, 34, 35, 36, 37, 
39, 40, 44, 46, 48, 49, 53], "\uc0dd\uc131\ub429\ub2c8\ub2e4": [0, 8], "prompt": [0, 4, 5, 9, 10, 11, 16, 17, 18, 20, 21, 22, 26, 27, 30, 31, 37, 40, 41, 42, 47, 49, 50, 52, 53, 55], "photo": [0, 1, 4, 6, 8, 10, 26, 36, 40, 44], "\ub530\ub77c\uc11c": [0, 2, 3, 4, 6, 7, 9, 11, 13, 15, 19, 21, 22, 23, 24, 25, 26, 31, 33, 34, 35, 37, 39, 40, 41, 42, 44, 50, 51, 52, 53, 54, 55], "model": [0, 6, 8, 9, 11, 12, 20, 22, 26, 28, 32, 44, 45, 48, 50, 57], "\uc815\ubcf4\ub97c": [0, 4, 5, 9, 11, 13, 15, 17, 18, 19, 22, 24, 26, 29, 31, 33, 37, 39, 40, 41, 44, 47, 52, 55], "\ub123\uc5b4\uc8fc\uae30": 0, "dreambooth": [0, 4, 10, 16, 19, 24, 30, 52], "\ub97c": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "tuning\ud588\uc2b5\ub2c8\ub2e4": 0, "training\uc5d0": [0, 12, 46], "\uc0ac\uc6a9\ud55c": [0, 4, 7, 15, 17, 18, 19, 22, 24, 25, 26, 29, 32, 33, 35, 36, 38, 39, 40, 46], "prompt\ub294": [0, 22, 26], "disea": 0, "leaf": 0, "\uc774\uba70": [0, 4, 6, 40, 43, 51], "\uc0dd\uc131\ud55c": [0, 4, 9, 11, 13, 17, 24, 26, 29, 33, 36, 44, 45, 47, 49, 52, 54, 55], "\uc774\ubbf8\uc9c0\uc758": [0, 1, 2, 3, 9, 10, 11, 13, 17, 19, 20, 24, 26, 27, 33, 36, 37, 38, 39, 40, 44, 45, 47, 49, 53], "\uc608\uc2dc\ub294": [0, 17, 49, 55], "\uac19\uc2b5\ub2c8\ub2e4": [0, 1, 9, 11, 31, 38, 39, 43, 44, 47, 48, 55, 56], "\uc0dd\uc131": [0, 3, 4, 5, 7, 8, 11, 12, 13, 14, 18, 19, 20, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 51, 54, 55], "engineering\uc744": [0, 24], "\uc218\ud589\ud558\ub358": 0, "\uc911": [0, 5, 7, 8, 10, 12, 16, 18, 22, 23, 25, 28, 33, 34, 35, 37, 38, 39, 43, 44, 45, 46, 47, 48, 49, 52, 55, 56], "\uc758\ub3c4\ud558\uc9c0\uc54a\uc740": 0, "\uacb0\uacfc\ub97c": [0, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 17, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 
43, 44, 49, 50, 51, 54], "\ubc1c\uacac\ud588\uc2b5\ub2c8\ub2e4": [0, 1, 11], "\uc544\ub798\ub294": [0, 9, 34, 36, 56], "\uc774\uc5d0": [0, 4, 5, 6, 8, 9, 15, 16, 22, 25, 32, 37, 39, 43, 47, 54, 55, 56], "\uc608\uc2dc\ub85c": [0, 26, 42], "\uc804\uc758": [0, 35], "model\uc758": [0, 3, 4, 5, 9, 10, 13, 15, 17, 18, 22, 23, 24, 25, 26, 27, 28, 36, 39, 40, 46, 51], "\uacb0\uacfc\uc640": [0, 11, 26, 27, 32, 54], "\ube44\uad50\uc785\ub2c8\ub2e4": 0, "\uc0c1\ud6691": 0, "\uc804": [0, 13, 28, 32, 35, 39, 46], "\ud6c4": [0, 1, 3, 4, 6, 7, 11, 13, 14, 15, 16, 17, 19, 21, 27, 28, 31, 32, 33, 34, 35, 36, 41, 43, 44, 45, 47, 49, 50, 52, 55], "\uc0c1\ud6691\uc744": 0, "\ubcf4\uba74": [0, 3, 4, 6, 10, 13, 15, 20, 21, 23, 26, 27, 35, 36, 38, 39, 40, 44, 45, 48, 51], "\ub2f4\uc740": [0, 24], "uniqu": [0, 1, 47, 52], "identifi": [0, 1, 37, 47, 52], "\uac00": [0, 1, 6, 8, 9, 11, 13, 14, 15, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 35, 36, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\uc5c6\uc74c\uc5d0\ub3c4": [0, 21], "diseases\uc758": 0, "\uc78e\ub4e4\ub9cc": 0, "\uc774\ub294": [0, 2, 3, 4, 5, 7, 8, 9, 12, 15, 18, 19, 22, 24, 26, 27, 32, 33, 35, 36, 38, 39, 40, 41, 43, 44, 48, 50, 51, 53, 54, 55, 56], "\uac19\uc740": [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 38, 39, 40, 42, 43, 44, 46, 47, 50, 51, 52, 53, 54, 55, 56], "\uc18d\ud558\ub294": [0, 8], "\uc774\ubbf8\uc9c0\ub4e4\uc744": [0, 1, 3, 4, 6, 9, 11, 17, 24, 32, 43, 47, 50], "\uc0dd\uc131\ud574\ub0b4\uc9c0": [0, 10], "\ubabb\ud558\uace0": [0, 13, 24, 26], "\uc788\ub2e4\ub294": [0, 4, 21, 22, 24, 28, 33, 38, 40, 43, 49, 50, 51, 52], "\uac83\uc785\ub2c8\ub2e4": [0, 9, 11, 22, 38, 39, 44, 50, 54], "\uc774": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 53, 54, 55, 56], 
"\ud604\uc0c1\uc744": [0, 10, 17, 38, 53, 54], "languag": [0, 4, 7, 10, 11, 17, 24, 26, 27, 28, 29, 36, 39, 40, 45, 47, 49], "drift\ub77c\uace0": 0, "\ud558\uba70": [0, 2, 6, 45], "\ubaa8\ub378\uc774": [0, 1, 4, 5, 7, 8, 9, 10, 12, 13, 14, 17, 18, 19, 20, 22, 24, 27, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 41, 43, 46, 47, 48, 50, 52, 53, 54, 55], "leaf\uac00": 0, "\uc544\ub2cc": [0, 1, 4, 6, 8, 9, 12, 15, 17, 19, 24, 28, 33, 36, 40, 44, 46, 48, 51, 53, 54], "\uc77c\ubc18\uc801\uc778": [0, 7, 10, 17, 18, 19, 22, 33, 35, 40, 42, 46], "\uad00\ud55c": [0, 23, 25, 26, 30, 36, 37, 38], "\uc78a\uc5b4\ubc84\ub838\uae30": 0, "\ub54c\ubb38\uc785\ub2c8\ub2e4": 0, "\uc0c1\ud6692": 0, "\uc0c1\ud6692\ub97c": 0, "photo\ub77c\ub294": 0, "prompt\ub9cc": [0, 16, 37], "\uc0ac\uc6a9\ud558\uc600\ub294\ub370\ub3c4": 0, "\uc774\ubbf8\uc9c0\ub4e4\uc5d0": [0, 11], "\ud2b9\uc9d5\ub4e4\uc774": 0, "\ub098\ud0c0\ub0a9\ub2c8\ub2e4": 0, "dreambooth\uc5d0\uc11c\ub294": 0, "drift\ub97c": 0, "prior": [0, 8, 11, 17, 19, 24, 26, 29, 47, 56], "preserv": [0, 4, 17, 47, 54], "loss\ub97c": [0, 2, 4, 7, 13, 15, 17, 18, 20, 27, 31, 36, 40], "\uc0ac\uc6a9\ud574\uc11c": [0, 6, 9, 11, 15, 21, 26, 31, 45, 46, 49], "\ud574\uacb0\ud558\uc600\uc73c\ubbc0\ub85c": 0, "\ubc29\ubc95\uc744": [0, 4, 5, 6, 7, 8, 15, 18, 19, 21, 22, 23, 25, 26, 32, 33, 35, 38, 39, 40, 42, 44, 46, 51, 53, 55], "\ud574\uacb0\ud558\uae30": [0, 2, 6, 8, 15, 17, 26, 28, 30, 35, 40, 41, 43, 47, 50, 51, 52, 54, 55], "train": [0, 2, 3, 6, 11, 12, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 32, 36, 37, 38, 40, 41, 42, 46, 47, 49, 51, 52, 55], "prompt\uc5d0\uc11c": [0, 24], "\uc81c\uc678\ud558\uace0": [0, 28, 35], "\ucd5c\ub300\ud55c": [0, 19, 26, 35, 40, 44, 55, 56], "\ub2e8\uc21c\ud55c": [0, 4, 10, 17, 19, 24, 25, 26, 33, 36, 54], "model\uc744": [0, 3, 4, 5, 8, 9, 10, 12, 15, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 35, 36, 37, 40, 43, 46, 51], "\ub2e4\uc2dc": [0, 4, 6, 13, 15, 25, 28, 32, 34, 38, 43, 44, 47, 48, 51, 54, 55, 56], 
"\uacb0\uacfc": [0, 1, 3, 4, 5, 6, 7, 10, 11, 14, 17, 18, 19, 22, 23, 26, 27, 29, 30, 33, 35, 36, 39, 41, 42, 43, 46, 49, 50, 52, 54, 55], "\uc7ac\ud6c8\ub828": 0, "\uc774\ud6c4\uc5d0\ub3c4": 0, "model\ub85c": [0, 3, 21, 24, 26], "\uc0dd\uc131\ud558\uc600\uc744": 0, "\ub54c\uc640": [0, 44], "\ube44\uc2b7\ud55c": [0, 6, 7, 10, 14, 15, 18, 23, 29, 40, 44, 46, 47, 54], "\uc758": [0, 1, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 30, 31, 32, 33, 35, 36, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\uacbd\uc6b0\uc5d0\ub294": [0, 15, 19, 26, 29, 39], "\uc5ec\uc804\ud788": [0, 4, 6, 8, 10, 19, 20, 22, 26, 30, 32, 33, 49], "\uc601\ud5a5\uc744": [0, 3, 4, 5, 7, 8, 13, 14, 15, 23, 36, 37, 38, 39, 46, 49], "\ubc1b\uc740": [0, 2, 25, 40], "\uac83\uac19\uc740": [0, 18], "\uc774\ubbf8\uc9c0\ub4e4\uc774": [0, 6, 9], "photo\uc758": 0, "\uc5ec\ub7ec": [0, 3, 4, 6, 8, 17, 18, 19, 22, 24, 27, 29, 31, 32, 33, 39, 40, 42, 43, 47, 53, 55], "\ub300\uc0c1\ub4e4\uacfc": 0, "\uc0ac\uc6a9\ub418\ub294": [0, 19, 22, 24, 32, 39, 40, 43, 44, 47, 51, 53], "\ud2b9\uc131\uc744": [0, 4, 8, 14, 15, 17, 19, 22, 24, 29, 36, 43, 44], "\uac00\uc9c0\uace0\uc788\uc5b4\uc11c": 0, "\uadf8\ub7f0": [0, 2, 33, 34, 38], "\uac83\uc774\ub77c\ub294": [0, 39], "\uc0dd\uac01\uc774": [0, 24, 39], "\ub4e4\uc5c8\uace0": 0, "\uc774\ub97c": [0, 2, 3, 4, 5, 6, 8, 9, 13, 15, 17, 18, 19, 22, 24, 25, 26, 27, 28, 29, 30, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 47, 48, 50, 51, 52, 53, 54, 55, 56], "\uccb4\ud06c\ud574\ubcf4\uae30": 0, "\ud2b9\uc815\ud55c": [0, 11, 32, 38, 40, 42, 51], "photo\uc640": 0, "\uc6a9\ub3c4\ub85c": 0, "prompt\ub4e4\ub85c": 0, "\uc0dd\uc131\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 0, "\ub300\uc0c1": [0, 15, 17, 19, 44], "\uc138\uac00\uc9c0\ub85c\ub294": 0, "cat": [0, 13, 26, 43, 50, 55], "sea": 0, "pirate\uc744": 0, "\uc0ac\uc6a9\ud588\uace0": [0, 7, 21, 26, 35, 51], "\ube44\uc2b7\ud558\uac8c": [0, 6, 14, 40], "\ud14d\uc2a4\ud2b8": [0, 4, 7, 8, 11, 18, 
19, 22, 29, 31, 33, 36, 39, 40, 49], "\uc138\uac00\uc9c0\ub294": 0, "illustr": [0, 15], "anim": [0, 30, 45], "wallpaper\ub97c": 0, "\uc774\ubbf8\uc9c0\ub294": [0, 5, 8, 10, 16, 17, 18, 22, 35, 37, 49, 53], "\uae00": 0, "\ub9c8\uc9c0\ub9c9": [0, 3, 5, 6, 13, 14, 16, 18, 21, 33, 38, 39, 43], "\ubd80\ubd84\uc758": [0, 3], "appendix\uc5d0": 0, "\uc788\uc2b5\ub2c8\ub2e4": [0, 1, 4, 8, 9, 11, 22, 36, 38, 39, 43, 44, 47, 48, 50, 52, 53, 54, 55, 56], "\ub300\uc0c1\uc744": [0, 44], "\uc9c0\uce6d\ud558\ub294": 0, "\ud14d\uc2a4\ud2b8\uc758": [0, 29], "\ub300\uc0c1\uc758": [0, 17, 47], "\ud2b9\uc9d5\uc774": [0, 36], "\uc798": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 17, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 44, 47, 48, 51, 52, 53, 56], "\ub4dc\ub7ec\ub098\ub294": 0, "\uc0dd\uc131\ub418\uc5c8\uc9c0\ub9cc": 0, "\ub300\uc0c1\uacfc": [0, 17, 22, 44], "\ud568\uaed8": [0, 8, 17, 19, 20, 21, 22, 26, 28, 30, 31, 32, 33, 35, 41, 42, 44, 52, 56], "\uc4f0\uc774\ub294": [0, 3, 39, 44, 48], "\uc78e\uc0ac\uadc0\uc758": 0, "\ud2b9\uc9d5\uc744": [0, 9, 19, 33, 47], "\uac00\uc9c0\ub294": [0, 1, 2, 5, 15, 18, 38, 51], "\uc77c\ubd80": [0, 2, 7, 8, 15, 17, 19, 21, 22, 24, 28, 29, 33, 38], "\uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 0, "tuning\ud55c": 0, "400\uc7a5": 0, "\uc0dd\uc131\ud558\uc5ec": [0, 8, 26, 29, 33, 36], "\ud6c8\ub828\ud588\uc2b5\ub2c8\ub2e4": 0, "result_bas": 0, "\ucd94\uac00": [0, 4, 5, 8, 10, 13, 16, 17, 18, 19, 20, 22, 24, 25, 29, 32, 33, 34, 35, 36, 42, 55], "\ud65c\uc6a9\ud55c": [0, 4, 5, 8, 11, 18, 21, 24, 26, 36, 41, 42, 47, 48, 52, 53], "9": [0, 3, 6, 7, 23, 32, 34, 35, 36, 44, 51], "84": 0, "result_now": 0, "kaggle\uc5d0\uc11c": 0, "\uc81c\uacf5\ud558\ub294": [0, 5, 11, 19, 33, 40], "test": [0, 2, 3, 5, 6, 19, 29, 32, 40, 42, 49], "set\uc5d0": [0, 29, 39], "\uc801\uc6a9\ud588\uc744": [0, 25, 43, 51], "\ub54c\ub294": [0, 4, 30, 36, 39, 51], "baseline\uc774": [0, 40], "94": 0, "\uacbd\uc6b0\uac00": [0, 9, 12, 15, 17, 19, 26, 30, 
33, 36, 44, 53], "93": 0, "\uc5ec\uc11c": 0, "baseline\ubcf4\ub2e4": 0, "\uc5bb\uc9c0\ub294": 0, "\ubabb": 0, "\ud6c8\ub828": [0, 4, 5, 8, 9, 16, 17, 19, 29, 35, 36, 39, 44, 49], "\uc911\uac04\uc911\uac04\uc5d0": 0, "\uc77c\uc815": [0, 2, 20, 27, 33], "step\ub9c8\ub2e4": [0, 26], "\uc0dd\uc131\ud558\uac8c\ud574\uc11c": 0, "\ud6c8\ub828\uc5d0": [0, 8, 38], "\ubaa8\ub2c8\ud130\ub9c1\uc774": 0, "\uc788\uc73c\uba74": 0, "\uc88b\uaca0\ub2e4\ub294": 0, "\uc0dd\uac01\uc744": 0, "\ud6c8\ub828\uc2dc": [0, 17], "hyperparamet": [0, 12, 16, 22, 27, 30, 37, 43, 46, 54, 55], "tuning\uc744": [0, 4, 9, 22, 24, 28, 39, 40, 51], "\uc880": [0, 9, 11, 26, 30, 37, 49], "\ucca0\uc800\ud558\uac8c": 0, "\ud574\uc57c\uaca0\ub2e4\ub294": 0, "\uc2e4\uc81c\ub85c": [0, 3, 4, 6, 7, 20, 23, 26, 29, 30, 33, 35, 36, 38, 39, 41, 44, 48, 56], "\uc870\uac74\uc744": [0, 8, 19, 22, 24, 33, 36, 40, 43], "\ub9cc\uc871\ud558\ub294\uc9c0": 0, "\uac80\uc218\ud560": 0, "\ubc29\uc548\uc774": 0, "\ud544\uc694\ud569\ub2c8\ub2e4": [0, 43], "\ub0b4\uc5d0\uc11c\ub3c4": 0, "\uce74\ud14c\uace0\ub9ac\ub97c": [0, 8, 33], "\ub098\ub20c": [0, 24, 25, 27], "\uc788\ub2e4\uba74": [0, 11, 13, 29, 42, 44], "\ub098\ub220\uc11c": [0, 15, 49], "\uac01\uac01\uc5d0": [0, 11, 17, 38, 39], "tuning\ud560": [0, 10, 25, 28], "\uc218\ub3c4": [0, 4, 8, 11, 15, 24, 27, 30, 36, 38, 42, 43, 44, 51, 55], "\ud65c\uc6a9\ud574\ubcfc": 0, "submiss": 0, "score\uc5d0\uc11c": [0, 39], "baseline\uc744": [0, 36], "\uc774\uae30\uc9c0": 0, "\ud588\uc9c0\ub9cc": [0, 25], "text": [0, 1, 3, 5, 6, 8, 9, 11, 13, 14, 15, 16, 17, 19, 22, 26, 27, 30, 32, 33, 35, 36, 37, 38, 39, 42, 45, 49, 50, 52, 54, 55], "\uc774\uc6a9\ud55c": [0, 29, 36, 37, 39], "data\uc758": [0, 23, 37], "\uac00\ub2a5\uc131\uc744": [0, 12, 17, 24, 36], "\ubcfc": [0, 1, 6, 8, 11, 13, 15, 17, 18, 19, 22, 25, 26, 27, 35, 36, 38, 39, 40, 44, 45, 46, 48, 50, 54], "\uc788\uc5c8\ub2e4\uace0": [0, 26, 28, 36, 43, 44, 50, 52, 53, 55], "\uc0dd\uac01\ud569\ub2c8\ub2e4": [0, 38, 54], 
"\uc55e\uc5d0\uc11c": 0, "\uc5b8\uae09\ud55c": [0, 4, 9, 24, 35, 50], "prompt\uc5d0": [0, 10, 18, 21, 24, 26], "\uc608\uc2dc\uc785\ub2c8\ub2e4": [0, 1], "nsfw\ub85c": 0, "\ud310\ub2e8\ub418\uc5b4": 0, "\uac80\uc740\uc0c9\uc73c\ub85c": 0, "\ub098\uc654\uc2b5\ub2c8\ub2e4": [0, 38], "pirat": 0, "wallpap": 0, "sangwoo": [1, 43, 47, 48, 50, 52, 53, 54, 55, 56, 57], "jo": [1, 43, 47, 48, 50, 52, 53, 54, 55, 56, 57], "09": [1, 42, 48], "\uc774\ubc88": [1, 36, 50, 55], "\ud3ec\uc2a4\ud305\uc5d0\uc11c\ub294": [1, 11], "\uc9c1\uc811": [1, 4, 6, 8, 14, 15, 17, 19, 23, 24, 25, 26, 29, 30, 32, 33, 34, 36, 42, 48, 53, 56], "\ud559\uc2b5\ud574\ubcf4\uace0": 1, "\uc2e4\ud5d8\ud55c": [1, 22, 43], "\uacb0\uacfc\ub4e4\uc744": [1, 11, 17, 47, 55], "\uacf5\uc720\ud560\ub824\uace0": 1, "\ud569\ub2c8\ub2e4": [1, 8, 9, 11, 22, 36, 39, 43, 44, 47, 48, 50, 52, 53, 54, 55, 56], "\uc6b0\uc120\uc801\uc73c\ub85c": [1, 43, 44, 45, 55, 56], "\ud559\uc2b5\ub370\uc774\ud130\ub294": 1, "bryandle": 1, "data": [1, 5, 18, 24, 27, 28, 29, 30, 32, 36, 38, 42, 43, 44, 48, 51, 53, 54, 57], "\uacf5\uac1c\ub41c": [1, 4, 17, 28, 50], "yolov5": 1, "\ubaa8\ub378": [1, 4, 5, 6, 7, 8, 10, 11, 12, 14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 33, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56], "\ubc0f": [1, 7, 8, 11, 15, 19, 22, 25, 26, 28, 29, 32, 33, 35, 39, 40, 44, 46, 48, 49, 50, 51, 54, 55], "waifu2x": 1, "\ud6c4\ucc98\ub9ac": [1, 33, 35], "\uae30\ubc95\uc744": [1, 3, 8, 10, 21, 23, 26, 32, 43, 50, 52, 54], "\ud65c\uc6a9\ud558\uc5ec": [1, 4, 8, 17, 18, 19, 21, 22, 26, 29, 33, 35, 36, 43, 45, 46, 47, 48, 52, 53, 56], "\ud504\ub9ac\ub4dc\ub85c\uc6b0\uc5d0": 1, "\ub4f1\uc7a5\ud558\ub294": 1, "\uc778\ubb3c": [1, 5, 16, 44], "\uc0ac\uc9c4\ub4e4\uc744": [1, 32, 47], "\uc218\uc9d1\ud588\uc2b5\ub2c8\ub2e4": 1, "\ub17c\ubb38\uc5d0\uc11c\ub294": [1, 2, 3, 4, 6, 8, 9, 11, 13, 15, 18, 19, 21, 22, 24, 26, 27, 28, 32, 33, 36, 38, 39, 40, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 
55, 56], "3": [1, 6, 11, 20, 24, 26, 30, 40, 42, 44, 47, 48, 49, 56], "5": [1, 2, 6, 13, 18, 21, 22, 24, 25, 26, 30, 34, 42, 48, 51, 55, 56], "\uc7a5\uc73c\ub85c": 1, "fine": [1, 4, 5, 9, 11, 15, 16, 22, 24, 26, 28, 30, 31, 32, 36, 37, 38, 40, 45, 49, 53, 57], "tune": [1, 4, 11, 16, 22, 24, 28, 36, 43, 45, 49, 52, 53, 57], "\uac00\ub2a5\ud558\ub2e4\uace0": [1, 38], "\uc81c\uc2dc\ub418\uc5b4\uc788\uc9c0\ub9cc": 1, "\uc0ac\uc9c4": [1, 6, 10, 24, 40, 44, 49], "\ub9ce\uc740": [1, 4, 5, 8, 9, 11, 14, 15, 17, 18, 19, 21, 24, 27, 29, 30, 31, 32, 33, 35, 36, 39, 40, 41, 43, 44, 45, 49, 53, 54], "\ud559\uc2b5\ud558\uba74": [1, 4, 13, 19, 47], "\uc131\ub2a5\uc774": [1, 6, 13, 14, 15, 20, 23, 26, 28, 29, 32, 33, 35, 36, 39, 42, 43, 44, 46, 50, 51, 52, 53, 54, 55], "\uc88b\uc544\uc838\uc11c": 1, "15": [1, 3, 5, 7, 14, 18, 22, 32, 36, 51, 52], "20": [1, 3, 4, 5, 7, 8, 14, 21, 23, 27, 36, 47, 51], "\uc7a5\uc758": [1, 11, 17, 19, 42], "\ud559\uc2b5\ud558\uc600\uc2b5\ub2c8\ub2e4": 1, "\ud559\uc2b5\ud55c": [1, 4, 10, 11, 21, 23, 25, 28, 37, 39, 42, 43, 47, 49, 50, 52, 54], "\uc774\ubbf8\uc9c0\ub4e4": [1, 35], "\uc2e4\ud5d8\ud558\uba74\uc11c": 1, "\ub300\ud45c\uc801\uc73c\ub85c": [1, 43, 47, 48, 53, 55, 56], "\uadf8\ub9ac\uace0": [1, 6, 15, 20, 22, 25, 26, 30, 33, 36, 39, 40, 43, 44, 47, 48, 50, 52, 53, 54, 55, 56], "\ub9c8\uc9c0\ub9c9\uc73c\ub85c": [1, 4, 19, 22, 24, 38, 43, 47, 50, 52, 53, 54, 55], "\ubc18\uc601\ud558\ub294": [1, 4, 24], "\uc815\ub3c4\ub97c": [1, 12, 23], "\uc870\uc808\ud558\ub294": [1, 9, 12, 19, 22, 39], "prior_loss_weight": [1, 47], "\ubc14\uafd4\uac00\uba74\uc11c": 1, "\ud559\uc2b5\ud574\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 1, "\uc0ac\uc804\ud559\uc2b5\ub41c": [1, 4, 5, 24, 27, 39, 43, 47, 53, 54], "\ubaa8\ub378\ub85c": [1, 4, 5, 10, 17, 19, 20, 22, 24, 33, 38, 39, 45, 48, 50, 52, 54, 55], "\ucc98\uc74c\uc5d0\ub294": [1, 7, 19, 28, 39], "hakurei": 1, "waifu": 1, "diffus": [1, 3, 6, 8, 9, 11, 12, 16, 20, 22, 28, 29, 30, 31, 32, 35, 48, 50, 53, 57], 
"\ubaa8\ub378\uc744": [1, 3, 4, 7, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 31, 32, 33, 34, 35, 36, 39, 40, 41, 42, 43, 44, 47, 48, 49, 50, 52, 53, 54, 55, 56], "\uc2dc\ub3c4\ud574\ubd24\uc9c0\ub9cc": 1, "\uacb0\uacfc\uac00": [1, 6, 13, 14, 21, 24, 25, 26, 29, 31, 36, 39, 41, 44, 46, 48], "\ub9cc\uc871\uc2a4\ub7fd\uc9c0": [1, 17], "\ubabb\ud574": 1, "runwayml": 1, "stabl": [1, 8, 9, 14, 17, 20, 22, 23, 24, 26, 28, 35, 36, 39, 42, 47, 50, 51, 52, 53, 54], "v1": [1, 16, 22, 24, 25], "\uc791\uc5c5\uc744": [1, 32, 40, 43, 52, 55], "\uc9c4\ud589\ud588\uc2b5\ub2c8\ub2e4": [1, 8, 22, 48, 50, 54, 55], "\uc81c\uc678\ud55c": [1, 3, 4, 5, 43], "\ub3d9\uc77c\ud55c": [1, 2, 5, 7, 8, 17, 19, 22, 23, 24, 29, 31, 32, 33, 35, 36, 39, 42, 43, 44, 47, 50, 52, 53, 54, 55], "configur": [1, 46, 48], "\uc73c\ub85c": [1, 2, 4, 6, 11, 14, 15, 17, 19, 22, 28, 29, 30, 31, 32, 33, 36, 40, 41, 42, 43, 44, 45, 47, 49, 50, 51, 52, 53, 54, 55], "\uacb0\uacfc\uc785\ub2c8\ub2e4": [1, 39, 43, 52, 54, 55], "model_nam": 1, "instance_prompt": 1, "A": [1, 4, 6, 7, 8, 9, 10, 11, 16, 18, 19, 22, 24, 26, 28, 36, 38, 40, 41, 42, 44, 47, 49, 55, 56], "sk": [1, 37, 40], "girl": [1, 4], "class_prompt": 1, "python3": 1, "train_dreambooth": [1, 47], "py": [1, 42, 47], "pretrained_model_name_or_path": [1, 47], "pretrained_vae_name_or_path": 1, "stabilityai": [1, 53], "sd": [1, 4, 5, 16, 17, 18, 24, 25, 26, 30, 35, 42, 51, 52, 53, 55], "vae": [1, 3, 5, 6, 10, 14, 23, 27, 33, 43, 47, 48, 53, 54], "ft": [1, 19, 25, 36], "mse": [1, 4, 13], "output_dir": 1, "revis": [1, 47], "fp16": 1, "with_prior_preserv": [1, 47], "1": [1, 6, 8, 9, 11, 20, 22, 24, 26, 35, 38, 40, 42, 44, 47, 48, 49, 56], "0": [1, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 24, 25, 27, 29, 30, 31, 33, 34, 35, 38, 39, 41, 42, 43, 44, 45, 47, 48, 51, 53, 54, 55, 56], "seed": [1, 54], "1337": 1, "resolut": [1, 6, 16, 19, 20, 21, 23, 24, 25, 29, 30, 35, 39, 42, 43, 44, 46, 50, 51, 54], "512": [1, 20, 31, 32, 35, 41, 48, 
51, 53], "train_batch_s": 1, "train_text_encod": [1, 47], "mixed_precis": 1, "use_8bit_adam": 1, "gradient_accumulation_step": [1, 47], "gradient_checkpoint": 1, "learning_r": 1, "1e": [1, 5, 16, 19, 30, 37, 53], "6": [1, 2, 3, 5, 6, 7, 10, 18, 24, 31, 34, 35, 37, 42, 44, 51], "lr_schedul": [1, 47], "constant": [1, 20, 23, 32, 46, 52], "lr_warmup_step": 1, "num_class_imag": 1, "200": [1, 6, 27, 32, 35, 49], "sample_batch_s": 1, "4": [1, 2, 6, 11, 20, 24, 30, 38, 42, 48], "max_train_step": 1, "800": [1, 32], "save_interv": 1, "100": [1, 5, 18, 23, 27, 32, 33, 39, 42, 44], "save_sample_prompt": 1, "concepts_list": 1, "json": 1, "w": [1, 3, 4, 5, 6, 9, 10, 13, 18, 20, 24, 25, 27, 28, 29, 32, 38, 41, 45, 49, 53, 54], "o": [1, 3, 20, 29, 32, 37, 50], "\uc544\ub798": [1, 4, 6, 8, 9, 11, 15, 18, 19, 23, 24, 27, 29, 33, 38, 39, 43, 44, 45, 47, 48, 49, 52, 53, 54, 55, 56], "\uadf8\ub9bc\ucc98\ub7fc": [1, 11, 25, 26, 28, 48, 49, 56], "infer": [1, 5, 6, 13, 20, 24, 25, 27, 29, 30, 35, 41, 42, 51, 53, 55, 56], "\uc785\ub825\ud588\uc744": 1, "\ub54c": [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 51, 53, 54, 56], "\uc81c\uc678\ud568\uc73c\ub85c\uc368": 1, "input": [1, 5, 6, 7, 9, 10, 11, 14, 16, 17, 20, 24, 25, 26, 27, 28, 29, 30, 31, 32, 36, 38, 40, 42, 44, 45, 47, 48, 50, 54, 55], "\uac00\uae4c\uc6b4": [1, 2, 7, 8, 20, 24, 33, 36, 40, 45], "\uc6f9\ud230": 1, "\uc788\uc5c8\uc2b5\ub2c8\ub2e4": [1, 8, 9, 11, 22, 39, 44, 50, 52, 53], "\ub610\ud55c": [1, 2, 4, 5, 6, 7, 8, 9, 10, 13, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 33, 35, 36, 39, 41, 43, 44, 47, 48, 50, 51, 52, 53, 54, 55, 56], "\ud551\ud06c\uc0c9": 1, "\uba38\ub9ac\ub97c": 1, "\ud55c": [1, 3, 5, 6, 11, 13, 15, 17, 19, 21, 22, 23, 24, 26, 28, 29, 30, 31, 32, 35, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\uc774\ubbfc\uc9c0": 1, "\uce90\ub9ad\ud130\ub97c": 1, 
"\uc5b4\ub290": [1, 38, 39, 40], "\uc815\ub3c4": [1, 7, 12, 23, 28, 38, 39, 53], "\uc0dd\uc131\ud558\ub294": [1, 4, 7, 8, 9, 11, 13, 17, 19, 20, 21, 22, 24, 25, 26, 27, 29, 31, 33, 34, 36, 38, 39, 41, 44, 47, 48, 49, 50, 52, 53, 54, 56], "\ubd80\ubd84\ub3c4": [1, 50], "\ud655\uc778\ud560": [1, 4, 6, 15, 23, 26, 34, 35, 36, 39, 43, 44, 47, 50, 51, 52, 53, 54, 55, 56], "pink": 1, "hair": [1, 4, 30, 37, 38], "With": 1, "without": [1, 4, 28, 29, 37, 38, 54], "\ub3c4": [1, 6, 7, 11, 13, 20, 22, 24, 30, 31, 37, 41, 42, 43, 44, 47, 52, 53, 54, 55, 56], "\uce90\ub9ad\ud130\uc758": [1, 4, 5, 47], "\ubd80\uc790\uc5f0\uc2a4\ub7ec\uc6b4": 1, "\ubd80\ubd84\uc774\ub098": [1, 33], "\uc800\ud574\uc0c1\ub3c4": [1, 5, 27, 29, 31, 33, 41], "\uacbd\uc6b0\ub4e4\uc774": 1, "\uc885\uc885": [1, 19, 33, 44], "\ubc1c\uc0dd\ud588\ub294\ub370": 1, "\ud1b5\ud574": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 51, 52, 54, 55, 56], "\ud004\ub9ac\ud2f0\uc758": [1, 23, 25, 34, 37, 39], "ugli": 1, "disfigur": 1, "deform": [1, 30, 52], "low": [1, 5, 11, 20, 22, 23, 27, 31, 34, 41, 45, 52, 55], "\ub17c\ubb38\uc5d0\uc11c": [1, 4, 5, 6, 9, 19, 21, 24, 26, 27, 30, 33, 36, 38, 39, 42, 43, 45, 47, 48, 52, 53, 54, 56], "\uc81c\uc2dc\ud55c": [1, 10, 11, 15, 21, 26, 34, 42, 45, 48, 49], "\uc678\uc5d0": [1, 18, 54], "style": [1, 6, 11, 20, 22, 24, 25, 37, 40, 44, 47, 54, 57], "\ub77c\ub294": [1, 9, 11, 22, 39, 40, 44, 46, 49, 51, 54], "\ub85c": [1, 2, 4, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57], "\ud559\uc2b5\uc744": [1, 4, 10, 14, 15, 19, 21, 27, 29, 33, 35, 38, 39, 41, 44, 46, 50, 51], "\uc2dc\ub3c4\ud574\ubcf4\uae30\ub3c4": 1, "\ud2b9\uc815": [1, 3, 4, 8, 10, 11, 12, 13, 17, 18, 19, 21, 22, 23, 24, 25, 28, 32, 33, 35, 36, 37, 39, 40, 44, 49, 51, 55], 
"\uc5ec\uc790": 1, "\uce90\ub9ad\ud130\uc5d0": [1, 5], "\uc815\ubcf4\ubfd0\ub9cc": 1, "\uc544\ub2c8\ub77c": [1, 2, 6, 8, 10, 11, 13, 19, 21, 24, 26, 28, 31, 33, 35, 38, 41, 44, 47, 51, 53], "\ud504\ub9ac\ub4dc\ub85c\uc6b0": 1, "\uadf8\ub9bc\uccb4": [1, 4], "\uc790\uccb4\ub97c": [1, 6, 11, 23, 24, 32], "\ub2f4\uc544\ub0b4\uae30": [1, 53], "\uc704\ud55c": [1, 2, 3, 4, 5, 6, 7, 8, 11, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 36, 39, 40, 44, 46, 51, 52, 56], "\ubaa9\uc801\uc774\uc600\uc2b5\ub2c8\ub2e4": 1, "differ": [1, 5, 11, 26, 27, 28, 38, 51, 53, 54], "\uc2dc": [1, 20, 25, 33, 36, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56], "\ud504\ub9ac\ub4dc\ub85c\uc6b0\uc758": 1, "\uadf8\ub9bc\uccb4\uac00": [1, 4, 11], "\ubc18\uc601\ub41c": [1, 11], "\ub0a8\uc790\uac00": 1, "\uc0dd\uc131\ub418\ub3c4\ub85d": [1, 26], "boi": 1, "\uc785\ub825\ud588\uc744\ub54c\uc758": 1, "\ud639\uc740": [1, 2, 3, 6, 10, 11, 19, 20, 24, 26, 32, 35, 47, 51, 53, 56], "\uc791\uac00\ub2d8\uc758": 1, "\uc7a5\uba74\ub4e4\ub85c": 1, "\uc804\uccb4\uc801\uc73c\ub85c": [1, 15], "\ud559\uc2b5\ud558\uac8c": [1, 5, 43, 50, 55], "\ub41c\ub2e4\uba74": [1, 35], "\ub2e4\uc591\ud55c": [1, 2, 3, 4, 7, 8, 10, 11, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47, 49, 51, 53, 54, 55], "\uac83": [1, 5, 6, 11, 13, 14, 19, 25, 26, 27, 29, 30, 31, 32, 33, 36, 39, 40, 42, 44, 49], "num_inference_step": [1, 55], "24": [1, 5, 26, 30, 31, 39, 52, 53], "step": [1, 4, 5, 6, 10, 11, 12, 13, 15, 16, 17, 19, 21, 23, 24, 26, 27, 31, 32, 37, 39, 42, 46, 47, 48, 52, 54, 55], "\uc744": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 35, 36, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56], "\ub298\ub824\uac00\uba74\uc11c": 1, "\ucd94\ub860\ub41c": 1, "\ud004\ub9ac\ud2f0\uac00": [1, 3, 4, 7, 35, 36, 55], "\uc0c1\uc2b9\ud558\ub294": 1, "\uc2e4\ud5d8\ub3c4": 1, 
"\uc9c4\ud589\ud588\ub294\ub370": 1, "\uc791\uc744\uc218\ub85d": [1, 14, 39, 43, 51], "\uc640": [1, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 17, 19, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 40, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "\ubb34\uad00\ud55c": [1, 50], "random": [1, 3, 5, 6, 10, 12, 13, 15, 16, 18, 20, 22, 25, 28, 30, 31, 32, 34, 35, 40, 43, 47, 48, 50, 54, 55], "\uc0dd\uc131\ud558\uac8c": [1, 26, 39, 47, 49, 55, 56], "\ub429\ub2c8\ub2e4": [1, 8, 9, 11, 22, 31, 38, 39, 43, 44, 47, 48, 50, 52, 53, 54, 55, 56], "\ucd5c\uc885\uc801\uc73c\ub85c": [1, 26, 33, 38, 39, 43, 54, 55], "num_infer": 1, "\uac12\uc740": [1, 6, 14, 23, 25, 39, 43, 46, 47], "\uac01\uac01": [1, 3, 4, 5, 6, 7, 9, 10, 11, 15, 21, 22, 24, 26, 40, 42, 44, 47, 48, 51, 52, 53, 55, 56], "\uacfc": [1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 15, 17, 18, 19, 20, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 37, 40, 41, 42, 43, 44, 45, 47, 49, 51, 52, 53, 54, 55], "\uc124\uc815\ud558\uc600\uc2b5\ub2c8\ub2e4": [1, 43, 54], "increas": [1, 11], "number": [1, 30, 43, 46, 55], "guidance_scal": [1, 55], "\uc81c\uc678\ud574\ubcf8": 1, "\uc0dd\uc131\ub41c": [1, 3, 4, 5, 6, 8, 11, 13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 26, 29, 30, 33, 34, 35, 36, 38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 50, 55, 56], "\ub0a8\uc790\uc758": 1, "\uba38\ub9ac\uce74\ub77d\uc774": 1, "\uae38\uc5b4\uc9c0\uace0": 1, "\uc5ec\uc131\uc2a4\ub7ec\uc6b4": 1, "\uc0dd\uae40\uc0c8\ub97c": [1, 40], "\ub180\ub77c\uc6b4": [1, 3, 11, 33, 34, 39], "\uc0ac\uc2e4\ub3c4": 1, "\uadf8": [1, 4, 6, 7, 8, 11, 13, 19, 20, 22, 24, 26, 27, 30, 31, 33, 34, 35, 38, 39, 40, 41, 43, 44, 54, 55], "\uc678": [1, 20, 34, 43, 44, 54], "\ub530\ub978": [1, 4, 5, 9, 11, 14, 23, 26, 27, 32, 36, 39, 42, 45, 47, 50, 51, 56], "\uc7ac\ubbf8\uc788\ub294": 1, "\uc2e4\ud5d8\uacb0\uacfc\ub4e4\uc744": 1, "\uacf5\uc720\ud569\ub2c8\ub2e4": [1, 43, 47, 54, 55], "\uc544\uc9c1": [1, 3, 11, 40, 46, 51], "\uc190\uc758": [1, 5], "\ubaa8\uc591\uc744": [1, 19, 33], 
"\uc0dd\uc131\ud558\uc9c0": [1, 24, 41], "\ubabb\ud558\ub294": [1, 22, 26, 41, 48, 53], "\uc7ac\ucc28": [1, 43, 53], "climb": 1, "up": [1, 4, 7, 13, 54], "mountain": 1, "paint": [1, 5, 47, 50], "2": [1, 6, 8, 20, 22, 24, 26, 38, 40, 42, 44, 47, 48, 49, 56], "hand": 1, "draw": [1, 37], "\ud558\ub2e8\uc758": [1, 43], "\uc88c\uce21\uacfc": 1, "\uc6b0\uce21": [1, 52], "\uc0ac\uc9c4\uc740": [1, 19, 27, 43, 54], "\uc774\ub77c\ub294": [1, 6, 26, 30, 44, 46, 49, 51], "\ub098\ube44\ub97c": 1, "\uc0dd\uc131\ud558\ub77c\ub294": 1, "\ucd94\ub860\ud574\ubcf8": 1, "\uc218\uc2dd\ud558\ub294": 1, "\uba85\uc0ac\uac00": 1, "\uc774\ub3c4\ub85d": 1, "\uc218\uc815\ud568\uc73c\ub85c\uc368": [1, 23, 43], "butterfli": 1, "\uc0ac\uc9c4\uc744": [1, 4, 6, 19, 44, 46, 53], "\uc0dd\uc131\ud560\ub54c": 1, "\uc870\uae08\uc774\ub098\ub9c8": 1, "\uc6f9\ud230\uc758": 1, "\uadf8\ub9bc\uccb4\ub97c": [1, 4], "\ubc18\uc601\ud560": 1, "\uc788\uc5c8\ub358": [1, 44], "refer": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "paper": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57], "http": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "arxiv": [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "org": [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 
49, 50, 51, 52, 53, 54, 55, 56], "ab": [2, 4, 5, 6, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 27, 28, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 49, 54, 55, 56], "2308": [2, 24], "04079": 2, "project": [2, 4, 5, 11, 14, 16, 17, 18, 19, 24, 26, 27, 28, 30, 31, 32, 33, 36, 46, 51, 55], "repo": 2, "sam": [2, 5, 30], "inria": 2, "fr": 2, "fungraph": 2, "hyunsoo": 2, "june": [2, 4, 45], "12": [2, 6, 36, 38, 51], "2024": [2, 4, 5, 14, 16, 17, 18, 20, 24, 25, 26, 30, 31, 32, 33, 36, 43, 44, 48, 51, 52, 53, 56, 57], "nerf": [2, 18, 19, 52], "\uae30\ubc18": [2, 5, 6, 7, 14, 16, 17, 18, 19, 20, 22, 26, 32, 33, 34, 35, 36, 39, 40, 45, 53], "\ubc29\uc2dd\ub4e4\uc740": [2, 4, 19, 33], "\ub192\uc740": [2, 3, 4, 5, 6, 8, 11, 13, 17, 18, 19, 21, 22, 23, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 49], "\ud488\uc9c8\uc758": [2, 4, 13, 19, 21, 33, 44, 51], "\uc0c8\ub85c\uc6b4": [2, 4, 6, 7, 8, 10, 14, 15, 16, 17, 18, 19, 22, 24, 25, 29, 30, 32, 33, 34, 35, 36, 38, 39, 40, 41, 44, 46, 47, 48, 49, 50, 51, 54, 56], "\uc7a5\uba74": [2, 32], "\ud569\uc131\uc774": 2, "\uac00\ub2a5\ud558\uc9c0\ub9cc": 2, "\uc2dc\uac04\uc774": [2, 19, 33, 39, 46, 52], "\ub9ce\uc774": [2, 4, 5, 6, 11, 15, 18, 19, 26, 27, 32, 33, 36, 39, 40, 41, 42, 46, 48], "\uac78\ub824": 2, "\uc2e4\uc2dc\uac04": [2, 33, 51], "\ub80c\ub354\ub9c1\uc5d0\ub294": 2, "\ud55c\uacc4\uac00": [2, 4, 9, 25, 37, 40, 41, 44, 49, 55], "\uc788\ub2e4": [2, 3, 4, 5, 6, 7, 12, 15, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 33, 34, 35, 36, 37, 40, 41, 42, 46, 49, 51], "\uc2dc\uac04\uc740": 2, "\ube60\ub974\uc9c0\ub9cc": [2, 22], "\ud488\uc9c8\uc774": [2, 4, 24, 27, 29, 33, 38, 44], "\ub5a8\uc5b4\uc9c4\ub2e4": [2, 5, 6, 19, 24, 27, 51], "\ub17c\ubb38\uc740": [2, 6, 22, 26, 30, 36, 38, 39, 44, 47, 53], "splatting\uc744": 2, "\uc81c\uc548\ud569\ub2c8\ub2e4": [2, 8, 9, 11, 22, 43, 44, 53], "\ubc29\ubc95\uc740": [2, 4, 5, 6, 7, 8, 15, 16, 18, 21, 22, 24, 26, 28, 33, 40, 44, 47, 
51], "mip": [2, 17, 18, 52], "nerf360\uacfc": 2, "\uc720\uc0ac\ud55c": [2, 5, 6, 10, 13, 14, 15, 17, 22, 24, 27, 33, 36, 40, 45, 47, 51, 52, 54, 56], "\uc81c\uacf5\ud558\uba74\uc11c\ub3c4": 2, "instantngp\ub9cc\ud07c": 2, "\ube60\ub974\uac8c": [2, 4, 9, 22, 23, 46, 49, 51, 56], "\ud559\uc2b5\ud560": [2, 4, 8, 14, 20, 21, 25, 29, 30, 35, 39, 43, 44, 48, 52], "splatting\uc740": 2, "\ub2e4\uc74c": [2, 6, 7, 8, 17, 19, 29, 30, 33, 38, 40, 43, 44, 49, 51, 55], "\uc138": [2, 4, 6, 7, 8, 11, 18, 32, 33, 35, 38, 41, 44, 47, 54], "\uac00\uc9c0\ub85c": 2, "\uad6c\uc131\ub41c\ub2e4\ub2e4": 2, "structur": [2, 4, 9, 15, 18, 24, 27, 28, 38, 43, 53, 55], "from": [2, 3, 7, 11, 13, 18, 26, 27, 31, 38, 48, 54], "motion": [2, 5, 6, 19, 43], "sfm": 2, "\uc5bb\uc740": [2, 4, 26, 36, 44], "\ud76c\uc18c": 2, "\uc810": [2, 20, 33, 42], "\uad6c\ub984\uc744": 2, "\ucd08\uae30\uac12\uc73c\ub85c": 2, "\uac00\uc6b0\uc2dc\uc548\uc744": 2, "\ub3c4\uc785": [2, 5, 17, 18, 20, 23, 31, 32], "\uc704\uce58": [2, 7, 32, 36, 41], "\ubd88\ud22c\uba85\ub3c4": 2, "\uc774\ubc29\uc131": 2, "\uacf5\ubd84\uc0b0": [2, 46], "\uad6c\ud615": 2, "\uace0\uc870\ud30c": 2, "sh": 2, "\uacc4\uc218\uc758": 2, "\ucd5c\uc801\ud654": [2, 4, 8, 17, 18, 22, 32, 33, 35, 36, 40, 52], "\ud0c0\uc77c": 2, "\ub798\uc2a4\ud130\ud654\uc5d0\uc11c": 2, "\uc601\uac10\uc744": [2, 16, 25, 29, 33, 40], "\ube60\ub978": [2, 10, 22, 31, 36, 43, 51], "gpu": [2, 5, 16, 19, 20, 24, 28, 32, 33, 51, 52, 55], "\uc815\ub82c": [2, 19], "\uc54c\uace0\ub9ac\uc998\uc744": [2, 18, 33, 43, 45, 52], "\ud1b5\ud55c": [2, 3, 5, 10, 16, 24, 32, 33, 34, 40, 43, 46, 53, 54], "\ub80c\ub354\ub9c1": [2, 17, 18, 19, 32, 33, 36], "\uc774\uc804\uc758": [2, 14, 25, 26], "implicit": [2, 3, 12, 19, 32, 36, 48, 55], "\uc811\uadfc\ubc95\ub4e4\uacfc": 2, "\ub3d9\ub4f1\ud558\uac70\ub098": 2, "\ub098\uc740": [2, 4, 22, 24, 33, 36, 56], "\ud488\uc9c8\uc744": [2, 4, 11, 18, 19, 24, 27, 33, 35, 39, 40], "\uc81c\uacf5\ud558\uba70": 2, "\uac00\uc7a5": [2, 3, 4, 5, 6, 7, 8, 10, 11, 
14, 15, 16, 17, 21, 22, 26, 29, 30, 31, 33, 36, 38, 39, 40, 42, 43, 44, 47, 54], "\ubc29\uc2dd\uacfc": [2, 18, 25, 43, 51], "\ud559\uc2b5": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 53, 54, 55, 56], "\uc18d\ub3c4\uc640": [2, 22], "\ubcf4\uc5ec\uc8fc\uc5b4": 2, "\uace0\uc218\uc900\uc758": 2, "\ubdf0": [2, 17, 18, 32, 33], "\ud569\uc131\uc5d0": [2, 40, 49], "\ub300\ud574": [2, 3, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 47, 48, 50, 51, 52, 53, 54, 55, 56], "\ucc98\uc74c\uc73c\ub85c": [2, 24], "\ub80c\ub354\ub9c1\uc744": [2, 17, 33], "\uc81c\uacf5\ud55c\ub2e4": [2, 29, 51], "main": [2, 26, 41, 47], "process": [2, 4, 6, 11, 17, 18, 23, 24, 25, 30, 34, 36, 40, 42, 45, 47, 48, 52, 54, 55], "peusdo": 2, "algorithm": [2, 6, 13, 32, 42, 43], "normal": [2, 3, 7, 12, 13, 14, 17, 18, 20, 32, 36, 38, 43, 44, 48, 49, 52], "\ud45c\uba74": 2, "\ubc95\uc120": 2, "\uc5c6\ub294": [2, 5, 7, 15, 16, 17, 19, 27, 33, 35, 36, 37, 44, 46, 47, 49], "\ud3ec\uc778\ud2b8\ub4e4\uc758": 2, "sparse\ud55c": 2, "\uc14b\uc744": [2, 32, 36, 37, 40, 41], "initi": [2, 7, 14, 17, 18, 25, 28, 43, 52, 53], "point\ub85c": [2, 36], "\ud558\uc5ec": [2, 3, 6, 8, 24, 33, 36, 41, 44, 49, 52], "\uace0\ud488\uc9c8\uc758": [2, 4, 17, 24, 33, 51], "novel": [2, 26, 30, 32, 36, 47], "view": [2, 4, 18, 36, 38, 42, 47, 48, 56], "synthesis\ub97c": [2, 21], "\uac00\ub2a5\ud558\uac8c": [2, 9, 15, 18, 24, 28, 31, 32, 36, 38, 41, 51], "\ud558\ub294": [2, 3, 4, 6, 7, 8, 9, 13, 14, 15, 17, 18, 19, 21, 22, 23, 24, 25, 26, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56], "scene": [2, 7, 11, 18, 26, 36, 41, 50, 53], "representation\uc744": [2, 18, 21, 36], "\ucd5c\uc801\ud654\ud558\ub294": [2, 17, 28, 32, 33, 48, 52], "\uac83\uc744": [2, 3, 4, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 22, 
23, 24, 26, 27, 29, 30, 31, 33, 34, 35, 36, 38, 39, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], "\ubaa9\ud45c\ub85c": [2, 4, 8, 19, 22, 33, 36, 40, 44, 47], "\ud55c\ub2e4": [2, 4, 6, 7, 15, 19, 21, 24, 25, 26, 27, 29, 30, 33, 36, 40, 41, 42, 46, 49, 51], "unstructured\ud558\uace0": 2, "explicit\ud55c": 2, "primitive\ub97c": 2, "\ud544\uc694\ub85c": [2, 15, 25, 33, 36, 40], "\ubbf8\ubd84": [2, 25, 32, 33, 43, 51], "\uac00\ub2a5\ud558\uace0": [2, 3, 10, 18, 52], "2d": [2, 4, 5, 13, 17, 18, 29, 32, 33], "splats\ub85c": 2, "\uc27d\uac8c": [2, 4, 6, 14, 18, 25, 32, 33, 35, 38, 40], "project\ub418\ub294": 2, "gaussian\uc744": 2, "\uc120\ud0dd\ud588\ub2e4": 2, "equat": [2, 10, 11, 12, 16, 18, 20, 21, 23, 34, 37, 43, 46, 51], "gaussian\uc740": 2, "\ud3ec\uc778\ud2b8": [2, 32, 33], "mean": [2, 12, 13, 15, 21, 32, 42, 47, 54], "\u03bc\ub97c": 2, "\uc911\uc2ec\uc73c\ub85c": [2, 21], "\ud558\uace0": [2, 11, 15, 26, 27, 30, 32, 35, 36, 42, 43, 52, 55], "\ud589\ub82c": [2, 4], "\u03c3\ub85c": 2, "\uc815\uc758\ud55c\ub2e4": [2, 15, 19, 24, 25, 34, 41], "2d\ub85c": 2, "project\ud574\uc57c": 2, "transform": [2, 3, 4, 7, 13, 21, 23, 24, 30, 31, 33, 35, 36, 39, 42], "w\uc5d0": 2, "\uce74\uba54\ub77c": [2, 4, 17, 18, 32, 33], "\uc88c\ud45c\uacc4\uc5d0\uc11c\uc758": 2, "\u03c3": [2, 12, 32, 33], "\ub098\ud0c0\ub0bc": [2, 6, 25, 26, 32, 44, 51], "\ucd5c\uc801\ud654\ub97c": [2, 17, 18, 32, 36], "\u03c3\ub294": [2, 12], "posit": [2, 4, 7, 13, 14, 18, 20, 21, 29, 31, 33, 37], "semi": 2, "definit": [2, 43], "\ud589\ub82c\uc774\uc5b4\uc57c": 2, "\ub54c\ubb38\uc5d0": [2, 4, 5, 9, 11, 13, 14, 16, 17, 19, 20, 22, 24, 25, 26, 28, 30, 31, 32, 33, 36, 38, 39, 43, 44, 47, 48, 51, 53, 54, 55, 56], "\ucd5c\uc801\ud654\uac00": [2, 17, 18, 32, 40], "\uc5b4\ub835\ub2e4\uace0": [2, 52], "\uc9c1\uad00\uc801\uc774\uace0": 2, "\ucd5c\uc801\ud654\uc5d0": [2, 17, 18, 32, 44], "\uc801\ud569\ud55c": [2, 4, 24, 30], "\uc120\ud0dd\ud55c\ub2e4": [2, 7], "gaussian\uc758": 2, "\ud0c0\uc6d0\uccb4\uc758": 
2, "\uad6c\uc131\uc744": 2, "\uc124\uba85\ud558\ub294": [2, 38, 39, 40, 44], "\uac83\uacfc": [2, 4, 6, 9, 18, 19, 21, 36, 40, 50, 54], "\uc720\uc0ac\ud558\uba70": 2, "scale": [2, 6, 7, 10, 11, 15, 16, 21, 24, 25, 28, 30, 31, 32, 38, 46, 49, 55], "matrix": [2, 25, 28, 44], "s\uc640": 2, "rotat": [2, 30, 32], "r\uc744": [2, 28], "\uc0ac\uc6a9\ud55c\ub2e4": [2, 4, 7, 15, 19, 21, 24, 26, 29, 33, 35, 36, 51], "scaling\uc740": 2, "vector": [2, 4, 7, 11, 20, 25, 27, 31, 32, 36, 38, 40, 45, 47], "s\ub85c": [2, 49], "rotation\uc740": 2, "quaternion": 2, "q\ub85c": 2, "\ud45c\ud604\ud558\uba70": 2, "\uc774\ub4e4\uc740": [2, 53], "\uac01\uac01\uc758": [2, 3, 6, 10, 15, 20, 26, 29, 32, 37], "\ud589\ub82c\ub85c": [2, 22], "\ubcc0\ud658\ub420": 2, "\ub3d9\uc548": [2, 8, 11, 15, 19, 29, 35, 48], "auto": [2, 4, 24, 36, 56], "grad": 2, "\uc790\ub3d9": 2, "\uc624\ubc84\ud5e4\ub4dc\ub97c": 2, "\ud53c\ud558\uae30": [2, 4, 7, 17, 18, 24], "\ubaa8\ub4e0": [2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 15, 17, 19, 20, 22, 24, 26, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 46, 51, 54, 56], "\ud30c\ub77c\ubbf8\ud130\uc5d0": 2, "gradient\ub97c": [2, 21, 24, 45, 46], "\uba85\uc2dc\uc801\uc73c\ub85c": [2, 3, 8, 33], "\uc720\ub3c4\ud55c\ub2e4": [2, 26], "anisotrop": 2, "covari": [2, 14], "representation\uc740": [2, 27], "\uc7a5\uba74\uc758": [2, 18, 32], "geometry\uc5d0": 2, "\uc801\uc751\ud558\ub3c4\ub85d": [2, 16], "\ucd5c\uc801\ud654\ud55c\ub2e4": 2, "\ub17c\ubb38\uc758": [2, 4, 11, 22, 24, 25, 26, 29, 32, 33, 34, 36, 38, 39, 46, 51, 56], "\ud575\uc2ec": [2, 7, 17, 19, 22, 24, 29, 33, 34, 39, 42, 46], "\uc811\uadfc\ubc95\uc740": [2, 8], "free": [2, 24, 25, 26, 33, 36, 39, 41, 43, 50, 51, 53, 55], "\uc7a5\uba74\uc744": [2, 8, 32, 33], "\uc815\ud655\ud558\uac8c": [2, 8, 13, 17, 19, 40], "\ud45c\ud604\ud558\ub294": [2, 8, 13, 27, 36, 40, 41, 47, 52], "\ubc00\uc9d1\ub41c": [2, 33], "\uc138\ud2b8\ub97c": [2, 8, 29], "\ub9cc\ub4dc\ub294": [2, 4, 8, 13, 18, 19, 21, 22, 26, 29, 32, 36, 44, 56], 
"\ub2e8\uacc4\ub2e4": 2, "\uc5ec\uae30\uc5d0\ub294": [2, 40], "\ud835\udc5d": [2, 45], "\ud22c\uba85\ub3c4": [2, 32], "\ud835\udefc": 2, "\u03c3\ubfd0\ub9cc": 2, "scene\uc758": 2, "depend": [2, 18, 32, 45, 47], "appearance\ub97c": 2, "\uac01": [2, 3, 4, 5, 6, 8, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 45, 46, 47, 50, 51, 52, 53, 54, 55], "\uc0c9\uc0c1": [2, 11, 17, 18, 31, 32, 33], "c\ub97c": 2, "coefficients\uae4c\uc9c0": 2, "\ud3ec\ud568\ub41c\ub2e4": [2, 19, 40], "3d\ub97c": [2, 36], "project\ud560": 2, "\ubc1c\uc0dd\ud560": [2, 3, 4, 7, 15, 33], "\uc788\ub294": [2, 4, 5, 6, 7, 8, 9, 11, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 42, 43, 44, 46, 47, 50, 51, 53, 54, 55], "\ubaa8\ud638\ud568\uc744": 2, "\uacfc\uc815\uc5d0\uc11c": [2, 8, 12, 13, 15, 20, 21, 22, 26, 27, 28, 30, 32, 34, 37, 39, 40, 44, 48, 51, 52, 53], "geometry\uac00": 2, "\uc0dd\uc131\ub418\uac70\ub098": 2, "\uc0ad\uc81c\ub418\uac70\ub098": 2, "\uc774\ub3d9\ud560": [2, 40], "\uc788\uc5b4\uc57c": [2, 30, 41, 51], "\ud568": [2, 5, 6, 13, 14, 15, 16, 17, 18, 27, 28, 29, 30, 31, 36, 39, 40, 42, 45, 46, 49], "\ud30c\ub77c\ubbf8\ud130\uc758": [2, 31, 48], "\ud004\ub9ac\ud2f0\ub294": [2, 43], "\ud070": [2, 3, 4, 5, 6, 8, 11, 13, 15, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 35, 36, 38, 39, 40, 41, 42, 51, 54, 55], "homogen": [2, 15], "area\ub4e4\uc744": 2, "\uc218\uc758": [2, 4, 7, 24, 27, 32, 42], "gaussian\ub4e4\ub85c": 2, "\ucea1\ucc98\ub420": 2, "\uc788\uae30": [2, 9, 17, 30, 33, 36, 43, 44, 51], "representation\uc758": [2, 36], "compactness\uc5d0": 2, "\uc911\uc694": [2, 5], "sgd\ub97c": 2, "\uc0ac\uc6a9\ud558\uace0": [2, 8, 9, 10, 15, 20, 22, 24, 28, 31, 33, 37, 39, 40, 47, 50, 51, 53], "\uc5f0\uc0b0\uc740": [2, 20], "cuda": [2, 53, 56], "\ucee4\ub110\uc744": 2, "\uc0ac\uc6a9\ud569\ub2c8\ub2e4": [2, 38, 44, 47, 52, 54, 56], "\ud2b9\ud788": [2, 3, 4, 5, 11, 17, 24, 25, 35, 36, 40, 43, 49, 51, 52, 53, 55], 
"rasterization\uc740": 2, "\ucd5c\uc801\ud654\uc758": [2, 17, 32], "\ud6a8\uc728\uc131\uc5d0": 2, "\uc911\uc694\ud569\ub2c8\ub2e4": 2, "\uc8fc\uc694": [2, 8, 16, 17, 29, 35], "comput": [2, 11, 12, 18, 20, 25, 27, 28, 41, 43, 44, 46, 47, 49, 50, 51, 52, 54, 55, 57], "bottleneck\uc774\uae30": 2, "\ub54c\ubb38": [2, 6, 17, 20, 32, 42, 51], "\ud835\udefc\uc5d0": 2, "\ub300\ud574\uc11c\ub294": [2, 5, 10, 15, 16, 21, 23, 26, 29, 39, 54], "sigmoid": [2, 48, 56], "function\uc744": [2, 4, 14, 18, 25, 26, 36, 45], "\uacf5\ubd84\uc0b0\uc758": 2, "scale\uc5d0": [2, 46], "exponenti": [2, 12, 14, 25, 32, 35, 43, 51], "activ": [2, 38, 53], "\ud568\uc218\ub97c": [2, 8, 20, 32], "\uc0ac\uc6a9": [2, 3, 4, 5, 6, 9, 10, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 27, 31, 32, 33, 36, 37, 45, 46, 49, 51], "\ud589\ub82c\uc740": [2, 22], "\uc810\uae4c\uc9c0\uc758": 2, "\uac70\ub9ac\uc758": 2, "\ud3c9\uade0\uc744": [2, 6, 7, 18], "\ucd95\uc73c\ub85c": [2, 4, 35], "isotrop": 2, "gaussian\uc73c\ub85c": 2, "\ucd94\uc815": [2, 29, 32, 36, 40, 51], "position\uc5d0": 2, "\ub300\ud574\uc11c\ub9cc": [2, 4, 29, 39, 42, 44, 53], "decai": [2, 16, 24, 32, 43], "\uc2a4\ucf00\uc904\ub9c1\uc744": 2, "loss": [2, 4, 6, 9, 12, 14, 17, 18, 20, 22, 23, 26, 27, 31, 32, 34, 37, 42, 43, 45, 46, 47, 48, 51, 52, 53, 56], "function\uc740": [2, 4, 26, 51], "d": [2, 4, 6, 13, 14, 15, 17, 19, 20, 24, 25, 27, 28, 29, 32, 33, 36, 38, 43, 48, 51, 53, 54], "ssim\uacfc": 2, "l1": [2, 42, 43, 44], "\uc0ac\uc6a9\ud558\uba70": [2, 7, 15, 33, 53], "ssim": [2, 5, 30], "loss\ub294": [2, 9, 13, 18, 20, 36], "\uc65c\uace1": [2, 5], "distort": [2, 5, 15, 27, 32, 40], "\ubc18\uc601\ud558\uace0": 2, "\uac00\ub2a5\ud558\uc5ec": 2, "metric\ubfd0\ub9cc": 2, "loss\ub85c\ub3c4": 2, "\uac00\ub2a5": [2, 5, 6, 10, 13, 16, 17, 18, 20, 21, 25, 27, 31, 32, 46, 54, 55], "\ucc38\uace0\ub85c": [2, 54], "ssim\uc740": 2, "\ubc1d\uae30": [2, 32], "\ub300\uc870": 2, "\uad6c\uc870\ub97c": [2, 4, 5, 7, 9, 14, 17, 19, 24, 27, 30, 32, 33, 36, 38, 44, 45, 47, 
49, 55], "\uace0\ub824\ud558\uc5ec": [2, 4, 25, 32, 36, 40], "\ub450": [2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 17, 18, 19, 20, 22, 23, 24, 25, 26, 29, 30, 31, 32, 33, 35, 36, 38, 39, 41, 42, 43, 44, 47, 51, 52, 53, 54], "\uac04\uc758": [2, 6, 8, 15, 18, 19, 21, 26, 29, 30, 32, 33, 37, 43, 44, 53], "\uc720\uc0ac\uc131\uc744": [2, 8, 17, 40], "\uce21\uc815\ud558\ub294": [2, 3, 8, 11, 15, 25, 42, 47, 51], "\uba54\ud2b8\ub9ad\uc774\ub2e4": 2, "\ucd08\uae30": [2, 5, 17, 18, 22, 24, 26, 33, 36, 43], "\uc810\ub4e4\uc744": [2, 32], "\uc2dc\uc791\uc73c\ub85c": [2, 5], "scene\uc744": [2, 32, 36], "\ud45c\ud604\ud558\uae30": [2, 36, 55], "unit": [2, 20, 32], "volum": [2, 17, 52], "\ub0b4": [2, 5, 11, 24, 26, 33], "gaussian\ub4e4\uc758": 2, "\uc218\uc640": [2, 45], "\ubc00\ub3c4\ub97c": 2, "\uc810\uc9c4\uc801\uc73c\ub85c": [2, 8, 33, 43, 51], "\ubc29\uc2dd\uc744": [2, 4, 10, 15, 17, 18, 19, 21, 29, 30, 33, 34, 36, 43, 52, 53, 55], "\uc81c\uc548\ud55c\ub2e4": [2, 3, 4, 5, 15, 24, 25, 26, 29, 35, 37, 40, 46, 51], "\ub9e4": [2, 6, 13, 18, 52, 54, 55], "100\ubc88\uc758": 2, "\ubc18\ubcf5": [2, 17, 22, 26, 33, 54], "iter": [2, 18, 22, 32, 33, 43, 52, 54], "\ub9c8\ub2e4": [2, 4, 6, 20, 24, 30, 31, 36, 42, 44, 52, 53, 54, 55], "\ucd94\uac00\ud558\uace0": [2, 11, 13, 20, 22, 29, 31], "\ud835\udefc\uac00": 2, "\uac12\ubcf4\ub2e4": 2, "\uc791\uc740": [2, 4, 6, 7, 10, 13, 14, 15, 18, 22, 24, 25, 28, 29, 33, 35, 39, 40, 42, 43, 44, 51], "\uc81c\uac70": [2, 13, 14, 22, 31, 35], "gaussians\ub294": 2, "\ube48": [2, 18, 30], "\uacf5\uac04\uc744": [2, 18, 19, 22], "\ucc44\uc6b0\uace0": 2, "miss": [2, 23], "geometr": 2, "feature\uc774": 2, "region\uacfc": 2, "\ud558\ub098\uc758": [2, 3, 6, 10, 13, 19, 20, 24, 28, 29, 31, 32, 33, 37, 38, 40, 44, 51, 52, 53, 56], "gaussian\uc774": 2, "\ub108\ubb34": [2, 6, 15, 18, 21, 23, 25, 27, 29, 30, 35, 37, 45, 51], "\ub113\uc740": [2, 7, 17], "region\uc744": 2, "\ucee4\ubc84\ud558\ub294": [2, 26], "region\uc5d0": 2, "\uc9d1\uc911\ud569\ub2c8\ub2e4": 2, 
"\uc774\ub7ec\ud55c": [2, 3, 4, 6, 8, 10, 11, 15, 17, 19, 20, 22, 23, 26, 29, 31, 32, 33, 35, 36, 37, 39, 40, 41, 43, 44, 47, 52, 53, 54, 56], "\uc9c0\uc5ed\uc740": 2, "\ubaa8\ub450": [2, 4, 5, 7, 10, 11, 12, 15, 19, 20, 22, 23, 24, 26, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47, 50, 51, 52, 53, 55], "space": [2, 4, 6, 8, 11, 19, 20, 22, 24, 27, 29, 31, 35, 36, 40, 41, 43, 45, 47, 52, 53, 54, 55, 56], "\uac00\uc9c0\uba70": [2, 21, 25, 51, 55], "\uc774\ub3d9\uc2dc\ud0b5\ub2c8\ub2e4": 2, "under": 2, "reconstruct": [2, 13, 17, 32, 35, 36, 40, 47, 53, 54, 56], "\ud06c\uae30\uc758": [2, 4, 28, 35], "\ubcf5\uc81c\ud558\uace0": 2, "\uae30\uc6b8\uae30": [2, 25, 34], "\ubc29\ud5a5\uc73c\ub85c": [2, 13, 15, 18, 24, 34, 36, 37, 48, 54, 55], "\uc774\ub3d9": [2, 32], "over": [2, 3, 6, 17, 26, 28, 44, 49], "\uac1c\ub85c": 2, "\ub098\ub204\uace0": [2, 30, 33, 45], "\uc704\uce58\ub294": 2, "pdf\ub97c": 2, "\uc0d8\ud50c\ub9c1\ud574\uc11c": [2, 47], "\ucd08\uae30\ud654": [2, 4, 5, 7, 14, 16, 18, 19, 29, 36], "\uc785\ub825": [2, 5, 7, 8, 14, 16, 17, 18, 19, 24, 29, 31, 32, 33, 36, 39, 40, 44, 47, 50, 53, 56], "\uce74\uba54\ub77c\uc5d0": 2, "density\uc758": 2, "\ubd80\uc801\uc808\ud55c": 2, "\uc99d\uac00\ub97c": [2, 5], "\ubc29\uc9c0\ud558\uae30": [2, 18, 19, 43, 44, 52], "3000\ubc88\uc758": 2, "\ubc18\ubcf5\ub9c8\ub2e4": 2, "\u03b1\ub97c": 2, "0\uc5d0": [2, 7, 15], "\uac00\uae5d\uac8c": [2, 8, 17, 20, 44], "\uc124\uc815\ud55c\ub2e4": [2, 15, 21], "\ubcf8": [2, 4, 5, 6, 8, 11, 13, 15, 17, 18, 19, 22, 24, 25, 26, 28, 29, 30, 32, 33, 36, 39, 42, 45, 46, 49, 51], "\ubaa9\ud45c\ub294": [2, 22, 40], "\ub300\ub7b5\uc801\uc778": [2, 33], "\u03b1": 2, "blending\uc744": 2, "\uc774\uc804": [2, 13, 14, 15, 19, 20, 21, 28, 33, 38, 48], "\uc5f0\uad6c\uc5d0": [2, 5], "\uc874\uc7ac\ud558\ub358": [2, 24], "splat\ub4e4\uc758": 2, "\uc218\uc5d0": 2, "\uac15\ud55c": [2, 22, 30, 37], "\uc81c\uc57d\uc744": [2, 8, 44], "\ud53c\ud558\ub294": 2, "\ub80c\ub354\ub9c1\uacfc": [2, 32], 
"sorting\uc744": 2, "\uac83\uc774\ub2e4": [2, 4, 7, 15, 21, 24, 25, 26, 29, 34, 36, 42, 51], "\ubaa9\ud45c\ub97c": [2, 7, 19], "\ub2ec\uc131\ud558\uae30": [2, 19, 24], "tile": 2, "base": [2, 3, 4, 15, 16, 18, 20, 21, 23, 24, 25, 26, 27, 29, 30, 32, 33, 35, 36, 37, 38, 40, 42, 46, 48, 49, 50, 51, 52, 53, 55, 57], "splats\ub97c": 2, "\ud750\ub984\uc73c\ub85c": 2, "\uc9c4\ud589\ub41c\ub2e4": [2, 27, 41, 42], "screen\uc744": 2, "16": [2, 5, 6, 7, 13, 16, 17, 19, 20, 24, 27, 28, 29, 35, 36, 38, 42, 44, 45, 46, 47, 51, 55], "16\uc73c\ub85c": 2, "frustum\uacfc": 2, "\ud0c0\uc77c\uc5d0": 2, "gaussian\ub4e4\uc744": 2, "cull\ud568": 2, "frustum\uc678\uc5d0": 2, "\uac83\ub4e4\uc744": 2, "\uc81c\uac70\ud558\ub294": 2, "\uc808\ucc28": 2, "\ub098\ub208": 2, "\uc774\uc720\ub294": [2, 9, 13, 20, 35], "\ud0c0\uc77c\ub9c8\ub2e4": 2, "thread\uc5d0\uc11c": 2, "\uacc4\uc0b0\ud574\uc11c": [2, 19], "\uacc4\uc0b0\ud558\uae30": [2, 52, 56], "\uc704\ud568": [2, 13, 30, 44], "guard": 2, "band\ub97c": 2, "\uc774\uc6a9\ud574": [2, 4, 12, 15, 19, 21, 24, 30, 33, 34, 39, 40, 56], "\uadf9\ub2e8\uc801\uc778": 2, "\uac00\uae5d\uac70\ub098": 2, "\uba3c": [2, 33, 44], "gaussian\ub4e4": 2, "\uc774\ub4e4\uc758": [2, 53], "covariance\ub97c": 2, "\uacc4\uc0b0\ud558\ub294": [2, 4, 13, 25, 32, 33, 48, 52, 56], "\uac83\uc740": [2, 4, 5, 8, 11, 13, 14, 15, 17, 19, 24, 26, 29, 31, 32, 33, 36, 39, 40, 42, 44, 48, 52, 53, 55], "\ubd88\uc548\uc815\uc801": 2, "\ub4a4": [2, 24, 27, 38, 44], "\uc18d\ud55c": 2, "tile\uacfc": 2, "depth\uc5d0": 2, "key\ub97c": [2, 10], "\ubd80\uc5ec": [2, 6, 17], "radix": 2, "sort\ub97c": 2, "front": [2, 18], "back\uc73c\ub85c": 2, "color\uc640": 2, "\u03b1\uac12\uc744": 2, "accumulate\ud574\uc11c": 2, "\ud53d\uc140": [2, 6, 14, 17, 19, 20, 31, 32, 35, 43, 49], "\uac12\uc744": [2, 3, 6, 10, 12, 14, 15, 17, 18, 20, 23, 25, 29, 31, 32, 34, 37, 43, 45, 46, 49, 51, 52], "\uad6c\ud568": [2, 4, 6], "\ub370\uc774\ud130\uc14b\uc5d0": [2, 4, 6, 19, 25, 39, 43, 51, 54, 56], 
"\uacb0\uacfc\ub294": [2, 8, 9, 15, 17, 19, 21, 23, 24, 26, 33, 39, 44], "\ub2e4\ub974\uc9c0\ub9cc": 2, "sota\uc774\uc0c1\uc758": 2, "\ud004\ub9ac\ud2f0\ub97c": [2, 3, 4, 11, 31, 51, 52], "\ub0b4\uba74\uc11c": 2, "time\uacfc": [2, 3], "fps\ub97c": 2, "\ubcf4\uc778\ub2e4": [2, 3, 15, 19, 21, 23, 24, 26, 29, 36, 37], "experi": [2, 6, 57], "tabl": [2, 5, 6, 10, 18, 22, 23, 28, 31, 35, 37, 39, 41, 42, 44, 46, 53, 55], "ablat": [2, 6, 10, 18, 22, 26, 37, 46, 52, 54], "\ubc29\uc2dd\ub4e4\uacfc": [2, 34], "\uc720\uc0ac\ud558\uac8c": [2, 4, 8, 17, 19, 27, 31, 35, 39, 43, 50, 55], "\uad00\uce21\ub418\uc9c0": 2, "\uc54a\uc740": [2, 3, 4, 5, 8, 10, 11, 15, 19, 22, 23, 26, 29, 35, 38, 39, 42, 43, 44, 52, 56], "\uc7a5\uba74\uc740": 2, "artifact\ub4e4\uc774": 2, "\uc874\uc7ac": [2, 5, 16, 17, 32], "\ub298\uc5b4\uc9c0\uace0": 2, "\uc5bc\ub8e9\uc9c4": 2, "artifact\ub97c": 2, "\uc788\uc74c": [2, 3, 4, 6, 10, 13, 14, 17, 18, 20, 23, 27, 28, 29, 30, 31, 32, 39, 40, 45, 46, 49, 51], "\ucd5c\uc801\ud654\uc5d0\uc11c": 2, "\uac70\ub300\ud55c": [2, 3, 24], "\ub9cc\ub4e4\uc5b4\uc9c0\uba74": 2, "pop": [2, 13], "artifact": [2, 4, 17, 32], "\uac00\ub054": [2, 5, 51], "\ubc1c\uc0dd": [2, 5, 17, 27, 28, 36, 41, 47], "regularization\uc744": 2, "\uc801\uc6a9\ud558\uc9c0": 2, "\uc54a\uc74c": [2, 3, 5, 10, 13, 14, 17, 18, 29, 30, 32, 36, 40, 49, 51], "\uae30\ubc95\ub4e4\ubcf4\ub2e4": 2, "memori": [2, 7, 28, 45, 49, 52], "consumption\uc774": 2, "\uc0c1\ub2f9\ud788": [2, 4, 17, 21, 22, 28, 32, 36, 40], "\ub192\uc74c": [2, 3, 39, 49], "pdf": [3, 7, 8, 11, 22, 25, 26, 30, 31, 32, 34, 40, 42, 43, 45, 50, 51, 52, 53, 57], "2206": 3, "10935": 3, "gan": [3, 6, 10, 12, 18, 19, 21, 27, 31, 33, 34, 36, 37, 43, 44, 45, 53, 55], "diffusion\ub4f1": [3, 20], "\ubaa8\ub378\uc758": [3, 4, 5, 6, 7, 9, 10, 11, 14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 43, 46, 51, 52, 53, 54, 55], "\ubc1c\uc804\uc774": 3, "\uc774\uc5b4\uc9c0\uace0\uc788\ub2e4": 3, "\ub2e4\ub9cc": [3, 18, 39, 
42, 51], "\uc0dd\uc131\ubaa8\ub378\uc744": [3, 26], "\ud3c9\uac00\ud558\ub294": [3, 11, 33, 39], "\ucc99\ub3c4": 3, "\uc120\uc815\uc740": 3, "\uc5b4\ub824\uc6b4": [3, 10, 17, 18, 27, 33, 40, 53], "\ubb38\uc81c\ub85c": [3, 54], "\ub0a8\uc544\uc788\ub2e4": 3, "\uadf8\ub098\ub9c8": 3, "\ub098": [3, 4, 8, 25, 30, 35, 36, 43, 47, 49, 51, 56], "score\ub97c": [3, 4, 15, 21, 39], "\ud3c9\uac00\ud558\uace0\uc788\uc9c0\ub9cc": 3, "metric\ub4e4\ub3c4": 3, "\uc644\uc804\ud558\uc9c0": 3, "\ub17c\ubb38\uc744": [3, 21, 22, 39, 46], "\ud3c9\uac00\uc758": [3, 18, 33], "\uc9c0\ud45c\uc5d0": [3, 4, 39], "\ud55c\ubc88\ub354": 3, "\uace0\ucc30\ud558\uace0": 3, "\ud604\uc874\ud558\ub294": 3, "metric\uc5d0": 3, "\ubc29\ud5a5\uc744": [3, 18, 32], "\uc81c\uc2dc": [3, 6, 18, 32, 40, 42, 46], "\ucd5c\uadfc": [3, 4, 5, 8, 10, 17, 19, 20, 23, 24, 33, 34, 37], "\ub4f1": [3, 4, 10, 12, 17, 19, 20, 21, 22, 24, 25, 27, 28, 30, 31, 37, 40, 47, 51], "model\ub4e4\uc774": [3, 10, 12], "\ub6f0\uc5b4\ub09c": [3, 6, 10, 12, 23, 26, 29, 39, 45], "\ubcf4\uc5ec\uc90c": [3, 5, 18, 27, 32, 40, 49], "\ud558\uc9c0\ub9cc": [3, 4, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19, 20, 21, 22, 23, 24, 26, 28, 29, 30, 32, 33, 34, 36, 37, 39, 40, 42, 43, 44, 46, 47, 48, 53, 56], "task": [3, 6, 11, 24, 26, 28, 30, 32, 41, 43, 44, 45, 48, 49, 53, 54], "classif": [3, 21, 42, 46, 48, 51], "segment": [3, 44, 55], "\uc640\ub294": [3, 25, 30, 43], "\ub2e4\ub974\uac8c": [3, 12, 15, 22, 25, 35, 39, 47, 53], "metric\uc744": [3, 4], "\uc815\ud558\ub294\uac83\uc740": 3, "challeng": [3, 30], "p": [3, 6, 8, 9, 11, 14, 15, 17, 18, 19, 23, 29, 32, 33, 36, 46, 47, 53, 54, 56], "r": [3, 4, 5, 7, 13, 14, 17, 18, 19, 20, 23, 25, 27, 28, 32, 33, 36, 37, 41, 50, 52, 53], "f1": 3, "iou": [3, 44], "intersect": [3, 44], "union": [3, 44], "featu": 3, "map\uc774\ub098": [3, 19], "classfier": [3, 19], "\uc0ac\uc6a9\ud558\ub294": [3, 4, 5, 7, 9, 11, 13, 14, 19, 20, 24, 25, 27, 32, 33, 36, 38, 46, 47, 48, 49, 50, 52], "score\uac00": [3, 36, 51], 
"\ucd94\uc138": [3, 10], "\uc704": [3, 5, 6, 8, 9, 11, 13, 15, 17, 20, 22, 23, 25, 28, 29, 37, 39, 40, 42, 43, 52, 54], "metric\uc758": 3, "\ub2e8\uc810": [3, 33, 34], "real": [3, 4, 10, 15, 20, 24, 29, 32, 34, 37, 41, 48], "\ubd84\ud3ec\uc758": [3, 13, 39, 48], "space\uc5d0\uc11c": [3, 4, 5, 8, 15, 19, 24, 26, 27, 36, 40], "\uc218\uce58\uac00": [3, 23], "\uc815\ub9d0": [3, 11, 20], "\uc720\uc758\ubbf8\ud55c": 3, "\uc5f0\uad00\uc774": [3, 38], "\uc788\ub294\uc9c0": [3, 11, 24, 32], "\uc99d\uba85\ub418\uc9c0": [3, 23], "pretrain": [3, 4, 5, 6, 8, 9, 10, 15, 16, 18, 20, 21, 28, 29, 31, 34, 35, 36, 37, 39, 40, 42, 45, 53], "set\uc774": [3, 40], "specif": [3, 4, 10, 19, 24, 26, 36, 38, 47], "feature\uc5d0": [3, 5, 15, 24, 31], "\uc5bc\ub9c8\ub098": [3, 10, 11, 13, 15, 17, 20, 26, 30, 32, 33, 34, 36, 39, 40, 46, 54], "\ubbf8\uce58\ub294\uc9c0": 3, "\uc54c\uc218": [3, 36], "\uc5c6\uc74c": [3, 6, 17, 18, 28], "net": [3, 5, 6, 7, 13, 14, 15, 16, 18, 27, 30, 42, 47, 49, 53, 54], "imagenet": [3, 14, 21, 23, 27, 35, 40, 42, 43, 51, 54], "ddpm": [3, 5, 6, 14, 21, 39, 49, 51], "face": [3, 4, 6, 11, 16, 17, 22, 24, 30, 32, 38, 48, 56], "human": [3, 10, 11, 16, 21, 36, 38, 40, 41, 50], "study\uc758": [3, 26], "\uc9c1\uad00\uc801\uc778": [3, 17, 24, 40], "\ubc29\uc2dd\ub3c4": [3, 21, 24, 48, 52, 53], "\uc788\uc9c0\ub9cc": [3, 4, 12, 14, 17, 19, 20, 21, 24, 25, 33, 36, 44, 45, 46, 52, 53, 55], "cost\ub97c": [3, 27, 28, 51], "\ub9e4\uc6b0": [3, 4, 6, 10, 11, 12, 13, 15, 17, 19, 20, 24, 25, 28, 33, 40, 42, 43, 44, 49, 51], "\ud544\uc694\ub85c\ud55c\ub2e4\ub294": 3, "\uc810\uacfc": 3, "diversity\ub294": 3, "\uce21\uc815\ud558\uae30": [3, 11], "\uc5b4\ub835\ub2e4\ub294": 3, "e": [3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 17, 18, 19, 23, 24, 25, 26, 27, 31, 34, 37, 38, 43, 47, 48, 49, 51, 53, 54, 55, 56], "g": [3, 4, 10, 12, 18, 20, 21, 23, 24, 26, 32, 33, 37, 38, 44, 47, 48, 51, 52, 53, 54, 55], "\uc774\ubbf8\uc9c0\ub9cc": [3, 4, 7, 24, 46], "\uc0dd\uc131\ud574\ub3c4": 3, "\ubc1b\uc744": [3, 
49], "gpt": [3, 26, 28], "high": [3, 4, 5, 9, 11, 15, 18, 22, 23, 25, 27, 30, 31, 32, 34, 35, 38, 39, 43, 45, 48, 49, 51, 53, 55], "quality\uc758": [3, 12, 21, 23, 26, 34, 37, 51], "new": [3, 10, 12, 22, 24, 38, 40], "dataset\uc744": [3, 4, 26, 36, 37, 49, 51], "\uc704\uc758": [3, 4, 6, 9, 11, 12, 15, 17, 19, 21, 23, 24, 25, 26, 27, 31, 32, 36, 39, 42, 43, 44], "\ud559\uc2b5\ud558\uace0": [3, 4, 13, 27, 30, 39, 45, 48, 52, 53], "is\ub4f1": 3, "\uce21\uc815": [3, 8, 11, 14, 18, 40, 53], "\uc2e4\uc81c": [3, 6, 8, 11, 13, 15, 19, 22, 26, 27, 28, 29, 31, 32, 33, 34, 39, 40, 44, 47, 48, 51, 53], "revers": [3, 4, 6, 12, 17, 18, 21, 23, 25, 34, 42, 51, 54, 55], "\uac12\uacfc": [3, 12, 14, 20], "\ube44\uad50\ud574\uc11c": [3, 38, 45], "\uc720\ud6a8\uc131\uc744": 3, "\uac80\uc99d": [3, 6, 51], "v3\uacfc": 3, "clip": [3, 4, 5, 7, 8, 10, 11, 16, 17, 18, 20, 22, 24, 29, 30, 31, 33, 35, 36, 40, 41, 42, 45, 47, 49, 50, 51, 53, 55], "\ube44\uad50\ub97c": [3, 19, 21, 24, 26, 36], "v3": 3, "\uc801\ud569\uc131\uc744": 3, "\ud655\ub960\ubd84\ud3ec\uc758": 3, "\uc720\uc0ac\ub3c4\ub97c": [3, 4, 8, 21, 33, 43], "\uc9c0\ud45c": [3, 19, 33, 36, 39, 46], "q": [3, 4, 6, 7, 14, 15, 18, 21, 24, 27, 33, 36, 45, 46, 54], "sum_": [3, 13, 15, 20, 21, 25, 26, 28, 32, 56], "x": [3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 22, 25, 27, 28, 29, 30, 31, 32, 33, 36, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56], "log": [3, 7, 13, 18, 25, 28, 39, 43, 45, 46, 48, 51, 54, 56], "left": [3, 7, 8, 13, 17, 22, 26, 27, 28, 32, 35, 46, 49, 56], "frac": [3, 9, 13, 15, 17, 24, 25, 27, 32, 36, 38, 43, 46, 48, 51, 54, 56], "right": [3, 7, 8, 13, 17, 22, 26, 27, 28, 32, 46, 49, 56], "\ud2b9\uc9d5": [3, 8, 22, 34], "lower": [3, 13, 34, 35, 36, 45, 49, 56], "better": [3, 34, 49], "symmetr": 3, "rkl": 3, "\ub300\ubd80\ubd84": [3, 10, 23, 24, 29, 37, 39], "p\uac00": 3, "true": [3, 11, 12, 13, 28, 29, 48, 53, 55], "distribut": [3, 4, 6, 7, 11, 12, 13, 18, 20, 36, 42, 43, 44, 45, 46, 55, 56], 
"q\uac00": 3, "estim": [3, 5, 13, 43, 48, 56], "fidelity\uc640": [3, 19, 22], "diversity\ub97c": [3, 18, 22, 24, 37], "fidel": [3, 6, 16, 18, 22, 24, 31, 46, 47, 49], "label\uc758": 3, "\uc608\uce21\ud558\ub294\uc9c0": [3, 14], "divers": [3, 6, 7, 11, 18, 22, 23, 24, 30, 31, 36, 46, 47, 53], "\uace0\ub974\uac8c": [3, 33], "\uc0dd\uc131\ud574\ub0b4\ub294\uc9c0": 3, "exp": [3, 13, 32, 36, 54], "mathbb": [3, 6, 13, 17, 18, 19, 25, 27, 28, 32, 43, 47, 48, 51, 52, 53, 55, 56], "_x": 3, "d_": [3, 6, 13, 20, 22, 28, 48, 56], "y": [3, 4, 6, 8, 9, 11, 12, 13, 15, 18, 25, 28, 29, 32, 33, 36, 38, 44, 45, 46, 53, 55], "higher": [3, 36, 49, 52], "\uc774\ubbf8\uc9c0\uc640": [3, 4, 7, 8, 11, 12, 15, 17, 19, 21, 22, 31, 32, 33, 36, 37, 40, 47, 50, 56], "featur": [3, 4, 5, 6, 9, 10, 11, 13, 15, 16, 17, 19, 20, 24, 30, 31, 32, 35, 37, 38, 42, 44, 47, 53, 55], "vector\ub97c": [3, 25, 31, 36, 40], "\ucd94\ucd9c": [3, 16, 19, 31, 32, 55], "\ud3c9\uade0\uacfc": [3, 6, 39, 49], "\uacf5\ubd84\uc0b0\uc744": 3, "\uacc4\uc0b0": [3, 12, 13, 16, 18, 19, 27, 29, 32, 33, 40, 42], "frechet": [3, 45], "\ud3c9\uac00\uc9c0\ud45c": 3, "lvert": [3, 18], "mu_x": 3, "mu_g": 3, "rvert": [3, 18], "tr": 3, "sigma_x": 3, "sigma_g": 3, "v3\uc758": 3, "pool": [3, 35], "layer\uc758": [3, 20, 24, 32], "map\uc744": [3, 4, 15, 20, 26, 27, 37], "quality\ub97c": [3, 4, 15, 18, 23, 25, 27, 40], "fid\uc5d0\uc11c": [3, 13], "distance\ub97c": [3, 15], "\ub300\uc2e0": [3, 10, 13, 14, 15, 18, 19, 22, 23, 33, 35, 36, 37, 40, 45], "trick\uc744": [3, 37], "\uc0ac\uc6a9\ud574": [3, 4, 11, 15, 20, 24, 28, 31, 33, 35, 37, 39, 40, 41, 49, 51], "\ud655\ub960": [3, 6, 13, 32, 33, 34, 39, 48, 56], "\ub370\uc774\ud130\uc14b\uc758": [3, 7, 11, 17, 21, 29, 32, 33, 39, 42, 50], "\ud3c9\uac00\uc5d0": [3, 4, 17, 21], "\ud6a8\uacfc\uc801\uc784": 3, "metric\ubcf4\ub2e4": 3, "\uc18d\ub3c4\uac00": [3, 31, 33, 34, 43, 54], "\uc624\ub798\uac78\ub9bc": 3, "n": [3, 4, 6, 11, 13, 15, 17, 18, 19, 21, 24, 25, 27, 28, 30, 32, 36, 43, 45, 51, 53, 
54, 55, 56], "kid": [3, 10, 34], "\ub17c\ubb38": [3, 11, 13, 28, 30, 31, 32, 39, 44, 45, 47], "fid\uc640": [3, 20, 21, 23, 29, 39, 46], "bias\uac00": [3, 9, 38], "\uc788\uc74c\uc744": [3, 4, 8, 17, 18, 22, 23, 25, 26, 33, 39, 40, 53], "\uc99d\uba85\ud558\uace0": 3, "dataset\uc758": [3, 4, 16, 51], "sampl": [3, 4, 5, 6, 11, 15, 20, 24, 25, 30, 31, 36, 37, 41, 42, 43, 46, 47, 48, 49, 51, 52, 55, 56], "\ubcc0\uacbd": [3, 5, 6, 8, 44], "gaussian": [3, 5, 6, 13, 18, 20, 24, 28, 32, 33, 36, 38, 43, 46, 47, 51, 53, 55, 56], "sobol": 3, "sequenc": [3, 4, 5, 7, 11, 16, 28, 36], "unbias": [3, 11, 43], "\uc81c\uc548": [3, 5, 10, 12, 14, 16, 17, 18, 23, 25, 30, 32, 33, 39, 40, 41, 45], "v3\uc5d0": 3, "\ud1b5\uacfc\ud558\uae30\uc704\ud574": 3, "resiz": [3, 5, 10, 16, 24, 30, 41], "\uacfc\uc815\uc774": [3, 8, 13, 52], "\ud3ec\ud568\ub418\ub294\ub370": 3, "score\uac12\uc5d0": 3, "\uc904\uc218": 3, "\uc788\uc5b4": [3, 5, 15, 21, 25, 33, 44, 48, 51, 52], "best": [3, 15, 23, 37, 45, 46], "percformance\uc758": 3, "one": [3, 6, 11, 37, 43, 44, 51, 53, 54], "process\ub97c": [3, 4, 12, 13, 15, 24, 25, 46], "imagenet\uc758": 3, "\ub370\uc774\ud130\ub97c": [3, 4, 8, 10, 11, 13, 18, 19, 21, 23, 26, 29, 32, 33, 35, 37, 39, 41, 43, 44, 48, 51, 56], "imagegpt\ub97c": 3, "\uc7ac\uc0dd\uc131": 3, "k": [3, 4, 10, 13, 17, 20, 24, 25, 27, 28, 30, 33, 34, 45, 48, 51, 55], "notimagenet": 3, "imagegpt": 3, "vision": [3, 11, 20, 24, 34, 36, 39, 44, 50, 57], "\ubd84\uc57c\uc5d0": [3, 17, 43], "label": [3, 6, 13, 14, 36, 44, 46, 49], "dataset\uc774": [3, 34], "\ud544\uc694\uc5c6\ub294": 3, "\uc790\uae30\uc9c0\ub3c4": 3, "\ubc29\uc2dd": [3, 4, 10, 14, 17, 19, 24, 25, 30, 31, 33, 43, 51], "challenge\uc5d0\uc11c\ub3c4": 3, "\uc0c1\ub2f9\ud55c": [3, 7, 14, 26], "\ubcf4\uc784": [3, 5, 17, 18, 23, 27, 45], "\uc0dd\uc131\ubaa8\ub378\uc5d0": [3, 4], "\ud1b5\uacfc\ud55c": [3, 4, 14], "p_": [3, 6, 8, 13, 15, 18, 19, 21, 25, 28, 43, 44, 45, 46, 48, 51, 56], "hat": [3, 17, 18, 20, 22, 24, 25, 26, 27, 29, 32, 36, 
43, 44, 47, 51, 54], "\ubd84\ud3ec\ub97c": [3, 13, 17, 19, 20, 27, 29, 35, 39, 44, 48, 56], "\ube44\uad50": [3, 5, 6, 11, 12, 17, 18, 19, 20, 23, 24, 29, 36, 39, 42, 45, 46, 49], "\ud55c\uacc4": [3, 5, 6, 18, 25, 32, 33, 40, 51], "explicit": [3, 18, 32, 48], "model\uc5d0\ub9cc": 3, "\uc801\uc6a9": [3, 6, 15, 18, 19, 20, 23, 30, 33, 34, 35, 45, 51, 53, 55], "model\uc5d0\ub294": 3, "\uc801\uc6a9\ud560": [3, 4, 14, 18, 25, 40, 43, 53], "\uc0dd\uc131\ub418\ub294": [3, 8, 9, 34, 37, 38, 39, 46, 53, 55], "\ub370\uc774\ud130\uc758": [3, 4, 27, 29, 39, 44, 48], "\ubaa8\ub378\ub9c1\ud558\uc5ec": [3, 45, 46], "\uc8fc\ub85c": [3, 4, 19, 21, 22, 24, 25, 33, 36, 51], "noise\ub85c\ubd80\ud130": [3, 34], "\uacfc\uc815\uc5d0": [3, 8, 37], "\uc8fc\uc5b4\uc9c4": [3, 4, 8, 11, 15, 19, 21, 22, 26, 29, 31, 32, 36, 40, 42, 43, 45, 52], "\ubd84\ud3ec\ub85c\ubd80\ud130": 3, "\uc0d8\ud50c\ub9c1\ud558\uc5ec": 3, "volatil": 3, "rkl\uc740": 3, "\uc591\uc758": [3, 6, 9, 17, 19, 24], "epoch": [3, 19, 44, 47, 48, 55], "\ud6c4\uc5d0": [3, 5, 10, 20, 26, 41, 46], "\ubc14\ub85c": [3, 4, 10, 11, 13, 25, 31, 35, 36, 38, 42, 52], "\uc218\ub834\ud558\ub294": [3, 54], "\ubc29\uba74": 3, "is\ub294": 3, "\ubcc0\ub3d9\uc131\uc744": [3, 39], "capacity\uac00": 3, "\uc99d\uac00\ud560\uc218\ub85d": [3, 17, 45, 54], "kl\uacfc": 3, "rkl\uc758": 3, "\uac1c\uc120\ub418\ub294": [3, 26, 50], "\ud655\uc778": [3, 14, 15, 17, 23, 37, 42, 45, 49], "fid\ub098": 3, "is\uac00": [3, 13, 39], "\uadf8\ub798\ud504\uc640": [3, 33], "\ud615\ud0dc\ub97c": [3, 13, 33], "\ub744\ub294\uac83\uc744": 3, "neg": [3, 13, 15, 18, 20, 21, 31, 37, 53], "kl\uacfc\ub294": 3, "colleration\uc744": 3, "\ubcf4\uc774\uc9c0\ub9cc": [3, 15, 20, 37], "rkl\uacfc\ub294": 3, "\ub192\uc9c0": 3, "capacity\uc5d0": 3, "\uc218\uce58": [3, 23], "\ubcc0\ud654\ub294": [3, 35], "\ud06c\uc9c0": [3, 20, 36], "\ub370": [3, 8, 17, 18, 19, 21, 22, 26, 32, 33, 39], "\ubc18\ud574": [3, 23, 25, 34, 51], "\uad49\uc7a5\ud788": [3, 20, 30], "\uc218\uce58\uc758": 3, 
"\ubcc0\ud654\ub97c": [3, 4, 15, 17, 51], "\ubcf4\uc5ec\uc900\ub2e4": [3, 4, 7, 12, 15, 20, 21, 25, 26, 35, 36, 37, 40, 49, 51], "rank": [3, 7, 53], "coller": 3, "\ubaa8\ub378\uc5d0": [3, 4, 5, 7, 8, 10, 11, 13, 14, 17, 20, 21, 23, 29, 31, 33, 35, 37, 39, 40, 42, 47, 50, 51, 53, 54, 55], "\ubcc4\ub85c": [3, 4, 17, 40], "\uc21c\uc704\ub97c": [3, 40], "\ub9e4\uaca8": 3, "\uc21c\uc704\uc758": 3, "kendal": 3, "s": [3, 5, 6, 8, 11, 12, 15, 19, 20, 26, 30, 32, 36, 37, 38, 40, 41, 43, 47, 48, 49, 50, 51, 54], "\u03c4": 3, "ranking\uc774": 3, "\ub9e4\uaca8\uc9c4": 3, "\uc218\uc5f4": 3, "\uc0ac\uc774\uc758": [3, 6, 8, 11, 13, 19, 20, 21, 25, 29, 30, 32, 51], "scipi": 3, "import": [3, 12, 15, 23, 24, 28], "stat": 3, "h": [3, 4, 5, 9, 13, 24, 25, 27, 28, 29, 32, 41, 42, 45, 46, 53, 55], "z": [3, 4, 5, 8, 9, 13, 14, 15, 16, 19, 24, 25, 26, 27, 28, 32, 33, 37, 43, 45, 46, 48, 51, 52, 53, 54, 56], "kendalltau": 3, "significanceresult": 3, "statist": [3, 44], "9999999999999999": 3, "pvalu": 3, "016666666666666666": 3, "19999999999999998": 3, "8166666666666667": 3, "result": [3, 11, 12, 15, 16, 22, 23, 25, 26, 28, 30, 35, 43, 54, 56], "\uc720\uc0ac\ub3c4\ub294": 3, "889": 3, "kl\uacfc\uc758": 3, "\ube44\uad50\ud574\ubcf4\uba74": [3, 4, 23], "infin": 3, "cleanfid": 3, "96": [3, 39], "\ub098\uba38\uc9c0": [3, 4, 8, 20, 33, 39, 40, 50, 52], "metric\uac04": 3, "\ub0ae\uc74c": [3, 28], "network": [3, 4, 8, 9, 12, 13, 20, 22, 24, 25, 30, 32, 34, 41, 43, 48, 52, 53], "\uae30\ubc18\uc758": [3, 5, 7, 17, 18, 20, 22, 27, 29, 31, 33, 42, 50, 53], "\uc911\uc5d0\uc11c\ub294": [3, 6], "infinity\uc774": 3, "\ub192\uace0": [3, 7, 24], "is\uc640": 3, "\ub300\ubd80\ubd84\uc758": [3, 6, 8, 17, 19, 24, 26, 27, 28, 36, 51, 53], "metric\uc774": [3, 26], "\uce21\uc815\uc744": 3, "v3\ub97c": 3, "\uc0ac\uc6a9\ud558\ub294\ub370": [3, 8, 14, 20, 22, 43, 52], "\uacfc\uc5f0": [3, 11], "\uc801\uc808\ud55c\uac00": 3, "\uac00\uc815": [3, 13, 31], "infinity\ub294": 3, "space\uac00": [3, 15, 24], 
"distribution\uc744": [3, 10, 27, 37, 38, 45], "\ub530\ub978\ub2e4\ub294": 3, "\uac00\uc815\ud558\uc5d0": 3, "\uce21\uc815\ub418\ub294": 3, "\uc2e4\ud5d8": [3, 4, 6, 11, 13, 17, 22, 24, 27, 39, 40, 42, 43, 46, 48], "10k\uc758": 3, "\uc0dd\uc131\ud558\uace0": [3, 4, 6, 7, 8, 17, 26, 29, 31, 33, 40, 45, 47], "\uc6d0\ubcf8\uc758": 3, "20k\uc758": 3, "network\uc640": [3, 29], "network\ub97c": [3, 4, 5, 13, 24, 38], "model\uc5d0": [3, 4, 8, 15, 20, 21, 24, 26, 27, 28, 34, 40, 51], "fit": [3, 17], "\uc774\ub54c": [3, 4, 5, 6, 8, 20, 24, 25, 31, 34, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 54, 55, 56], "\uae30\ubc18\uc73c\ub85c": [3, 7, 8, 11, 14, 16, 17, 18, 19, 22, 25, 26, 27, 31, 32, 33, 34, 37, 43, 44, 47, 51, 52, 53, 54, 56], "\uc0d8\ud50c\uc758": [3, 35, 36, 39], "\ud655\ub960\uac12\uc744": 3, "\uacc4\uc0b0\ud55c\ub2e4": [3, 15, 24, 33, 36, 46], "8": [3, 6, 10, 13, 14, 15, 16, 18, 20, 23, 24, 27, 28, 29, 34, 38, 44, 45, 48, 51, 52, 53, 54, 55], "\uac12\uc774": [3, 6, 7, 10, 12, 23, 27, 30, 31, 37, 44, 45, 46, 49, 51, 54, 56], "\ub0ae\uc740": [3, 4, 5, 12, 13, 17, 18, 20, 22, 23, 27, 33, 35, 36, 41, 42, 45, 47, 53], "tail": 3, "vector\uc758": [3, 36, 38], "\uc6d0\ubcf8": [3, 4, 8, 11, 19, 20, 25, 27, 29, 34, 36, 39, 40, 47, 51, 53, 54, 55, 56], "\ub0ae\uc544\uc57c\ud568": 3, "\ud655\ub960\uc744": [3, 7, 11, 33, 48], "\uac16\ub294": [3, 6, 10, 11, 21, 32], "\ud655\uc778\ud574\ubcf4\uba74": 3, "clip\uc744": [3, 4, 5, 20], "\ud655\uc2e4\ud788": [3, 6, 26], "\ub5a8\uc5b4\uc9c0\ub294": [3, 6, 11, 23, 36, 39, 53], "\ubc18\uba74": [3, 4, 5, 6, 8, 14, 15, 19, 21, 26, 28, 33, 46, 51, 54], "inception\uc758": 3, "\uc774\ubbf8\uc9c0\ub4e4\uc740": [3, 20, 34], "\ubcf4\uc774\uace0": [3, 15, 19, 20, 25, 33, 44, 45, 52], "\uac00\uc815\uc5d0": 3, "\uc704\ubc30": 3, "latent": [3, 4, 5, 6, 8, 10, 11, 17, 18, 19, 20, 22, 24, 26, 30, 31, 33, 34, 35, 37, 38, 42, 43, 47, 50, 53, 54, 55, 56], "represent": [3, 6, 11, 14, 19, 27, 33, 36, 45, 52], "vector\ub4e4\uc744": 3, "dimension\uc5d0": [3, 
28], "\ud22c\uc601\uc2dc\ucf1c": 3, "\ub530\ub974\ub294": [3, 18, 29, 34, 55], "\uc9c0": 3, "\ud655\uc778\ud55c\ub2e4": 3, "\ucd94\ucd9c\ud55c\ub2e4": 3, "\uc5f0\uc0b0\uc744": [3, 7, 33, 41], "d\ub85c": 3, "\ud22c\uc601\uc2dc\ud0a8\ub2e4": 3, "value\ub97c": 3, "\uad6c\ud55c\ub2e4": [3, 42, 46], "valu": [3, 10, 12, 24, 28, 32, 54], "\uc5b4\ub5a0\ud55c": [3, 5, 15, 33, 35, 38, 47, 51], "\uc0ac\uac74\uc774": 3, "\uc6b0\uc5f0\ud788": 3, "\uc77c\uc5b4\ub0a0": 3, "\ud655\ub960\uc774": [3, 29, 32], "\uac70\uc758": [3, 4, 11, 15, 21, 23, 26, 27, 33, 44, 51], "\uc5c6\ub2e4": [3, 19, 20, 28, 34, 41, 46], "\uc778\uacfc\uad00\uacc4\uac00": 3, "\ud06c\ub2e4": [3, 36], "distribution\uc740": 3, "random\uc744": 3, "\uae30\ubc18\uc73c\ub85c\ud558\uae30\ub54c\ubb38\uc5d0": 3, "\uc791\uc544\uc57c\ud55c\ub2e4": 3, "\uc989": [3, 4, 6, 8, 10, 11, 13, 15, 17, 18, 19, 20, 21, 25, 26, 34, 40, 42, 43, 44, 45, 46, 49, 51, 55], "value\uac00": 3, "\ucee4\uc57c\ud55c\ub2e4": 3, "dataset\uc5d0": [3, 13, 21, 25, 45, 51], "clip\uc758": [3, 4, 5, 8, 18, 40], "value\uac12\uc740": 3, "05\ub97c": 3, "\ub118\uc5b4": 3, "random\uc131\uc744": 3, "\uc720\uc9c0\ud558\uc9c0\ub9cc": 3, "inception\uc740": 3, "05\ubcf4\ub2e4": 3, "\ubcf4\uc5ec": 3, "\uc720\uc9c0\ud558\uc9c0": [3, 17], "\ubabb\ud55c\ub2e4": [3, 15, 19, 23, 24, 33], "net\uc744": 3, "\uce21\uc815\ubcf4\ub2e4": 3, "your": [4, 37, 41], "iclr": [4, 12, 15, 28, 54, 56], "2307": [4, 8, 22, 35], "04725": 4, "code": [4, 5, 6, 7, 8, 9, 10, 13, 14, 16, 18, 20, 24, 25, 26, 27, 28, 29, 30, 31, 35, 36, 38, 40, 42, 43, 44, 45, 46, 47, 48, 51, 53, 54, 55, 56, 57], "guoyww": 4, "page": [4, 5, 14, 16, 17, 18, 24, 26, 40, 51], "github": [4, 5, 16, 17, 18, 23, 24, 26, 28, 29, 36, 40, 42, 44, 45, 47, 51, 57], "io": [4, 5, 12, 13, 16, 17, 18, 23, 24, 26, 29, 40, 42, 44, 45, 51], "kyeongmin": [4, 24, 36, 57], "yu": [4, 15, 24, 36, 57], "11": [4, 6, 33, 35, 36, 49, 51], "\uc758\uc758": 4, "In": [4, 10, 14, 26, 42], "thi": [4, 11, 12, 13, 16, 28, 32, 40, 47, 57], "we": 
[4, 11, 26, 32, 40, 47, 52, 55], "present": 4, "framework": [4, 16, 30, 52], "requir": [4, 6, 28], "public": 4, "plug": [4, 5, 16, 25, 53, 55], "plai": [4, 5, 53, 55], "model\uacfc": [4, 8, 9, 14, 15, 18, 21, 23, 24, 27, 51], "dreambooth\ub098": 4, "lora\uc640": [4, 25], "\uac1c\uc778\ud654": [4, 17, 22], "\uae30\uc220\uc774": [4, 8, 21, 39], "\ubc1c\uc804\ud568\uc5d0": [4, 8], "\uc0ac\ub78c\ub4e4\uc740": 4, "\uc801\uc808\ud55c": [4, 34, 36, 42], "\ube44\uc6a9\uc744": [4, 24, 32], "\uc9c0\ubd88\ud558\uc5ec": 4, "\uace0\ud654\uc9c8\uc758": [4, 51], "\uc6d0\ud558\ub294": [4, 8, 9, 10, 11, 15, 19, 24, 26, 27, 30, 32, 34, 37, 38, 39, 44], "\uc788\uac8c": [4, 8, 9, 11, 19, 24, 26, 29, 30, 33, 35, 39, 41, 43, 49], "\ub418\uc5c8\ub2e4": [4, 15, 29, 33, 36, 40], "\uace0\ud654\uc9c8": [4, 24, 29, 45, 46], "\uc6c0\uc9c1\uc784\uc744": [4, 5, 19], "\ucd94\uac00\ud558\uac70\ub098": 4, "\uc560\ub2c8\uba54\uc774\uc158\uc744": [4, 19], "\uc0dd\uc131\ud558\ub3c4\ub85d": [4, 21, 24, 33, 39, 40], "\uc5b4\ub835\ub2e4": [4, 5, 16, 18, 19, 24, 27, 29, 33], "\ucd94\uac00\uc801\uc778": [4, 5, 9, 10, 13, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 32, 33, 34, 36, 38, 40, 43, 51, 55], "\uc5c6\uc774": [4, 5, 7, 10, 12, 13, 14, 17, 18, 21, 22, 25, 29, 31, 33, 35, 36, 37, 39, 40, 42, 43, 44, 45, 46, 49, 51, 53, 55], "\ucd94\uac00\ud558\ub294": [4, 8, 15, 17, 18, 19, 28, 29, 33, 39, 53, 55], "\uc2e4\uc6a9\uc801\uc778": 4, "\ud504\ub808\uc784": [4, 19, 29, 41], "\uc6cc\ud06c\ub97c": 4, "\uc81c\uc548\ud558\ub294": [4, 6, 24], "\uc6cc\ud06c\uc758": 4, "\ud575\uc2ec\uc740": [4, 24, 25, 26, 51], "module\uc744": [4, 5], "\ud65c\uc6a9\ud558\ub294": [4, 5, 17, 30, 40, 41], "\ud55c\ubc88": [4, 11, 13, 24, 43, 47, 56], "\uc5b4\ub5a4": [4, 8, 9, 11, 13, 15, 17, 19, 21, 24, 25, 26, 27, 28, 29, 31, 35, 39, 40, 42, 44, 51, 56], "\ubaa8\ub378\uacfc\ub3c4": 4, "\uc735\ud569\ud560": 4, "\uc774\uc6a9\ud558\uba74": [4, 24, 28, 49], "module\uc740": [4, 28], "world": [4, 10, 20, 29, 41], "\ube44\ub514\uc624\ub85c": [4, 
19], "\ubd80\ud130": [4, 6, 7, 15, 21, 24, 26, 31, 32, 36, 45, 54], "\ud6a8\uacfc\uc801\uc73c\ub85c": [4, 5, 8, 14, 17, 18, 19, 43, 51], "prior\ub97c": [4, 8], "\ud559\uc2b5\ub41c": [4, 5, 8, 10, 13, 14, 17, 18, 19, 24, 25, 27, 29, 30, 32, 33, 36, 39, 40, 41, 42, 43, 44, 46, 49, 51], "\ub367\ubd99\uc5ec": [4, 24], "\uc560\ub2c8\uba54\uc774\uc158": [4, 19], "\uc0ac\uc6a9\ud560": [4, 7, 9, 16, 19, 20, 21, 24, 25, 27, 28, 29, 31, 33, 39, 41, 51, 52, 53, 56], "animatediff\ub97c": 4, "\uac04\ub2e8\ud55c": [4, 9, 14, 15, 16, 27, 32, 33, 35, 40], "\ud30c\uc778\ud29c\ub2dd": [4, 17, 19, 31, 33, 35, 39], "\ubc29\uc2dd\uc778": [4, 19, 51], "motionlora\ub97c": 4, "\uc0ac\uc804": [4, 5, 7, 8, 11, 13, 17, 19, 24, 29, 33, 36, 40, 43, 47, 51], "module\uc774": 4, "\uc800\ube44\uc6a9\uc73c\ub85c": 4, "\uc6c0\uc9c1\uc784": 4, "\ud328\ud134\uc744": 4, "\ud574\uc900\ub2e4": [4, 7, 46], "ex": [4, 6, 11, 13, 17, 19, 25, 30, 32, 33, 44], "\ucd2c\uc601": [4, 17, 22], "\uae30\ubc95": [4, 10, 25, 28, 42, 43], "animatediff\uc640": 4, "\ubd80\ucc29\ud558\uc5ec": 4, "\uc2e4\ud5d8\ud588\uc73c\uba70": 4, "\ubc29\uc2dd\uc774": [4, 17, 33, 48, 51, 53], "\ud004\ub9ac\ud2f0\uc640": [4, 31, 36], "\ubcf4\uc804\ud558\uba74\uc11c\ub3c4": 4, "\uc790\uc5f0\uc2a4\ub7ec\uc6b4": [4, 26, 32, 39], "\ud074\ub9bd\uc744": 4, "\ubcf4\uc600\ub2e4": [4, 5, 19, 23, 29, 35, 36, 37, 51], "pipelin": [4, 5, 26, 46, 49, 53], "core": [4, 31], "same": [4, 11, 26, 28], "sd1": 4, "can": [4, 47, 52], "download": 4, "finetun": [4, 5, 7, 8, 10, 16, 17, 25], "civitai": 4, "hug": [4, 24], "lora\uae30\ubc18": 4, "adapter\ub97c": [4, 19, 24, 26], "\ub354\ud574": [4, 13], "video": [4, 6, 11, 16, 30, 43, 57], "\ud559\uc2b5\ud560\ub54c": 4, "\ubc1c\uc0dd\ud560\uc218": 4, "gap\uc744": 4, "\uc904\uc600\ub2e4": [4, 7], "\uc5ec\uae30\uc11c": [4, 6, 9, 13, 18, 22, 24, 27, 30, 36, 38, 39, 40, 43, 51, 53, 54, 56], "\ub9d0\ud558\ub294": [4, 24, 38], "gap\uc774\ub780": 4, "video\uc758": [4, 16, 29], "\ud504\ub808\uc784\uc744": [4, 19, 29, 41], 
"\ub098\ub204\uc5b4": [4, 22, 36], "\ubd24\uc744\ub54c": 4, "blur": [4, 5, 35], "compress": [4, 55], "watermarks\ub4f1\uc744": 4, "\ub9d0\ud55c\ub2e4": [4, 24, 36], "strategi": [4, 17, 32], "transfer": [4, 6, 28, 30, 37, 40, 44, 47], "videothrough": 4, "propos": [4, 26], "\ub098\uba74": [4, 13, 22, 24, 26], "t2i\ubaa8\ub378\uacfc": 4, "\uacb0\ud569\ud574": [4, 7], "generator\ub85c": 4, "pre": [4, 6, 22, 25, 28, 34, 37, 40, 41, 44, 47, 51, 52, 53, 55], "\ud504\ub86c\ud504\ud2b8\ub97c": [4, 7, 8, 17, 22, 26, 29, 31, 33, 35, 39], "\uc785\ub825\ud558\uc5ec": [4, 47], "\ub514\ud4e8\uc804": [4, 19, 24, 29, 33, 39], "\ubc1c\uc804\uc73c\ub85c": 4, "\uc608\uc220\uac00\uc640": 4, "\uc544\ub9c8\ucd94\uc5b4\ub4e4\uc774": 4, "\uc2dc\uac01": 4, "\ucee8\ud150\uce20\ub97c": 4, "\ubcf4\ub2e4": [4, 6, 12, 13, 14, 17, 23, 24, 25, 28, 30, 31, 33, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 54, 55], "\uc0dd\uc131\ub2a5\ub825": [4, 24], "creativ": [4, 41], "\uc790\uadf9\ud558\uae30": 4, "dreambooth\uc640": [4, 22, 24], "\uac00\ubcbc\uc6b4": [4, 22, 24], "\ubc29\uc2dd\ub4e4\uc774": 4, "\uc81c\uc548\ub418\uc5c8\ub2e4": [4, 51], "\ub370\uc774\ud130\uc14b\uacfc": [4, 33, 51], "\uc801\ub2f9\ud55c": [4, 36], "\ud558\ub4dc\uc6e8\uc5b4\uc5d0\uc11c\ub3c4": 4, "custom": [4, 16, 19, 24, 25], "finetuning\uc744": [4, 10, 22], "\ud560": [4, 6, 7, 8, 9, 11, 13, 15, 19, 22, 24, 25, 26, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49, 51, 52, 53, 54, 55], "\uadf8\ub85c\uc778\ud574": 4, "\uc0ac\uc6a9\uc790\ub4e4\uc774": 4, "\ube44\uc6a9\uc73c\ub85c\ub3c4": 4, "domain\uc5d0": [4, 18], "\uc801\uc6a9\ud558\uac70\ub098": 4, "\uc2dc\uac01\uc801": [4, 5, 11, 14, 19, 31, 35, 40, 53], "\ub192\uc77c": [4, 19, 22, 25, 27, 40], "ai": [4, 11, 22, 35, 44], "\uc544\ud2f0\uc2a4\ud2b8\uc640": 4, "\uc544\ub9c8\ucd94\uc5b4": 4, "\ucee4\ubba4\ub2c8\ud2f0": 4, "\uc0c1\ub2f9\ub7c9\uc758": 4, "models\uc744": [4, 24], "civitai\ub098": 4, "face\uc640": 4, "\ud50c\ub7ab\ud3fc\uc5d0": 4, 
"\uac1c\uc2dc\ud588\ub2e4": 4, "\ubaa8\ub378\ub4e4\uc774": [4, 5, 10, 11, 20, 38, 39, 47, 53, 55], "\uc218\uc900\uc758": [4, 19, 26], "\uc815\uc801\uc778": 4, "\uc601\ud654\ub098": 4, "\uce74\ud230\uacfc": [4, 19], "\uc2e4\uc0b0\uc5c5\uc5d0\uc11c": 4, "\uc694\uad6c\ub41c\ub2e4": 4, "\uc5f0\uad6c\uc5d0\uc11c\ub294": [4, 8, 17, 21, 22, 24, 25, 33], "\uace7\ubc14\ub85c": [4, 24, 36], "\ubcc0\ud658\ud558\ub294": [4, 19, 22, 27, 30, 32, 33, 44], "\ud30c\uc778": [4, 19, 29, 39, 41], "\ud29c\ub2dd\uc744": [4, 19, 46], "\uc218\uc9d1\uacfc": 4, "\ucef4\ud4e8\ud305": [4, 19, 29], "\uc790\uc6d0\uc758": [4, 27], "\ud544\uc694\ub294": 4, "\uc0ac\uc6a9\uc790\uc5d0\uac8c": [4, 40], "\uac78\ub9bc\ub3cc\uc774": 4, "\ub41c\ub2e4": [4, 6, 12, 15, 19, 24, 25, 26, 29, 31, 32, 33, 36, 42, 49, 51], "\uc81c\uc548\ud558\ub294\ub370": 4, "\ub2a5\ub825\uc744": [4, 6, 7, 8, 9, 11, 22, 26, 33, 39, 40, 41, 42], "\ubcf4\uc804\ud558\uba74\uc11c": 4, "\ubb38\uc81c\ub97c": [4, 8, 10, 15, 17, 19, 26, 28, 30, 32, 33, 35, 36, 40, 43, 49], "\ud574\uacb0\ud560": [4, 33], "\ud6a8\uacfc\uc801\uc778": [4, 5, 17, 18, 19, 24, 29, 31, 35], "\ud30c\uc774\ud504\ub77c\uc778\uc774\ub2e4": 4, "animatediff\uc758": 4, "\ube44\ub514\uc624": [4, 5, 16, 17, 19, 29, 36, 41], "\ub370\uc774\ud130\uc14b": [4, 6, 11, 17, 18, 19, 28, 29, 33, 36, 41, 42, 43, 45, 49, 50, 53, 54, 55], "webvid": [4, 16, 29, 41], "10m": [4, 16, 29, 41], "\uc73c\ub85c\ubd80\ud130": [4, 11, 30, 44], "\ud0c0\ub2f9\ud55c": 4, "\ud559\uc2b5\ud558\ub294": [4, 9, 13, 14, 19, 23, 24, 25, 27, 29, 32, 34, 36, 38, 43, 44, 48, 51, 52, 53, 54, 56], "module\uc758": [4, 16, 28], "\ud559\uc2b5\uc740": [4, 18, 27, 30, 31], "\uc138\uac00\uc9c0": [4, 54], "\ub2e8\uacc4\ub85c": [4, 7, 8, 19, 27, 33, 36, 52], "\uad6c\uc131\ub41c\ub2e4": [4, 19, 41], "visual": [4, 5, 15, 17, 30, 31, 40, 49, 54, 55], "target": [4, 5, 10, 11, 13, 15, 16, 36, 37, 40, 43, 44, 47, 53, 54], "dataset": [4, 5, 6, 15, 17, 18, 20, 23, 27, 30, 31, 40, 41, 42, 45, 46, 48, 49, 53, 54], 
"\ud488\uc9c8\ucc28\uc774": 4, "\ub3d9\uc601\uc0c1": [4, 19], "\uc6cc\ud130\ub9c8\ud06c": [4, 29], "\uc555\ucd95\uc73c\ub85c": 4, "\uc778\ud55c": [4, 33], "\ubd80\ubd84\uc740": [4, 9, 20, 21, 22, 26, 27, 38, 39, 55], "\ubaa8\ub4c8\uc774": 4, "\ud559\uc2b5\ud568\uc73c\ub85c\uc368": [4, 29, 37, 40, 43, 48], "\uc774\ud6c4": [4, 5, 6, 8, 14, 16, 19, 21, 22, 24, 26, 27, 28, 30, 31, 36, 40], "motion\uad00\ub828": 4, "\ubaa8\ub4c8\ub4e4\uc774": 4, "motion\uc5d0\ub9cc": 4, "\uc9d1\uc911\ud560": [4, 27], "\uc788\ub3c4\ub85d": [4, 7, 15, 22, 24, 26, 27, 30, 31, 33, 35, 36, 40, 41, 43, 44, 50, 51, 52, 56], "\ube44\ub514\uc624\ub97c": [4, 5, 16, 17, 19, 29, 41], "\uc785\ub825\ubc1b\uc744\uc218": 4, "inflate\uc2dc\ud0a8": 4, "\ub354\ud55c": [4, 21, 26, 36], "\ubaa8\uc158": [4, 19, 29], "\ubaa8\ub378\ub9c1\uc744": [4, 8, 29, 45], "\ubaa8\ub4c8\uc744": [4, 19], "\ucd94\uac00\ud55c\ub2e4": [4, 19, 29, 33, 35, 36, 41], "\ud559\uc2b5\ud560\ub54c\ub294": [4, 36], "adapter\uc640": [4, 24], "freeze\ud55c\ub2e4": 4, "\uc774\ub807\uac8c": [4, 6, 9, 22, 30, 39, 40, 42, 48], "\ud558\uba74": [4, 6, 9, 11, 20, 25, 27, 39, 42, 44], "\uc6c0\uc9c1\uc784\uc5d0": 4, "\ubd80\ubd84\uc744": [4, 9, 15, 20, 21, 26, 27, 30, 32, 35, 39, 43, 44, 47, 50, 52, 53, 54, 55, 56], "\uc804\ubc18\uc801\uc73c\ub85c": [4, 27, 39], "\ubaa8\ub4c8\ubcc4": 4, "\ud559\uc2b5\uc774": [4, 5, 6, 10, 13, 20, 31, 37, 41, 44, 48, 55], "\uac00\ub2a5\ud574\uc9c4\ub2e4": [4, 24], "\uc6d0\ud560\uacbd\uc6b0": 4, "\ubc14\uafb8\uba74": 4, "\ub428": [4, 6, 18, 25, 27, 30, 39, 42, 45], "option": [4, 30, 41], "motionlora\uc758": 4, "motion\uc744": [4, 5], "videos\uc640": 4, "\ud559\uc2b5\ud69f\uc218\ub85c": 4, "\ubaa9\ud45c\ub85c\ud558\ub294": 4, "\ubaa8\ub4c8\uc774\ub2e4": 4, "\uc774\ub984\uacfc": 4, "hu": 4, "et": [4, 8, 15, 21, 23, 33, 36, 43, 51, 54], "al": [4, 8, 15, 21, 23, 33, 36, 43, 51, 54], "2021": [4, 12, 15, 23, 28, 33, 43, 45, 46, 54], "\uc774\uc6a9\ud558\ub294\ub370": 4, "pattern\uc744": 4, "\uc801\uc740\uc218": 4, 
"50\uac1c": 4, "video\ub9cc\uc73c\ub85c": 4, "\ud559\uc2b5\uc2dc\ud0ac\uc218": 4, "\ucc28\uc9c0\ud558\ub294": [4, 30], "\uba54\ubaa8\ub9ac\ub3c4": 4, "\uc801\uc5b4": [4, 25, 31], "\ucd94\uac00\ud559\uc2b5\uc774\ub098": 4, "\uacf5\uc720": [4, 18, 19, 20], "\ubc30\ud3ec\ud558\ub294\ub370\uc5d0\ub3c4": 4, "\uc720\ub9ac\ud558\ub2e4": 4, "glide": [4, 11, 50, 51], "nichol": [4, 15, 33, 36, 54], "\ub294": [4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56], "condition\uc744": [4, 9, 10, 24, 25, 35, 51], "\uc18c\uac1c\ud558\uace0": [4, 52, 55], "guidance\ub97c": [4, 7, 16, 19, 21, 24, 26, 36, 46, 49], "\uc870\uc808\ud558\uc5ec": [4, 39], "\uacb0\uacfc\ubb3c\uc744": [4, 17, 36, 37], "\uc5bb\ub294": [4, 26, 32, 44, 49], "\uc124\uba85\ud588\ub2e4": 4, "uid": 4, "l": [4, 5, 8, 10, 15, 17, 18, 19, 20, 21, 22, 25, 27, 32, 33, 42, 44, 51, 55, 56], "anguag": 4, "i": [4, 6, 10, 13, 15, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 32, 34, 35, 36, 38, 41, 43, 45, 47, 48, 49, 51, 52, 53, 54, 55, 56], "mage": 4, "iffus": 4, "dite": 4, "dall": [4, 7, 17, 20, 21, 22, 24, 31, 33, 47, 49, 50, 51], "e2": [4, 22, 24, 47, 50, 51], "ramesh": [4, 8], "2022": [4, 11, 15, 18, 21, 27, 31, 43, 44, 49, 54], "\uc774\uc6a9\ud558\uc5ec": [4, 15, 21, 24, 25, 29, 30, 33, 36, 39, 45], "\uc77c\uad00\uc131\uc744": [4, 5, 17, 18, 19, 29, 36, 44, 51], "\ud5a5\uc0c1\uc2dc\ucf30\ub2e4": [4, 24], "imagen": [4, 11, 17, 18, 22, 29, 31, 42, 47, 51, 52], "saharia": 4, "\uc740": [4, 6, 7, 10, 11, 13, 16, 17, 18, 19, 20, 24, 25, 30, 31, 33, 35, 36, 38, 40, 41, 42, 43, 44, 46, 47, 48, 49, 51, 54, 55, 56], "llm\uacfc": [4, 26], "cascad": [4, 24, 41, 50], "photorealistic\ud55c": [4, 21], "\uc5bb\uace0\uc790": 4, "\ud588\ub2e4": [4, 7, 24, 33, 35, 36, 41, 42, 51], "rombach": 4, "encoder\uc758": [4, 19, 24, 35, 36, 39, 40], "\uacfc\uc815\uc744": [4, 8, 12, 13, 17, 19, 21, 22, 31, 33, 34, 35, 40, 
41, 42, 54, 56], "\uc218\ud589\ud568\uc73c\ub85c\uc368": [4, 6], "\ud6a8\uc728\uc744": 4, "\ub192\uc600\ub2e4": [4, 33], "\ucc38\uace0": [4, 6, 7, 11, 14, 17, 24, 25, 29, 36, 39, 42], "ediff": [4, 24, 35, 52], "balaji": 4, "\ubaa8\ub378\ub4e4\uc744": [4, 10, 17, 50], "\uc559\uc0c1\ube14": 4, "\ud559\uc2b5\uc2dc\ucf1c": [4, 13, 27, 28, 36], "denois": [4, 5, 8, 12, 18, 22, 24, 26, 28, 34, 35, 39, 41, 43, 51, 54, 55], "\ub2e8\uacc4\ubcc4\ub85c": 4, "denoise\ub97c": [4, 35], "\uc218\ud589\ud558\uace0\uc790": 4, "contrast": [4, 20, 21, 24, 36, 37, 45, 49], "\ud65c\uc6a9\ud558\uae30": [4, 22], "\ud6a8\uc728\uc801\uc778": [4, 17, 22, 27, 51], "\ubc29\ubc95\uc5d0": [4, 15, 18, 21, 27, 36, 41], "\uc5f0\uad6c\uac00": [4, 5, 7, 21, 22, 36, 43], "\ub728\uac81\ub2e4": 4, "\uac1c\uc778\ud654\ub780": 4, "images\ub97c": [4, 15], "concepts\ub098": 4, "style\uc744": [4, 20, 24, 37, 40, 51], "\uc190\uc744": 4, "\uc798\uadf8\ub9ac\ub294": 4, "\ub208\uc744": 4, "\uc2dc\ud0a4\ub294": [4, 19, 20, 29, 30, 33], "\ub192\uc544\uc9c8": 4, "\ud559\uc2b5\ub370\uc774\ud130\ub97c": 4, "\uc78a\ub294": 4, "catastroph": 4, "forgetting\uc774": 4, "\ubb38\uc81c\uac00": [4, 5, 15, 17, 22, 39, 41, 44, 48], "\ubb38\uc81c\ub294": [4, 11, 17], "\ub370\uc774\ud130\uac00": [4, 9, 13, 19, 29, 33, 35, 38, 39, 40, 48, 56], "\uc801\uc744\ub54c": 4, "\ubc1c\uc0dd\ud55c\ub2e4": [4, 15, 19, 26, 33, 35], "ruiz": 4, "\uc0ac\uc6a9\ud558\uba74\uc11c\ub3c4": [4, 14], "\ucd94\uac00\ud558\uc5ec": [4, 15, 18, 21, 24, 33, 36, 52], "\uc2dc\ucf30\ub2e4": [4, 7, 35], "textur": [4, 19, 24, 30, 52, 55], "invers": [4, 10, 11, 17, 18, 24, 34, 47], "gal": 4, "concept": [4, 30, 31, 40, 49], "token": [4, 11, 14, 20, 24, 26, 27, 28, 29, 36, 40, 45, 47, 53, 55], "embedding\uc744": [4, 15, 24, 29, 35, 40, 46], "\ud558\uc600\ub2e4": [4, 15, 19, 26, 29, 33, 39], "\uc218\ud589\ud588\ub2e4": 4, "sec": [4, 11], "\uadf8\uc678\uc758": 4, "encod": [4, 5, 6, 7, 11, 14, 16, 17, 20, 21, 24, 29, 45, 47, 48, 49, 50, 52, 53, 55, 56], "approach": [4, 
15, 29, 56, 57], "jia": 4, "t2i\ub97c": 4, "\uc560\ub2c8\uba54\uc774\uc158\ud654": [4, 29], "\ub9ce\uc9c0\ub294": 4, "\uc54a\uc9c0\ub9cc": [4, 30, 33, 47, 51], "\uc544\ub798\uc758": [4, 6, 9, 11, 17, 19, 35, 36, 44, 49], "\uc5f0\uad6c\ub4e4\uacfc": [4, 17], "\uad00\ub828\uc788\ub2e4": 4, "text2cinemagraph": 4, "mahapatra": 4, "flow": [4, 19, 24, 25, 27, 43, 51], "prediction\uc744": [4, 24, 25, 36, 51], "cinematography\ub97c": 4, "\uc0dd\uc131\ud558\uace0\uc790": [4, 8, 13, 24, 33, 34, 38, 48, 56], "align": [4, 7, 10, 13, 19, 24, 27, 31, 39, 40, 41, 49, 50, 53, 55], "blattmann": 4, "generator\ub0b4\uc758": 4, "frozen": [4, 15, 18, 20, 24, 39, 49], "layers\uac00": 4, "personalizing\uc774": 4, "\uac00\ub2a5\ud568\uc744": [4, 5, 25], "\ud655\uc778\ud588\ub2e4": [4, 36], "\ube44\uad50\uc2dc": 4, "wu": 4, "\ub2e8\uc77c": [4, 8, 17, 19, 21, 26, 32, 33, 36, 40, 43, 53], "\ud30c\ub77c\ubbf8\ud130\ub9cc": [4, 55], "\ud30c\uc778\ud29c\ub2dd\ud558\ub294": [4, 19], "\uc81c\uc548\ud588\ub2e4": [4, 5, 36, 37, 51], "tempor": [4, 5, 16, 29], "attn": [4, 13, 20, 37], "\uac00\uc9c0\uace0": [4, 6, 7, 11, 15, 17, 21, 22, 26, 27, 30, 32, 33, 35, 38, 41, 43, 45, 48, 51, 54, 55, 56], "text2video": 4, "zero": [4, 5, 7, 11, 14, 18, 21, 28, 29, 31, 32, 39, 45, 49, 52, 53], "khachatryan": 4, "\uc0ac\uc804\ud559\uc2b5\ud55c": [4, 24, 36], "t2i\ubaa8\ub378\uc744": 4, "\ud559\uc2b5\uacfc\uc815": 4, "\uc0ac\uc804\uc5d0": [4, 22, 27, 43, 47, 48, 52], "\uc815\uc758\ub41c": [4, 8, 32, 40, 53, 54], "affin": [4, 20], "matrix\ub97c": 4, "wrapping\uc744": 4, "\ubc29\uc2dd\uc774\ub2e4": [4, 24], "\uad00\ub828": [4, 18, 22, 29, 33, 38], "\uc5f0\uad6c\ub4e4\uc774": [4, 5, 10, 23, 24, 30, 33, 37, 43], "\ub9ce\ub2e4": [4, 12, 19, 36], "esser": 4, "zhou": 4, "2022a": 4, "singer": 4, "ho": [4, 21, 23, 33, 43], "2022b": 4, "ruan": 4, "luo": 4, "yin": 4, "2023b": 4, "wang": [4, 36], "hong": 4, "us": [4, 5, 8, 10, 11, 12, 18, 32, 40, 41, 44, 45, 49, 52, 53], "our": [4, 19, 32, 39, 40, 45, 57], "open": [4, 16, 20, 
24, 26, 50], "sourc": [4, 5, 6, 13, 15, 16, 17, 18, 24, 26, 30, 32, 36, 37, 38, 40, 44, 45, 50, 53], "well": [4, 24], "develop": 4, "commun": 4, "mani": 4, "qualiti": [4, 5, 9, 12, 27, 30, 31, 37, 41, 43, 45, 46, 48, 49, 51, 52, 56], "eval": [4, 11, 12, 29], "mathcal": [4, 5, 7, 9, 13, 15, 17, 18, 19, 21, 24, 27, 28, 32, 36, 44, 51, 56], "decod": [4, 5, 11, 27, 29, 41, 45, 47, 53, 55, 56], "space\uc0c1\uc5d0\uc11c": [4, 24], "\uc218\ud589": [4, 5, 6, 11, 16, 17, 19, 27, 30, 31, 32, 40, 45, 48, 51], "\uc778\ucf54\ub529\ub41c": [4, 19, 31, 41], "z_0": [4, 26, 51, 55], "x_0": [4, 6, 13, 15, 19, 21, 24, 25, 33, 36, 51], "forward": [4, 6, 9, 15, 17, 18, 20, 21, 23, 24, 25, 28, 29, 32, 34, 38, 44, 47, 48, 53, 54, 55, 56], "z_t": [4, 5, 11, 15, 18, 26, 27, 51, 54, 55], "\ubcc0\ud658\ub428": 4, "t": [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 29, 31, 32, 33, 36, 41, 42, 43, 45, 46, 47, 51, 52, 54, 55], "sqrt": [4, 6, 13, 15, 21, 24, 27, 36, 43, 51, 55], "bar": [4, 36, 51, 55], "alpha_t": [4, 15, 17, 18, 21, 24, 36, 51, 54], "alpha": [4, 14, 18, 22, 25, 28, 32, 33, 36, 51, 55], "epsilon": [4, 6, 8, 13, 15, 17, 19, 21, 22, 24, 25, 27, 33, 36, 42, 43, 46, 47, 51, 54, 55], "sim": [4, 6, 8, 13, 17, 18, 24, 27, 30, 32, 36, 41, 43, 48, 51, 54, 55], "tag": [4, 24, 25, 36, 51], "defin": [4, 15, 28], "bar\u03b1_t": 4, "determin": [4, 13], "nois": [4, 5, 6, 10, 12, 13, 14, 17, 18, 22, 23, 24, 25, 32, 34, 35, 36, 38, 39, 41, 42, 43, 46, 47, 48, 51, 53, 54, 55, 56], "strength": [4, 15, 16], "The": [4, 11, 16, 30, 38], "\u03f5_\u03b8": 4, "predict": [4, 12, 13, 15, 22, 25, 32, 47, 49, 51, 54, 55], "ad": [4, 9, 32, 36, 55], "encourag": [4, 47], "an": [4, 9, 11, 17, 18, 24, 26, 32, 36, 38, 40, 52, 56], "bbb": [4, 24], "e_": [4, 8, 24, 36], "big": [4, 23, 36], "epsilon_": [4, 5, 6, 8, 13, 18, 19, 24, 27, 36, 46, 51, 55], "theta": [4, 5, 6, 8, 9, 13, 15, 17, 18, 19, 21, 22, 24, 25, 27, 28, 32, 36, 43, 45, 46, 47, 48, 51, 52, 53, 54, 55, 56], "tau_": [4, 
25, 27], "_2": [4, 17, 32, 55], "correspond": [4, 17, 30], "\u03c4_\u03b8": 4, "map": [4, 5, 6, 9, 11, 13, 17, 19, 20, 24, 25, 27, 30, 31, 32, 36, 37, 43, 44, 52, 53, 54, 55, 56], "implement": [4, 13, 17, 28, 37, 42, 47, 48, 51, 56], "unet": [4, 5, 12, 13, 24, 35, 42, 47, 53, 55], "down": [4, 6, 13, 20, 55], "middl": [4, 5, 6, 30, 38, 50, 55], "block": [4, 5, 20, 44, 46, 48, 53, 55], "resnet": [4, 13, 14, 39, 42, 46, 55], "spatial": [4, 5, 14, 15, 19, 24, 26, 27, 29, 31, 36, 41], "self": [4, 5, 7, 9, 12, 13, 14, 20, 22, 25, 28, 29, 30, 31, 38, 43, 48, 49, 51, 53, 55, 56], "cross": [4, 5, 10, 14, 16, 19, 20, 22, 24, 26, 30, 31, 35, 36, 37, 47, 49, 55], "which": [4, 11, 40, 49], "help": 4, "understand": [4, 26, 29, 39, 45, 49], "model\uc5d0\uc11c": [4, 15, 18, 24, 26, 27, 51], "\ucc98\uc74c": [4, 21, 38], "\ub4f1\uc7a5\ud55c": 4, "\uac1c\ub150\uc73c\ub85c": 4, "\uac70\ub300": [4, 24, 42], "\uc218\ud589\ud558\uae30": [4, 19, 42, 53], "\uc81c\uc548\ub41c": [4, 18, 19, 35, 36], "\uac1c\ub150\uc774\ub2e4": 4, "lora\ub294": [4, 22, 25, 28], "\ud30c\ub77c\ubbf8\ud130\ub97c": [4, 7, 14, 22, 25, 28, 32, 35, 39], "tuning\ud558\uc9c0": [4, 15, 24], "\uc54a\uace0": [4, 5, 13, 14, 15, 17, 19, 24, 26, 28, 29, 30, 32, 36, 43, 48, 50, 53, 54, 56], "decomposit": [4, 25, 28, 29], "\uc30d\uc744": [4, 8, 19, 29, 33, 43, 44], "\uc0c8\ub86d\uac8c": [4, 11, 15, 40, 53], "\ucd94\uac00\ub41c": [4, 8, 9, 13, 15, 24, 33, 36, 51, 53], "weight\ub9cc": [4, 16, 25, 28], "\ud30c\ub77c\ubbf8\ud130\ub294": [4, 7, 25, 55], "\uace0\uc815\ud568\uc73c\ub85c\uc368": 4, "finetuning\uc2dc": [4, 36], "forget": 4, "kirkpatrick": 4, "2017": [4, 44], "\uc608\ubc29\ud560": 4, "weight": [4, 5, 7, 10, 12, 14, 15, 18, 20, 22, 24, 25, 28, 31, 32, 36, 39, 50, 53, 54, 55], "vartriangl": [4, 36], "m": [4, 7, 11, 12, 13, 17, 19, 20, 26, 27, 37], "b": [4, 5, 6, 7, 9, 13, 15, 17, 21, 22, 25, 26, 28, 29, 32, 33, 38, 42, 44, 45, 49, 56], "ar": [4, 11, 12, 13, 28, 35, 55], "pair": [4, 11, 24, 28, 31, 33, 34, 36, 41, 49, 53, 
55], "matric": [4, 55], "hyper": [4, 12, 20, 23, 28, 37], "paramet": [4, 6, 9, 12, 13, 17, 22, 27, 28, 30, 32, 36, 37, 43, 45, 47, 49, 51, 52, 54, 55], "layer": [4, 5, 7, 9, 12, 13, 14, 15, 20, 24, 28, 30, 31, 32, 33, 38, 41, 42, 44, 48, 49, 52, 53, 55], "attent": [4, 5, 6, 10, 13, 14, 16, 19, 20, 21, 22, 24, 26, 27, 28, 30, 31, 35, 37, 41, 45, 46, 49, 55], "layer\uc5d0\uc11c\ub9cc": 4, "\uc0ac\uc6a9\ud560\uc218": 4, "\uc544\ub2c8\uc9c0\ub9cc": [4, 33], "\uc2e4\uc81c\ub85c\ub294": [4, 10, 12, 19], "layer\uc5d0\uc11c": [4, 5, 20, 24], "\uc0ac\uc6a9\ub41c\ub2e4": [4, 7, 13, 19, 25, 35, 36], "lora\ub97c": [4, 25, 28], "tuning\uc2dc": 4, "cost": [4, 41, 43, 51, 52, 54], "storag": [4, 28, 55], "\uc808\uc57d\ud560": [4, 10], "architectur": [4, 11, 12, 20, 22, 24, 29, 30, 32, 38, 40, 43, 48, 49, 50, 53, 56], "overal": [4, 9, 11, 20, 26, 49, 53], "\uc67c\ucabd": [4, 9, 17, 19, 22, 29, 35, 36, 39], "\uadf8\ub9bc\uc758": [4, 8, 9, 11, 15, 52], "\ud558\ub298\uc0c9": 4, "\uc774\uace0": [4, 6, 11, 15, 24, 48, 51, 55, 56], "\ucd08\ub85d\uc0c9": 4, "\uc601\uc5ed\uc774": [4, 32], "\uc774\ub2e4": [4, 6, 13, 15, 18, 28, 31, 36, 42, 46, 51], "t2i\ubaa8\ub378\uc5d0": 4, "\uc0bd\uc785\ud558\uc5ec": 4, "animatediff\uc5d0\ub294": 4, "\ud559\uc2b5\ud574\uc57c": [4, 53], "3\uac1c\uc758": [4, 29], "\ubaa8\ub4c8": [4, 16, 19, 29], "data\uc640": 4, "data\uac04\uc758": 4, "\uac04\uadf9\uc744": 4, "\uc904\uc5ec\uc8fc\uae30": 4, "\ud559\uc2b5\uacfc\uc815\uc5d0\ub9cc": 4, "\ud559\uc2b5\ud558\uae30": [4, 6, 8, 11, 14, 19, 25, 36], "\ud328\ud134": [4, 19], "\uc6cc\ud06c": 4, "\uc870\uc815\ud558\uae30": 4, "\uc704\ud55c\uac83": 4, "\ubaa8\ub4c8\uc740": 4, "\ub530\ub85c\ub530\ub85c": 4, "\ud559\uc2b5\uc2dc\ud0a4\uba70": [4, 24], "\uac01\uac01\uc744": 4, "\ud559\uc2b5\uc2dc\ud0ac\ub54c": 4, "\uc601\uc5ed\uc740": 4, "freez": [4, 10, 16, 22, 25, 28, 30, 31, 41, 47, 49], "\uc2dc\ud0a8\ub2e4": [4, 15], "\ud559\uc2b5\uc2dc": [4, 17, 24], "object": [4, 5, 6, 11, 12, 18, 26, 30, 31, 32, 33, 36, 40, 42, 43, 
45, 47, 50, 52, 53], "sd\uacfc": 4, "\uac19\ub2e4": [4, 6, 15, 21, 24, 25, 26, 27, 30, 33, 36, 40, 46, 49, 51], "\ub370\uc774\ud130\uc14b\uc740": [4, 21, 42, 49, 56], "\uc218\uc9d1\ud558\uae30": 4, "bain": 4, "laion": [4, 16, 24, 27, 29, 37, 42, 51, 55], "aestet": 4, "schuhmann": 4, "\ud488\uc9c8\ucc28\uc774\ub3c4": 4, "\ud07c\uc744": 4, "\uc54c": [4, 6, 8, 15, 21, 26, 36, 39, 49, 51], "\uac1c\ubcc4": [4, 19, 26, 36, 41, 44], "\ub2e4\ub8e8\uac8c": 4, "\ub418\uba74": [4, 6, 24, 30, 41], "watermark\ub4f1\uc744": 4, "\ud3ec\ud568\ud558\uace0": [4, 7, 36, 41, 42], "\ud6c8\ub828\ud560": 4, "\uc14b\uc758": [4, 35], "\ud488\uc9c8\uc740": [4, 24, 33], "\ubb34\uc2dc\ud560": [4, 6], "\uc5c6\uc744": [4, 38, 49, 50], "\ub9cc\ud07c\uc758": [4, 6, 39], "\ucc28\uc774\uac00": [4, 13, 15, 23, 25, 29, 36, 39, 40, 51], "\uc9c1\uc811\uc801\uc73c\ub85c": [4, 26, 36, 45], "\ub370\uc774\ud130\uc14b\uc744": [4, 19, 21, 22, 29, 33, 36, 38, 42], "\uc560\ub2c8\uba54\uc774\uc158\uc758": [4, 19], "\uc81c\ud55c": [4, 18, 32, 33], "\ub420": [4, 6, 19, 31, 38, 39, 40, 41, 54], "\ud488\uc9c8\ub85c": 4, "\uc778\ud574": [4, 6, 16, 17, 22, 24, 27, 31, 33, 35, 41, 44, 49], "\ud53c\ud558\uace0": 4, "t2i\uc758": 4, "\uc9c0\uc2dd\uc744": [4, 29], "\ubcf4\uc804\ud558\uae30": 4, "\ub124\ud2b8\uc6cc\ud06c\ub97c": [4, 8, 17, 19, 22, 24, 29, 32, 52], "\ubd84\ub9ac\ud558\uc5ec": [4, 24], "\ub3c4\uba54\uc778": [4, 6, 22], "\uc601\uc0c1": [4, 5, 11, 19, 32, 41], "\uc815\ubcf4\uc5d0": [4, 32, 40, 41, 53], "\ub9de\uac8c": [4, 7, 11, 15, 17, 19, 21, 24, 26, 28, 31, 35, 39], "\ud53c\ud305\ud558\ub294": 4, "\uc2dc\uc5d0\ub294": [4, 10, 13, 19, 23, 36, 45, 55], "\uc81c\uac70\ud558\uc600\uc73c\uba70": 4, "\uc55e\uc11c": [4, 6, 24, 30, 36, 43, 44, 52], "gap\uc5d0": 4, "\uc758\ud55c": [4, 12, 40], "\ubd80\uc815\uc801": [4, 8], "\uc81c\uac70\ud558\ub294\ub370": 4, "\ud6a8\uacfc\uc801\uc774\ub77c\ub294": [4, 14], "layer\ub294": [4, 20, 21, 31], "\ud65c\uc6a9\ud588\uc73c\uba70": [4, 15], "layer\ub4e4\uc744": 4, "fig": [4, 
10, 12, 18, 23, 32], "3\uacfc": 4, "\ucd94\uac00\ud558\uc600\ub2e4": [4, 15], "queri": [4, 7, 10, 16, 24, 25, 27, 28, 32, 40], "projection\uc744": 4, "\uc608\ub85c": [4, 39], "\uc0b4\ud3b4\ubcf4\uba74": [4, 6, 22, 34, 51], "OF": 4, "larg": [4, 7, 10, 14, 17, 18, 25, 26, 30, 39, 42, 55], "qz": 4, "adapterlay": 4, "cdot": [4, 7, 15, 19, 24, 25, 26, 27, 32, 36, 43, 48, 51, 53, 55], "tz": 4, "intern": [4, 20, 55], "\uc0c1\uc218\ub85c": [4, 13], "time\uc5d0": 4, "adapter\uc758": [4, 24], "\uc601\ud5a5\ub825\uc744": [4, 24], "\uc870\uc808\ud55c\ub2e4": [4, 19], "\uae30\ubcf8\uac12\uc740": 4, "\ud6a8\uacfc\ub97c": [4, 8, 15, 30, 49, 50], "\uc644\uc804\ud788": [4, 15, 17, 34, 44, 54], "\uc81c\uac70\ud558\uace0": 4, "\uc2f6\ub2e4\uba74": [4, 42], "0\uc73c\ub85c": [4, 7, 9, 14, 15, 19, 24, 39, 46, 50], "freeze\ud558\uace0": 4, "\ud30c\ub77c\ubbf8\ud130\ub4e4\ub9cc": 4, "\ub370\uc774\ud130\uc14b\uc73c\ub85c": [4, 24, 36, 42, 53], "\ub79c\ub364\ud558\uac8c": [4, 18, 24, 30, 31, 36, 47], "\uc0d8\ud50c\ud55c": 4, "static": [4, 30], "frame\ub4e4\uc744": 4, "\ucd5c\uc801\ud654\ud588\ub2e4": 4, "eq": [4, 5, 16, 51], "\uc0ac\uc6a9\ud588\ub2e4": [4, 7, 19, 24, 25, 33, 35, 36, 41, 51], "\uc544\uc9c1\uae4c\uc9c0\ub294": [4, 25], "dynamics\ub97c": 4, "\ubaa8\ub378\uacfc": [4, 8, 17, 19, 20, 24, 26, 29, 33, 36, 39, 44, 53], "\uacf5\uc720\ud558\ub294": 4, "dimension\uc0c1\uc758": 4, "\uc2dc\uac04\ucd95\uc73c\ub85c": 4, "\ubaa8\ub378\ub9c1": [4, 33], "\ud558\uae30": [4, 6, 8, 13, 17, 19, 22, 25, 26, 30, 33, 41, 43, 45, 47, 51, 54, 55, 56], "2\uac00\uc9c0": [4, 5, 8, 18, 36, 43, 45, 49, 53], "\ub2e8\uacc4\uac00": [4, 17, 26, 33], "\ud544\uc694\ud558\ub2e4": [4, 15, 19, 33, 42], "3d": [4, 8, 13, 19, 32, 41, 57], "\ub370\uc774\ud130\uc5d0": [4, 7, 27, 29, 35, 38, 39, 51], "\ud655\uc7a5\uc2dc\ucf1c\uc57c": 4, "inflat": 4, "\uc2dc\uac04\ucd95\uc0c1\uc73c\ub85c": 4, "\uc815\ubcf4\uc758": [4, 5, 19, 37], "\ud750\ub984\uc744": 4, "\ub9cc\ub4e4\uae30": [4, 18, 26, 29, 33], "sub": 4, 
"\ub808\uc774\uc5b4\ub294": [4, 19, 22, 32], "\uadf8\ub9bc": [4, 35, 39, 40, 52], "\uc0ac\uc804\uc9c0\uc2dd": 4, "content": [4, 5, 24, 30, 31, 37, 41, 44, 50, 52], "\ud3ec\ucc29\ud560\uc218": 4, "\ud65c\uc6a9": [4, 5, 6, 8, 14, 17, 18, 30, 33, 36, 41, 44, 45, 46], "\uc720\uc9c0": [4, 5, 11, 17, 53, 55], "\uc704\ud574\uc11c": [4, 5, 6, 11, 16, 18, 21, 26, 30, 36, 38, 42, 43, 53], "\ub3d9\uc77c": [4, 5, 18, 32, 36, 37], "video\ub97c": [4, 29, 41], "\ub2e4\ub8e8\uace0\uc790": 4, "\ub3c5\ub9bd\uc801\uc73c\ub85c": [4, 5, 17, 19, 29, 43], "\ub0b4\ubc84\ub824\ub450\uace0": 4, "\ud655\uc7a5\uc2dc\ud0a4\ub294": [4, 7], "\ubc29\ud5a5\uc774": [4, 17, 19], "\uc120\ud638\ub41c\ub2e4": 4, "\uc5f0\uad6c": [4, 10, 17, 19, 23, 29, 31, 34, 53], "\ucc38\uace0\ud558\uc5ec": 4, "5d": [4, 32], "tensor": [4, 28, 30, 42, 48, 55], "time": [4, 5, 6, 9, 10, 12, 13, 15, 17, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 35, 37, 42, 43, 45, 46, 49, 52, 53, 55], "c": [4, 5, 8, 9, 10, 13, 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 27, 29, 32, 36, 37, 38, 42, 47, 51, 53, 55], "f": [4, 9, 12, 13, 15, 17, 21, 25, 27, 28, 29, 36, 42, 43, 44, 47, 48, 51, 54], "\uc785\ub825\uc73c\ub85c": [4, 5, 18, 19, 20, 22, 24, 25, 29, 31, 32, 36, 39, 51], "\ubc1b\ub3c4\ub85d": 4, "\uc218\uc815\ud588\ub2e4": 4, "batch": [4, 5, 7, 10, 14, 16, 24, 28, 29, 31, 32, 44, 47, 48, 52, 54, 55], "frame\uc744": [4, 29], "\ub73b\ud55c\ub2e4": [4, 36], "\ub0b4\ubd80": [4, 26, 33, 35], "map\uc774": 4, "\ub808\uc774\uc5b4\ub97c": [4, 19, 29, 33, 35], "\uc9c0\ub098\uac08\ub54c\ub294": 4, "\uc2dc\uac04": [4, 6, 8, 11, 13, 16, 17, 19, 28, 29, 31, 33, 41], "\ucd95\uc744": [4, 35], "\uc758\ubbf8\ud558\ub294": [4, 10, 39, 56], "reshaping\uc744": 4, "\ubb34\uc2dc\ud55c\ub2e4": 4, "4d": 4, "bf": [4, 7, 46], "\ub808\uc774\uba38": 4, "\ucc98\ub7fc": [4, 6, 11, 17, 25, 31, 38, 43, 44, 45, 50, 55], "\ucc98\ub9ac\ud560": [4, 18, 33, 36], "\ubc18\uba74\uc5d0": [4, 20, 26, 43, 44, 48, 53, 54, 56], "\uacf5\uac04\ucd95": 4, "reshaping\ud558\uc5ec": 4, 
"bhw": 4, "\uc5f0\uad6c\ub4e4\uc740": [4, 17, 24, 33, 37], "modeling\uc758": 4, "\ud0d0\uad6c\ud558\uace0": [4, 19], "animatediff\uc5d0\uc11c\ub294": 4, "\ucc28\uc6a9\ud558\uc5ec": 4, "\uc2dc\uac04\ucd95\uc0c1\uc5d0\uc11c": 4, "\ub3d9\uc791\ud558\ub3c4\ub85d": 4, "\uc218\uc815\uc744": [4, 17, 22, 29], "\uac70\uccd0": [4, 30, 31, 32, 43, 44, 48, 53, 54], "design\ud588\ub2e4": 4, "\uc774\ud558": [4, 6, 35], "\uc2e4\ud5d8\uc744": [4, 6, 8, 11, 15, 17, 21, 24, 26, 32, 53, 54], "\uad6c\uc870\uac00": [4, 14, 38], "\ubaa8\ub378\ub9c1\ud558\ub294\ub370": 4, "\uc801\ud569\ud558\ub2e4\ub294": 4, "\ubc1c\uacac\ud588\ub2e4": [4, 7, 25, 33], "3\uc744": [4, 28], "transformer\uac00": 4, "\uc2dc\uac04\ucd95\uc5d0\uc11c": 4, "\ub3d9\uc791\ud558\ub294": [4, 10, 27], "block\uc73c\ub85c": [4, 16], "\uc774\ub8e8\uc5b4\uc9c4\uac83\uc744": 4, "\ubcfc\uc218": [4, 24, 36], "sinusoid": [4, 13, 41], "encoding\uc744": [4, 21, 33], "\uc560\ub2c8\uba54\uc774\uc158\uc0c1\uc758": 4, "\ud504\ub808\uc784\uc758": [4, 19, 41], "\uc2dc\uac04\uc801": [4, 19, 25, 29, 41], "\uc704\uce58\uc815\ubcf4\ub97c": 4, "\ub098\ud0c0\ub0b4\uace0\uc790": 4, "\ub300\ub85c": 4, "\uc785\ub825\ud06c\uae30\ub294": 4, "\uc870\uc808\ud558\uc600\ub2e4": 4, "\ud3bc\uce58\uace0\uc790": 4, "\ud560\ub54c\ub294": 4, "\ub2e4\uc74c\uacfc": [4, 6, 8, 9, 11, 15, 19, 21, 22, 24, 25, 26, 27, 33, 36, 38, 40, 43, 44, 47, 48, 51, 52, 53, 54, 55, 56], "\uae38\uc774": [4, 5, 16, 33, 41], "\ud06c\uae30": [4, 15, 27, 31, 35, 36, 51], "z_1": [4, 54], "z_f": 4, "z_i": [4, 11, 27], "sequence\ub85c": [4, 36], "\ub2e4\ub8f0\uc218": [4, 35], "\ubca1\ud130\uac00": 4, "block\uc744": [4, 14, 16, 21, 46], "\ud1b5\uacfc\ud558\uba74": [4, 8], "z_": [4, 8, 18, 19, 25, 45, 51, 54, 55], "out": [4, 9, 13, 20, 25, 42, 43, 51, 55, 56], "v": [4, 6, 7, 10, 11, 13, 17, 22, 24, 25, 26, 27, 28, 30, 37, 40, 45, 47, 48, 51, 52, 54, 55], "softmax": [4, 13, 24, 26, 27, 45], "qk": [4, 24, 27], "kz": 4, "vz": 4, "\ubd84\ub9ac\ub41c": [4, 37], "\uc758\ubbf8\ud55c\ub2e4": 
[4, 7, 15, 25, 26, 33, 34, 36, 51], "mechanism\uc744": [4, 24, 37], "\ud604": 4, "\uc0dd\uc131\uc5d0": [4, 7, 8, 16, 20, 21, 22, 24, 33, 36, 39, 40, 55], "\ud504\ub808\uc784\uc73c\ub85c": [4, 19, 41], "\ucd94\ucd9c\ub41c": [4, 19, 22, 31, 42], "\uac83\uc774": [4, 5, 6, 8, 9, 11, 13, 14, 15, 18, 19, 21, 22, 24, 25, 26, 27, 28, 30, 31, 33, 36, 38, 39, 40, 41, 43, 44, 46, 47, 49, 50, 51, 54, 55, 56], "\uac00\ub2a5\ud558\ub2e4": [4, 7, 12, 13, 16, 20, 24, 25, 33, 34, 36, 37, 41, 46], "\uacb0\uacfc\uc801\uc73c\ub85c": [4, 8, 17, 22, 24, 44], "\uac1c\ubcc4\uc801\uc73c\ub85c": [4, 32], "\ud655\uc7a5\ud558\uc5ec": [4, 17, 25, 33], "\ucd94\uac00\ud55c": [4, 17, 51], "animatediff\uac00": 4, "\uc2dc\uac04\uc5d0": [4, 41], "content\uc758": 4, "\ud3ec\ucc29\ud558\uae30": [4, 19], "\uc81c\uc791\ud558\ub3c4\ub85d": 4, "block\uc804\uc5d0": 4, "\uc78a\uc5b4\uc11c\ub294": 4, "\uc548\ub41c\ub2e4": 4, "\uc790\uccb4\uac00": [4, 5, 8, 23, 30, 40], "frame\uc758": 4, "\uc21c\uc11c\ub97c": 4, "\uc54c\uace0": [4, 6, 17], "\uc544\ub2c8\ub2e4": [4, 31, 42], "\ub123\uc74c\uc73c\ub85c": 4, "\ubb38\uc81c\ub4e4\uc744": [4, 30], "transformer\uc758": [4, 25, 28, 36, 39, 45], "\ub808\uc774\uc5b4\uc758": [4, 22, 33], "\ud558\uc600\uc73c\uba70": [4, 33], "residu": [4, 5, 13, 14, 21, 22, 31, 44, 46, 47, 49, 55], "connection\uc744": [4, 32, 46], "\uc2dc\uc791\uc2dc\uc5d0": 4, "ident": [4, 14, 16, 19, 29, 38, 47, 53], "mapping\uc73c\ub85c": [4, 36], "\uc804\ubc18\uc801\uc778": [4, 19, 26, 39, 40, 47], "\uc0ac\uc804\ud559\uc2b5\ud558\ub354\ub77c\ub3c4": 4, "\ub3d9\uc791": [4, 23, 24, 29], "\ud328\ud134\uc5d0": 4, "\uc801\uc6a9\uc5d0": [4, 32], "zoom": [4, 10, 30], "pan": 4, "roll": 4, "\uc0ac\uc804\ud559\uc2b5\uc744": 4, "\uac10\ub2f9\ud560": 4, "\uc5c6\uc5b4": [4, 5, 13, 18, 49, 52, 56], "\uc561\uc158\uc5d0": 4, "\ub9de\ucdb0": [4, 7, 21, 24, 45], "\ud29c\ub2dd\ud558\uace0\uc790": 4, "\uc0ac\uc6a9\uc790\ub97c": 4, "\ud69f\uc218\ub85c\ub3c4": 4, "\ud6a8\uc728\uc801\uc73c\ub85c": [4, 18, 25, 27, 33, 41, 
51, 54], "\uc911\uc694\ud558\ub2e4": [4, 15, 19], "animatediff\uc5d0": 4, "\uc801\uc6a9\ud588\ub2e4": [4, 7, 41], "\uad6c\uc870\uc640": [4, 19], "\uc81c\ud55c\ub41c": [4, 17, 19, 22, 27], "layers\uc5d0": [4, 38], "layers\ub97c": 4, "personalization\uc744": [4, 22], "\uba87": [4, 28, 33, 40, 43, 44, 48, 51], "\uc885\uc758": 4, "\ubc29\uc2dd\uc73c\ub85c": [4, 6, 8, 11, 13, 15, 22, 24, 25, 28, 31, 32, 33, 36, 37, 41, 43, 46, 48, 51], "\uc9c4\ud589\ud558\uc600\uc73c\uba70": 4, "rule": [4, 11, 54], "augmentation\uc744": [4, 39], "videos\ub97c": 4, "\uc5bb\uc5c8\ub2e4": [4, 7, 24, 33, 35], "\uc608\ub97c": [4, 8, 15, 24, 26, 27, 36, 38, 42, 43, 44, 50], "\ub4e4\uc5b4": [4, 8, 15, 24, 26, 27, 36, 38, 42, 43, 44], "\uc5bb\uae30": [4, 7, 33, 36], "\uc810\ucc28": [4, 24, 54], "\uc904\uc774\uac70\ub098": 4, "\ub298\ub824\uac00\uba70": 4, "\uc9c4\ud589\ud588\ub2e4": [4, 7, 24, 41, 51], "motionlora\ub294": 4, "\uc815\ub3c4\uc758": [4, 40], "2000\ubc88\uc758": 4, "\ud6c8\ub828\ud69f\uc218\ub85c": 4, "\ud30c\uc778\ud29c\ub2dd\ud588\uc744\ub54c\ub3c4": 4, "\uad1c\ucc2e\uc740": [4, 6, 40], "property\ub85c": 4, "composit": [4, 18, 30, 32, 42], "capability\ub97c": [4, 24], "\uac01\uac01\uc774": 4, "time\uc0c1\uc758": 4, "effect\ub97c": 4, "\uc735\ud569\ud558\uae30\uc704\ud574": 4, "\ud611\ub825": 4, "combin": [4, 6, 11, 15], "\uac00\ub2a5\ud55c": [4, 6, 7, 8, 12, 13, 19, 27, 32, 33, 36, 37, 42, 43, 48, 51, 53, 55], "3\uac1c": [4, 47], "\ubaa8\ub4c8\uc758": 4, "objective\ub294": [4, 24], "\uc57d\uac04\uc529": 4, "\ub2e4\ub974\ub2e4": [4, 46], "adapter\ub294": [4, 24], "sd\uc758": 4, "loss\uc778": 4, "\ud559\uc2b5\ud55c\ub2e4": [4, 7, 13, 15, 19, 24, 29, 33, 35, 36, 41, 42, 51], "\uc5ed\ud560\uc744": [4, 15, 22, 23, 24, 26, 55], "module\uacfc": 4, "lora\uc758": [4, 22, 25], "data\uc5d0": [4, 13, 15, 18, 20], "\ucc28\uc6d0\uc744": [4, 13, 27, 31, 41, 53], "\uc218\uc6a9\ud558\uae30": 4, "\uc57d\uac04": [4, 23], "\uc218\uc815\ub41c": [4, 8, 33], "objective\ub97c": [4, 7, 13, 24, 28, 36, 
40], "encoder\ub97c": [4, 5, 15, 16, 24, 35, 36], "\uc778\ucf54\ub529\ub41c\ub2e4": 4, "code\ub294": [4, 25, 36], "schedule\uc5d0": 4, "\ub178\uc774\uc988\uac00": [4, 13, 17, 33, 36], "\uc785\ub825\uc740": 4, "codes\uc640": 4, "\uc30d\uc774\ub418\ub294": 4, "prompts\uc774\uba70": 4, "process\uc5d0\uc11c": [4, 12, 15, 23, 34, 36, 46], "\ub178\uc774\uc988\ub97c": [4, 6, 13, 17, 19, 33, 36, 39, 42], "\uc608\uce21\ud55c\ub2e4": [4, 15, 21, 33, 36, 46], "\ucd5c\uc885": [4, 7, 13, 14, 17, 19, 20, 24, 26, 29, 32, 33, 35, 36, 42, 53, 55], "2_2": [4, 36], "\ubaa8\ub4c8\ub4e4": 4, "\ud0c0\uac9f\uc744": 4, "\uc2dc\ud0a8\ub4a4": 4, "\ud559\uc2b5\ud588\ub2e4": [4, 7, 24, 35, 41], "inference\uc2dc\uc5d0\ub294": 4, "model\ub294": 4, "\uc124\uba85\ud55c\ub300\ub85c": 4, "inflate\ub418\uba70": 4, "\uc0dd\uc131\ud55c\ub2e4": [4, 7, 19, 26, 29, 33, 35, 41], "inference\uc2dc": [4, 24, 28], "\uadf8\ub0e5": [4, 24], "\ubc30\uc81c\ud558\uc9c0": 4, "injection\ud558\uc600\uc73c\uba70": 4, "\uc601\ud5a5\ub825\uc740": 4, "4\uc758": [4, 44], "\uc870\uc808\ud588\ub2e4": 4, "3\uc758": [4, 12, 45], "study\uc5d0\uc11c": 4, "\uac12\uc5d0": [4, 6, 32, 46, 51], "\uacb0\uacfc\uc758": [4, 22, 40], "\ucc28\uc774\ub97c": [4, 19, 23, 36, 43, 47, 51, 53], "frames\uc740": 4, "process\uc640": [4, 5, 15, 21], "codes\ub97c": 4, "\ub514\ucf54\ub529": [4, 19, 31], "\ud568\uc73c\ub85c\uc368": [4, 6, 17, 18, 40, 50, 52, 53], "\uc5bb\uc744\uc218": [4, 36], "5\uc5d0": 4, "\uc801\uc6a9\ud558\uc5ec": [4, 8, 29, 33, 51, 53, 54], "\uc0ac\uc6a9\ud558\uc600\ub2e4": [4, 19, 25, 29, 36], "\uc790\uc138\ud55c": [4, 11, 15, 25, 39, 54, 55], "\uc0ac\ud56d\uc740": 4, "supplementari": [4, 17], "materi": [4, 17, 18, 50], "\ud655\uc778\ud574\uc8fc\uc138\uc694": 4, "user": [4, 8, 17, 22, 37, 40, 52], "smooth": [4, 5, 18], "\ub4f1\uc218\ub97c": 4, "\uc870\uc0ac\ud588\ub2e4": 4, "averag": [4, 11, 12, 14, 25, 43, 51, 52, 54], "aur": 4, "\uc810\uc218\ub97c": [4, 7, 11, 21, 45, 49], "\uac00\uc9c0\uba74": 4, "prefer": [4, 10, 49], 
"metric": [4, 11, 12, 13, 17, 34, 41, 43, 44, 47, 50], "paper\uc5d0\uc11c": [4, 37], "\uc5b8\uae09\ud588\ub358": [4, 30], "\ud14d\uc2a4\ud2b8\uc30d\uc744": 4, "\ub3d9\uc2dc\uc5d0": [4, 5, 7, 8, 10, 17, 27, 30, 32, 35, 37, 52, 53], "\ud3c9\uac00\uc9c0\ud45c\uc774\ub2e4": 4, "frames\uc640": 4, "\uc0ac\uc774": 4, "\uacc4\uc0b0\ud55c": [4, 13, 48], "score\ub294": [4, 34, 39], "\ubca1\ud130\ub4e4": 4, "\ucf54\uc0ac\uc778": 4, "\uc784\ubca0\ub529\uacfc": [4, 33, 40], "\uc784\ubca0\ub529": [4, 8, 14, 17, 19, 29, 33, 35, 40], "\uc720\uc0ac\ub3c4": [4, 8, 21, 27, 29], "\uc560\ub2c8\uba54\uc774\uc158\uc774": [4, 19], "\uc5c6\uc73c\ubbc0\ub85c": [4, 23], "image\uc640": [4, 5, 10, 15, 21, 24, 44, 45], "\uc5f0\uc18d\ub41c": [4, 19], "\uc30d\uc758": 4, "\uc784\ubca0\ub529\uc758": 4, "scaler\ub97c": 4, "adapter\uc5d0": [4, 16], "\uc81c\uac70\ud55c": [4, 27], "\uadf8\ub9bc\uc740": [4, 8, 9, 11, 15, 25, 27, 35, 38, 39], "\uccab\ubc88\uc9f8": [4, 43, 52, 53, 54], "\ud504\ub808\uc784\uc774\ub2e4": 4, "\uc81c\uac70\ud588\uc744\ub54c": 4, "\ub192\uc544": [4, 10], "\ubcf4\uc774\ub294\ub370": 4, "adapter\uac00": 4, "\ud2b9\uc131\uc774\ub77c\uace0": 4, "watermark\ub098": 4, "\ube14\ub7ec": 4, "\ub4f1\uc744": [4, 14, 22, 25, 27, 32, 33, 44, 51], "\ud559\uc2b5\ud588\uae30": 4, "\ub54c\ubb38\uc774\ub2e4": [4, 20, 35, 36], "\ud559\uc2b5\uacfc\uc815\uc5d0": 4, "\ub3c4\uc6c0\uc774": [4, 19, 22, 30, 40, 45], "\ub418\uc5c8\uc74c\uc744": 4, "transformer\uad6c\uc870\uc640": 4, "convolution\uc778": 4, "\uad6c\uc870\uc758": [4, 20], "\ube44\uad50\ud588\ub2e4": [4, 33, 51], "\ubd84\uc57c\uc5d0\uc11c": [4, 6, 12, 20, 22, 23, 25, 37, 48, 51], "\uc790\uc8fc": [4, 47, 53, 54], "attention\ubd80\ubd84\uc744": 4, "1d": [4, 29], "convolution\uc73c\ub85c": 4, "\uad50\uccb4\ud558\uc5ec": [4, 18, 24], "\ud30c\ub77c\ubbf8\ud130\uac00": [4, 35], "\ub193\uc5ec\uc788\uc74c\uc744": 4, "convolut": [4, 5, 14, 20, 23, 31, 32, 38, 41, 44, 45, 53, 55], "\ub3d9\uc77c\ud558\uac8c": [4, 14, 17, 20, 22, 36, 39, 43, 50, 51, 
52, 54, 55], "\ub193\uc558\uc9c0\ub9cc": 4, "\ube44\uad50\ud558\uc5ec": [4, 26, 29], "\uc81c\ub300\ub85c": [4, 14, 18, 38], "\ubc18\uc601\ud558\uc9c0": [4, 20, 26], "\ubabb\ud588\ub2e4": [4, 23], "efficiency\uc640": 4, "\uce21\uba74\uc5d0\uc11c": [4, 5, 19, 22, 26, 27, 33, 38], "\ud6a8\uc728\uc131\uc744": [4, 32], "\uc2dc\ud5d8\ud574\ubcf4\uc558\ub2e4": 4, "\uac1c\uc218\uc640": [4, 27], "\uc870\uc808\ud574\uac00\uba70": 4, "\ud559\uc2b5\uc2dc\ucf30\ub2e4": 4, "\ubaa8\ub378\ud559\uc2b5\uc744": 4, "\uc704\ud574\ub3c4": 4, "\ubc30\ud3ec\ub97c": 4, "\uc704\ud574\uc11c\ub3c4": 4, "\uc911\uc694\ud55c": [4, 5, 7, 11, 12, 17, 19, 23, 24, 26, 27, 29, 33, 36, 38, 40], "\ubd80\ubd84\uc774\ub2e4": [4, 26], "animatediff\ub294": 4, "\ube44\uad50\uc801": [4, 12, 20, 23, 24, 33, 36, 55], "\ud30c\ub77c\ubbf8\ud130": [4, 14, 24, 25, 27, 29, 35, 43, 47, 48, 52, 53, 56], "\uc801\ub4e4\ub54c\uc5d0\ub3c4": 4, "\ub9cc\ub4e4\uc218": 4, "\uc2e4\ud5d8\uc5d0\uc11c\ub294": [4, 6, 29], "\ubcf8\uac83\uc774\ub2e4": 4, "\uc5b4\ub835\uae30": [4, 26], "\uc801\uc6a9\ud558\uae30": [4, 26, 29], "\uc801\uc744\ub54c\uc5d0\ub3c4": 4, "\ud559\uc2b5\ud558\uace0\uc790": [4, 47], "\uc6c0\uc9c1\uc784\uc740": 4, "\uc788\uc5c8\uc73c\ub098": [4, 18], "\uadf9\ub3c4\ub85c": 4, "\uc801\uc744": [4, 38], "\uae09\uaca9\ud55c": 4, "\uc800\ud558\uac00": [4, 12, 22, 26, 46], "\uc788\uc5c8\ub2e4": [4, 21, 24, 25, 33, 35, 36, 37, 42, 51], "content\uc640": [4, 37], "prior\uc758": 4, "exist": [4, 29], "content\ub97c": 4, "\uc870\uc808\ud560": [4, 9, 24, 38, 39], "\ud655\uc778\ud558\uae30": [4, 9, 21], "controlnet\uacfc": [4, 9, 24], "\uacb0\ud569\ud558\uc5ec": [4, 17, 19, 32, 33, 39, 44, 51], "\uc0dd\uc131\uc2dc": [4, 36], "depth\ub97c": [4, 16], "ddim": [4, 5, 6, 11, 21, 24, 25, 27, 39, 40, 51, 54, 55], "inversion\uc744": [4, 15, 22], "\ub2e4\ub4ec\uc5b4\uc9c4": 4, "sequences\ub97c": 4, "\uc5bb\uace0": [4, 13, 30, 33], "\ucd5c\uc2e0": [4, 19, 33, 41], "\uc218\uc815": [4, 14, 18, 29], "randomli": [4, 18, 30, 47], 
"noise\ub97c": [4, 5, 12, 13, 15, 20, 23, 24, 31, 34, 36, 38, 51], "\uc0dd\uc131\uc744": [4, 5, 8, 9, 16, 17, 19, 22, 24, 26, 29, 31, 33, 40, 41, 49, 51, 52], "pipeline\uc778": 4, "\ub514\uc790\uc778\ud558\uc600\uc73c\uba70": 4, "\uc720\uc9c0\ud560": [4, 9, 22, 25, 34, 40, 44], "\uc788\uc73c\uba70": [4, 8, 9, 18, 22, 25, 26, 27, 29, 33, 39, 44, 45, 49, 52], "motion\uc73c\ub85c": 4, "\ud559\uc2b5\ub418\uba74": [4, 13, 40], "animate\uc2dc\ud0a4\uace0\uc790": 4, "\ud560\ub54c": [4, 25, 36, 47], "\ud6a8\uc728\uc131\uacfc": 4, "\uc0dd\uc131\ub2a5\ub825\uc744": 4, "\uac80\uc99d\ud588\ub2e4": 4, "\ub610": [4, 8, 10, 15, 20, 23, 24, 30, 33, 34, 35, 37, 42, 47, 50], "controllability\uce21\uba74\uc5d0\uc11c\ub3c4": 4, "\ud559\uc2b5\uc5c6\uc774": [4, 24, 25], "\ucde8\ud5a5\uc758": 4, "\uc6cc\ud06c\uc5d0": 4, "\uc2dc\ud0ac": [4, 29, 30, 39, 42], "\uc218\uc788\ub294": 4, "\ubca0\uc774\uc2a4": [4, 33, 39], "\ub77c\uc778\uc73c\ub85c\uc368": 4, "\ubc29\uba74\uc758": 4, "application\uc5d0": [4, 24], "\uc7a0\uc7ac\ub825\uc744": [4, 7, 51], "\ud074\ub9ad\ud558\uba74": 4, "gif\ub97c": 4, "\ubcf4\uc2e4": 4, "side": [4, 18, 32], "17": [4, 23, 46, 51], "year": [4, 11], "old": 4, "japanes": 4, "school": 4, "gpt\ub85c": 4, "\uadf8\ub9bc\uc744": [4, 6, 11, 15, 17, 26, 35, 38, 39, 44], "input\uc73c\ub85c": [4, 9, 20, 27, 32, 35, 38], "\uc0ac\uc6a9\ud568": [4, 17, 39, 40, 45, 49], "man": [4, 55], "black": 4, "pad": [4, 9, 13, 55], "jumper": 4, "\ucd2c\uc601\ud55c": [4, 11], "\uc785\ub825\ud55c": [4, 8], "\uc0ac\uc9c4\uc758": [4, 27], "\uc778\ubb3c\uc758": [4, 16, 19], "\uc778\uc885\uc774": 4, "\uc720\uc9c0\ub418\uc9c0": 4, "\uc54a\uc558\ub294\ub370": 4, "\ud559\uc2b5\ub370\uc774\ud130": 4, "\ubd88\uade0\ud615": 4, "\ub54c\ubb38\uc73c\ub85c": [4, 36], "\uc0ac\ub8cc\ub428": 4, "blond": 4, "blue": [4, 11, 45], "ey": 4, "\ub290\ub080\uc810": 4, "10m\uc774": 4, "\uc560\ub2c8\uba54\uc774\uc158\ud654\uc5d0": 4, "\ub370\uc774\ud130\uc14b\uc778\uc9c0": 4, "\ubaa8\ub974\uaca0\ub2e4": [4, 46], 
"\uc0ac\uc6a9\ud558\uc9c0": [4, 11, 22, 26, 30, 33, 36, 42, 43, 51, 53], "\uc810\uc774": [4, 15, 38, 39], "\uc544\uc27d\ub2e4": 4, "\uc2e4\uc9c8\uc801\uc73c\ub85c": 4, "motionlora\uc815\ub3c4\ub77c": 4, "\uc0ac\uc6a9\uc774": [4, 51], "\ud3b8\ub9ac\ud558\ub2e4": 4, "reproduction\uc774": 4, "\uc6a9\uc774\ud558\ub2e4": 4, "\uc704\ud574\uc11c\ub294": [4, 6, 18, 19, 26, 30, 36, 40, 42, 45, 55], "t2i\uac00": 4, "\uc81c\uc77c": [4, 27, 45, 46, 48, 55], "\ubd80\ubd84\uc774\ub77c\uace0": 4, "\ud560\uc218": [4, 25, 51], "\uc788\ub294\ub370": [4, 6, 15, 18, 34, 35, 36, 37, 42, 51, 55, 56], "\uc2a4\ud0c0\uc77c\uc758": [4, 18], "\uad6c\ud558\ub294": [4, 21, 26, 42, 48, 56], "\ub9de\uc9c0": 4, "\uc54a\uc73c\uba74": 4, "\ud074\ub9bd": [4, 29], "\ucd08\ubc18\uc5d0": [4, 23, 48], "\uae09\uaca9\ud788": 4, "\ubcc0\ud654\ud558\ub294": 4, "\ubd80\ubd84\uc774": [4, 5, 15, 20, 36, 38, 39], "\uc0dd\uae34\ub2e4": [4, 25, 35, 49], "consist": [5, 12, 15, 30, 41, 53], "control": [5, 7, 16, 17, 18, 24, 26, 30, 37, 38, 53, 55], "synthesi": [5, 6, 8, 21, 27, 31, 34, 35, 38, 43, 47, 52], "charact": [5, 47], "2311": [5, 16], "17117": [5, 16], "offici": [5, 8, 10, 12, 16, 18, 20, 25, 30, 36, 45, 46], "nonoffici": [5, 18], "humanaigc": 5, "geonhak": [5, 16, 18, 57], "song": [5, 15, 16, 18, 43, 51, 54, 57], "march": [5, 16, 36, 52], "13": [5, 6, 16, 20, 30, 31, 35, 36, 49, 51], "exampl": [5, 11, 12, 17, 26, 38, 40, 47, 50], "figur": [5, 6, 12, 16, 18, 20, 23, 30, 31, 33, 35, 36, 37, 39, 40, 42, 44, 47, 49], "\uc8fc\ub958\uac00": 5, "\ub418\uc5c8\uc9c0\ub9cc": 5, "\uc601\uc5ed\uc5d0\uc11c\ub294": 5, "\uc5b4\ub824\uc6c0\uc774": [5, 8, 17, 23, 32, 53], "animation\uc5d0\uc11c": 5, "\uc0c1\uc138": 5, "\uc720\uc9c0\ud558\ub294": [5, 19, 28], "\ubb38\uc81c\uc774\ub2e4": 5, "image\uc758": [5, 10, 15, 20, 21, 23, 24, 27, 34, 37, 40], "\ubcf5\uc7a1\ud55c": [5, 6, 11, 19, 21, 24, 26, 27, 32, 33, 39], "appear": [5, 16, 47], "\ud2b9\uc9d5\uc758": 5, "\uc720\uc9c0\ud558\uae30": [5, 9, 14], "feature\uacfc": [5, 7, 
35], "\ud1b5\ud569\ud560": 5, "referencenet": 5, "\uc124\uacc4": [5, 14, 23], "controllability\uc640": 5, "continuity\uc744": 5, "pose": [5, 9, 16, 37, 38, 47, 55], "guider": 5, "\ud504\ub808\uc784\uac04": 5, "\ubd80\ub4dc\ub7ec\uc6b4": 5, "\uc804\uc774\ub97c": 5, "effect": [5, 15, 32, 37, 38, 50, 55], "\uc784\uc758\uc758": [5, 6, 15, 17, 18, 21, 40, 43], "\ub300\ud574\uc11c\ub3c4": [5, 6, 13, 15, 21, 29, 39, 40, 44, 46, 51, 52, 53], "animate\ud560": 5, "\uc788\uace0": [5, 14, 15, 17, 20, 21, 22, 27, 30, 31, 32, 34, 41, 43, 48, 50, 54, 55, 56], "\uc6b0\uc6d4\uc131\uc744": 5, "histori": 5, "animation\uc740": 5, "\uc774\ubbf8\uc9c0\ub85c\ubd80\ud130": [5, 11, 17, 19, 32], "\uc0ac\uc2e4\uc801\uc778": [5, 19], "animate\ud558\ub294": 5, "\uc791\uc5c5\uc73c\ub85c": 5, "gan\uc744": [5, 19, 20, 33, 37, 38], "\uc9c4\ud589\ub418\uc5b4\uc654\ub2e4": 5, "\uadf8\ub7ec\ub098": [5, 15, 16, 18, 19, 25, 40, 51, 54], "\ub610\ub294": [5, 7, 16, 19, 21, 22, 29, 32, 33, 34, 40, 44], "\ube44\ub514\uc624\ub294": 5, "local": [5, 20, 30, 32, 33, 35, 37, 41], "detail": [5, 9, 11, 16, 17, 20, 22, 31, 37, 42], "semant": [5, 6, 11, 22, 24, 26, 27, 30, 35, 40, 41, 44, 55], "inconsist": 5, "instabl": [5, 43], "\ub110\ub9ac": [5, 19], "\uc0ac\uc6a9\ub418\uae30\uc5d0\ub294": 5, "\uc788\uc5b4\uc654\ub2e4": 5, "\uc6b0\uc218\uc131\uc5d0": 5, "task\uc5d0": [5, 7, 9, 25, 28, 40], "\ud65c\uc6a9\ud558\ub824\ub294": [5, 37], "dreampos": 5, "23": [5, 12, 44], "04": 5, "\ud655\uc7a5\ud55c": 5, "fashion": [5, 30], "\ud569\uc131\uc744": [5, 18, 27], "\uac00\ub2a5\ud558\ub294\ub370": 5, "\ucd08\uc810\uc744": [5, 33, 40, 56], "\ub9de\ucdc4\ub2e4": 5, "clip\uacfc": [5, 19], "feature\ub97c": [5, 20, 24, 31], "\ud1b5\ud569\ud55c": [5, 11, 26], "adpatar": 5, "module\ub97c": 5, "sample\uc5d0": [5, 32], "finetuning\uc774": [5, 25], "\ud544\uc694\ud558\uace0": [5, 19, 31, 34], "\uc6b4\uc6a9": 5, "\ud6a8\uc728\uc774": 5, "disco": 5, "07": [5, 23], "\uc218\uc815\ud558\uc5ec": 5, "danc": [5, 16], "\uc9c4\ud589": [5, 6, 
13, 16, 17, 18, 31, 37, 45, 46, 49, 54], "controlnet\uc744": [5, 16], "\ud1b5\ud569": [5, 6, 16], "\uad6c\ucd95": [5, 6, 17, 41, 45, 55], "\ubcf4\uc874\uc5d0": [5, 19], "\uc5b4\ub824\uc6c0\uc744": [5, 16, 19, 21, 36, 37, 45], "\uacaa\uace0": [5, 21], "frame\uac04": [5, 16], "jitter": [5, 30], "issu": [5, 17, 45], "\uad00\uc810\uc5d0\uc11c\uc758": 5, "generation\uc5d0": [5, 8, 21], "\ud488\uc9c8\uacfc": [5, 19, 29], "\ub2e4\uc591\uc131\uc5d0": [5, 40], "\uc9c4\uc804\uc774": 5, "\uc788\uc5b4\uc654\uc9c0\ub9cc": 5, "detail\uc744": [5, 37, 40], "\uc0b4\ub9ac\ub294": 5, "\uc5b4\ub835\uace0": [5, 6, 16, 19, 33, 44], "\uc815\ud655\ub3c4": [5, 24, 44], "\uce21\uba74\uc5d0\uc11c\ub3c4": 5, "\ubd80\uc815\ud655\ud55c": [5, 15], "\ub354\uc6b1\uc774": 5, "\uc2e4\uc9c8\uc801": 5, "\ub2e4\ub8f0": 5, "\uc77c\uad00\uc131": [5, 16, 17, 36], "\uc548\uc815\uc801\uc774\uace0": 5, "\uc5f0\uc18d\uc801\uc778": [5, 17, 19, 32, 41, 51], "\uc601\uc0c1\uc744": [5, 11, 41, 42, 51], "\ub9cc\ub4e4\uc5b4\ub0b4\ub294": 5, "\ud604\uc7ac\ub294": 5, "\uc77c\ubc18\uc131\uacfc": 5, "\ub9cc\uc871\ud558\ub294": [5, 15, 25, 51], "\ucc3e\uc744": [5, 33, 40, 42], "\uad6c\uc870": [5, 11, 12, 17, 20, 24, 29, 32, 33, 36, 38, 39, 44, 54], "\uc694\uc57d": [5, 19, 29, 33, 39, 42], "consistency\ub97c": [5, 24, 25, 51], "attention\ub97c": 5, "unet\uc73c\ub85c": 5, "\ud558\uc5ec\uae08": 5, "\uc77c\uad00\ub41c": [5, 8, 17, 19, 29, 32, 33, 36, 41], "\uad00\uacc4\uc131\uc744": 5, "\uc885\ud569\uc801\uc73c\ub85c": 5, "controllability\ub97c": [5, 38], "lightweight": [5, 24], "signal\uc744": 5, "\uc808\ucc28\uc5d0": 5, "\ud1b5\ud569\ud568": 5, "stability\ub97c": 5, "\uc5f0\uc18d\uc801\uc774\uace0": 5, "\uace0\ud574\uc0c1\ub3c4": [5, 7, 25, 27, 29, 31, 32, 33, 35, 39, 44, 52], "\ubcf4\uc874\uc744": [5, 16], "\uad00\uacc4\uc131": 5, "5k": [5, 16, 54], "\uc778\ud130\ub137": 5, "\uc138\ud2b8\ub85c": 5, "\uc7a5\uc810": [5, 20, 24, 29, 33], "appearance\uc758": 5, "consistency\uc744": 5, "flickering\uacfc": 5, 
"\uc2e0\ub8b0\ub3c4\uc758": [5, 31], "image\uc5d0\ub3c4": 5, "benchmark\uc5d0": 5, "\uc6b0\uc218\uc131": 5, "\uc99d\uba85": [5, 6, 25], "t2i": [5, 7, 11, 22, 24, 26, 29, 31, 35, 53], "ldm": [5, 6, 10, 14, 16, 22, 25, 35, 36, 37, 41, 51, 52, 53], "space\uc5d0\uc11c\uc758": [5, 15], "controlnet": [5, 7, 16, 24, 53, 55], "adapt": [5, 14, 19, 20, 22, 28, 38, 53], "mask": [5, 7, 26, 29, 30, 41, 45, 50, 55], "edg": [5, 6, 9, 24, 44, 53, 55], "depth\uc640": 5, "\uc870\uac74\ubd80": [5, 19, 29, 33, 36], "ip": [5, 16], "objectstitch": 5, "edit": [5, 6, 17, 20, 21, 22, 31, 34, 40, 42, 52, 53, 55], "\ubc29\ubc95": [5, 6, 7, 11, 14, 17, 18, 19, 26, 32, 33, 36, 42, 46, 48, 49, 51, 57], "tryondiffus": 5, "virtual": 5, "apparel": 5, "try": 5, "on\uc744": 5, "parallel": [5, 15], "u": [5, 6, 13, 14, 15, 16, 17, 18, 19, 26, 27, 29, 30, 32, 36, 42, 43, 47, 49, 53, 54, 55], "t2v": [5, 29], "inter": [5, 30], "frame": [5, 16, 30], "modeling\uc744": 5, "\uc774\ub904\uc9d0": 5, "\uc0bd\uc785\ud55c": 5, "animatediff": [5, 16], "person": [5, 8, 10, 11, 22, 40, 52], "data\ub85c": [5, 10, 16, 27], "\ud559\uc2b5\uc2dc\ud0a8": [5, 9, 18, 21, 27, 36, 42], "anyone\uc5d0\uc11c\ub294": 5, "modeling\uc5d0": 5, "\ubc1b\uc544": [5, 9, 13, 16, 29, 33, 36, 39, 46], "\ubc29\ubc95\ub860": [5, 17, 18, 25, 27, 28, 29, 51], "i2v": 5, "videocompos": 5, "condit": [5, 6, 7, 10, 11, 13, 18, 20, 23, 24, 29, 30, 31, 33, 34, 36, 39, 41, 42, 43, 46, 47, 50, 51, 55], "latent\uacfc": 5, "\uac04": [5, 13, 19, 25], "mix": [5, 12, 20, 39], "videocraft": 5, "textual": [5, 10, 17, 22, 24, 47], "\ud1b5\ud569\ud558\uc5ec": 5, "attention\uc5d0": [5, 16, 20], "\uc8fc\uc785": [5, 6, 37], "\ubc29\ubc95\ub4e4": 5, "\uc548\uc815\uc801\uc778": [5, 36, 44], "\uc0ac\ub78c": [5, 16, 19, 29, 35, 44, 49], "\uc0dd\uc131\uc5d0\ub294": [5, 16, 20], "pidm": [5, 19], "lfdm": 5, "leo": 5, "\ubaa9\ud45c": [5, 10, 11, 19, 40, 44], "animation\uc744": 5, "guid": [5, 6, 8, 15, 18, 19, 25, 34, 43, 50], "\ud569\uc131": [5, 18, 19, 24, 32, 33, 39], 
"func": 5, "embed": [5, 7, 9, 10, 11, 13, 14, 17, 18, 19, 20, 21, 24, 28, 29, 30, 31, 36, 37, 41, 45, 46, 47, 55], "timestep": [5, 9, 10, 11, 14, 15, 18, 25, 42, 47, 51, 55], "vit": [5, 7, 14, 16, 17, 20, 21, 24, 26, 33, 35, 42, 47], "14": [5, 6, 17, 20, 24, 33, 36, 42, 51, 54], "downsampl": [5, 6, 13, 20, 27, 29, 35, 46, 50, 53, 54, 55], "upsampl": [5, 13, 20, 21, 29, 31, 36, 41, 46, 49, 53, 55], "re": [5, 7, 24, 31], "tran": 5, "block\ubcc4": 5, "attention\ub85c": 5, "\uad6c\uc131": [5, 16, 17, 18, 22, 29, 31, 32, 33], "overview": [5, 11, 15, 16, 17, 26, 32, 43, 56], "3\uac00\uc9c0": [5, 11, 16, 22, 49, 50, 51, 53, 55], "\uc694\uc18c": [5, 22, 29], "image\ub85c\ubd80\ud130": [5, 21], "character\uc758": 5, "\uc81c\uc5b4\uac00\ub2a5\ud55c": [5, 16], "movements\ub97c": 5, "signal": [5, 11, 12, 54], "\uc5f0\uc18d\uc131\uc744": [5, 20], "relationship": [5, 21, 26, 31], "text\ubcf4\ub2e4": 5, "image\uac00": [5, 15, 21, 44], "level": [5, 11, 15, 18, 26, 31, 34, 35, 38, 41, 55], "\ub0b4\ud3ec\ud568": 5, "encoder\uac00": [5, 36], "encoder\ubcf4\ub2e4": 5, "\uc0ac\uc6a9\ub418\uc5c8\uc9c0\ub9cc": 5, "consistency\uc5d0\ub294": 5, "\uc5ed\ubd80\uc871": 5, "\uc774\uc720": [5, 6, 10, 18, 23, 32], "encoder\ub294": [5, 24, 35, 36, 39, 56], "224x224\uc758": 5, "\uc774\ubbf8\uc9c0\ub4e4\ub85c": 5, "\uad6c\uc131\ub418\uc5b4": [5, 14, 26, 55], "\uc138\ubd80\uc815\ubcf4": 5, "\uc190\uc2e4\uc774": [5, 19, 27, 51, 53], "clip\uc740": [5, 15, 21, 24], "text\uc5d0": [5, 40, 45], "\ub354\uc6b1": [5, 11, 24, 29, 30, 33, 36, 39, 45, 49], "\ubd80\ud569\ud558\uac8c": 5, "\ud6c8\ub828\ub418\uc5b4": [5, 22, 24], "matching\uc5d0": 5, "\uac15\uc870\ub418\uace0": 5, "encoding\uc5d0": [5, 20], "\ubd80\uc871\ud568\uc774": 5, "extract": [5, 13], "network\uc778": [5, 15], "\uace0\uc548": 5, "\uc81c\uc678": 5, "referencenet\uc740": 5, "sd\ub85c": 5, "\ucd08\uae30\ud654\ud558\uace0": 5, "\uc218\ud589\ud558\uace0": 5, "unet\uacfc": [5, 19, 27], "layer\ub85c": [5, 29], "x_1": [5, 13, 36], "x_2": [5, 36], 
"\uc8fc\uc5b4\uc84c\uc744": [5, 6, 16, 17, 18, 19, 27, 29, 51], "t\ubc88": 5, "\uacf1\ud574": 5, "w\ucd95\uc5d0": 5, "concat": [5, 10, 11, 14, 30, 32, 35, 41], "attention\uc744": [5, 24], "map\uc758": [5, 20, 27, 37], "\ubc18\uc744": 5, "\uacb0\uacfc\ub85c": [5, 8, 26, 29, 39], "\ubf51\uc74c": [5, 45], "sd\ub97c": [5, 24, 35], "\uc0ac\uc6a9\ud568\uc5d0": 5, "\ucd08\uae30\uac12\uc774": 5, "\uc815\uc758": [5, 6, 12, 23, 54], "\ub41c": [5, 6, 8, 11, 22, 25, 28, 30, 31, 32, 35, 39, 42, 43, 44, 47, 51, 52, 53, 55], "\uc0ac\uc6a9\uac00\ub2a5": 5, "referencenet\uc758": 5, "\uacf5\uc720\ub418\uace0": 5, "\ub124\ud2b8\uc6cc\ud06c": [5, 17, 19, 29, 32, 56], "\uac00\uc9d0\uc5d0": 5, "unet\uc740": [5, 24], "space\uc5d0": [5, 15, 51], "\uc0c1\uad00\uad00\uacc4\uac00": [5, 39, 54], "\uc120\ubcc4\uc801\uc73c\ub85c": 5, "\uc81c\uacf5\ud568\uc5d0": 5, "\uc2e0\uc18d\ud55c": 5, "\ucd08\uae30\uac12": 5, "\uc124\uc815": [5, 6, 13, 18, 28, 33, 39, 45], "controlnet\uc740": [5, 24], "\uacf5\uac04\uc801\uc73c\ub85c": [5, 19, 26], "align\ub41c": 5, "\ubd80\uc801\ud569": 5, "\ubc29\ubc95\uc5d0\uc11c\ub294": [5, 18], "\uacf5\uac04\uc801\uc73c\ub85c\ub294": 5, "\uad00\uacc4\ub418\uc5b4\uc788\uc9c0\ub9cc": 5, "align\ub418\uc9c0": 5, "\ud0c0": [5, 43, 44, 54], "generation\uc5d0\uc11c\ub294": [5, 7], "frame\uc5d0": 5, "denoising\uc744": [5, 15, 23, 26, 34], "\ucd94\ucd9c\ud560": 5, "\ubc88\ub9cc": [5, 22], "\ud544\uc694": [5, 12, 17, 18, 25, 29, 32, 33, 46, 51], "\ud6a8\uacfc": [5, 16, 19, 25, 32, 37, 44, 49], "\ub2e8\uacc4\uc5d0\uc11c": [5, 13, 19, 22, 24, 26, 30, 35, 40], "\uacc4\uc0b0\ub7c9\uc774": [5, 48], "\uc99d\uac00\ud558\uc9c0": 5, "\uc54a\ub294\ub2e4": [5, 15, 19, 24, 26, 29, 33, 40, 41, 46], "robust\ud55c": 5, "\uc785\uc99d\ud574\uc654\uc9c0\ub9cc": 5, "tuning\uc774": [5, 39, 51], "\ud544\uc694\ud588\uc5c8\ub2e4": 5, "\uc800\uc790\ub4e4\uc740": [5, 11, 14, 15, 18, 21, 26, 30, 32, 33, 35, 39, 40, 45, 46, 49], "\uacc4\uc0b0\ub7c9": 5, "\ub9c9\uae30\uc704\ud574": 5, 
"\ud1b5\ud569\ud558\uc9c0": 5, "latent\uc640": 5, "\ud574\uc0c1\ub3c4\ub97c": [5, 32, 35, 38, 41, 50], "align\uc744": [5, 24], "four": 5, "kernel": [5, 20], "stride": [5, 20, 44, 53], "32": [5, 7, 12, 13, 17, 20, 27, 28, 31, 38, 39, 42, 45, 46, 52, 55], "64": [5, 17, 18, 20, 29, 32, 38, 39, 49, 54, 55], "128": [5, 20, 31, 32, 36, 39, 44, 48, 53, 54], "channel": [5, 9, 13, 29, 30, 31, 38, 46, 50, 55], "final": [5, 13, 21], "\uc774\ubbf8": [5, 10, 44, 49], "\uacf3\uc5d0\uc11c": 5, "\ud1b5\ud569\ud588\uc744": 5, "dependency\uac00": 5, "\uc548\uc5d0": [5, 37, 49], "attention\uacfc": [5, 24, 36], "\uc21c\uc11c": 5, "reshap": [5, 14], "connect": [5, 14, 29, 32, 43, 49, 53], "details\uc5d0": 5, "continu": [5, 6, 32, 36, 43, 47, 53, 56], "\ub2e8\uacc4": [5, 8, 13, 17, 18, 19, 27, 33, 48], "\uccab": [5, 6, 8, 9, 15, 19, 31, 32, 35, 38, 44], "\ubc88\uc9f8": [5, 6, 8, 9, 17, 18, 19, 32, 33, 35, 38, 39], "singl": [5, 20, 27, 32, 36, 37, 40, 43, 51, 52, 53, 55], "\ubc1b\ub294": [5, 11, 13, 19, 29, 38], "\ud074\ub9bd\uc5d0\uc11c": 5, "\ub79c\ub364\uc73c\ub85c": [5, 17, 19, 20], "\uc120\ud0dd": [5, 13, 18, 29, 32, 33, 45], "weight\ub294": [5, 9, 19, 25, 28], "guider\ub294": [5, 16], "\uadf8\ub300\ub85c": [5, 11, 15, 21, 25, 26, 29, 42, 46, 55], "\ud6c8\ub828\ud55c": [5, 17], "\uc18d": [5, 19], "layer\ub9cc": [5, 28], "24frame": 5, "10": [5, 6, 10, 13, 14, 16, 18, 22, 23, 27, 30, 31, 32, 34, 35, 36, 39, 41, 42, 43, 44, 46, 48, 51, 52, 54, 55], "second": [5, 11, 43], "long": [5, 13, 16, 47, 52], "\uc778\ud130\ub137\uc5d0\uc11c": [5, 16], "\ub2e4\uc6b4\ub85c\ub4dc": 5, "dwpose": 5, "distil": [5, 17, 20, 36, 46, 52, 53], "whole": 5, "bodi": [5, 16, 55], "idea": [5, 34], "research": [5, 31, 39, 50, 54], "student": [5, 54], "head": [5, 11, 13, 29, 37, 46], "onli": [5, 7, 12, 24, 32, 35, 37, 41, 45], "nvidia": [5, 19, 27, 32, 47, 52, 55], "a100": [5, 19, 20, 27, 28, 47, 51, 52], "768": [5, 20, 51], "\ud574\uc0c1\ub3c4": [5, 14, 41], "center": [5, 16, 24, 32, 46], "crop": [5, 16, 24, 
27], "30": [5, 29, 30, 36, 39, 41, 50], "000": [5, 6, 18, 35, 42, 51], "size": [5, 7, 16, 20, 24, 26, 28, 31, 32, 35, 38, 44, 48, 49, 50, 52, 54, 55, 56], "learn": [5, 7, 11, 14, 16, 18, 19, 20, 24, 26, 28, 29, 32, 36, 37, 38, 39, 40, 44, 49, 52, 54, 55], "rate": [5, 7, 10, 12, 14, 16, 19, 24, 27, 29, 32, 37, 43, 44, 52, 54, 55], "\uce90\ub9ad\ud130": [5, 17, 22], "skeleton\uc758": 5, "\uae38\uc774\uc5d0": 5, "\uadfc\uc0ac\ud558\uae30": 5, "\uc720\ub3c4\ub41c": [5, 44], "rescal": [5, 35, 43, 46], "sampler": [5, 24, 39, 54], "\uae34": [5, 16, 38, 41], "aggreg": 5, "\ucc44\ud0dd": [5, 10, 46], "evalu": [5, 6, 10, 11, 12, 17, 20, 21, 41, 43, 50, 52, 53], "benchmark": [5, 26, 44, 49, 50], "2\uac1c": [5, 13, 19, 31], "ubc": [5, 19], "tik": 5, "tok": 5, "\uc804\uc2e0\uc774": 5, "\ub098\uc624\ub294": [5, 25, 32, 33, 37, 39, 44, 45, 46, 47, 51], "\uc808\ubc18": [5, 26], "\uae38\uc774\uc758": 5, "portrait": [5, 11, 37], "cartoon": [5, 17], "humanoid": 5, "characters\uc5d0": 5, "\ubcf4\uc774\ub294": [5, 10, 12, 17, 20, 23, 34, 44, 53, 54], "psnr": [5, 30], "lpip": [5, 6, 15, 20, 30, 43, 53], "fvd": [5, 29, 41], "fr\u00e9chet": 5, "distanc": [5, 18, 20, 29, 32, 33, 43, 45, 52], "quantit": [5, 6, 21, 24, 26, 30, 51, 55], "500": [5, 10, 19, 39], "videos\ub85c": 5, "\uc57d": [5, 17, 19, 21, 22, 40, 44, 45], "bdmm\uc740": 5, "\uc637\uc758": [5, 19], "\uc783\uc5b4\ubc84\ub9ac\ub294": [5, 33, 37], "\uc0c9\uacfc": 5, "\uc12c\uc138\ud55c": 5, "\uad6c\uc870\uc801": [5, 7, 16], "\uc694\uc18c\uc5d0": 5, "error": [5, 22, 42, 54, 56], "\uc138\ubd80": [5, 17, 22, 29, 33, 36, 40], "\ub0b4\uc6a9\uae4c\uc9c0": 5, "\uc77c\uad00\uc131\uc788\uac8c": [5, 11], "\ubcf4\uc874\ub428": 5, "tiktok": 5, "340": 5, "between": [5, 11, 13, 21, 32, 45, 49, 54], "disco\uc5d0\uc11c\ub294": 5, "foreground": [5, 26], "mask\ub97c": [5, 26, 37], "subject": [5, 19, 22, 47, 52], "motion\uc73c\ub85c\ubd80\ud130": 5, "\uc804\uacbd\uacfc": 5, "\ubc30\uacbd\uc758": 5, "\uad6c\ubd84": 5, "sequence\uc5d0\uc11c\ub3c4": 5, 
"\uc2dc\uac01\uc801\uc73c\ub85c": [5, 8, 45], "robust": [5, 6, 11, 15, 30, 42, 55], "gen": [5, 47], "image\uc5d0": [5, 13, 15, 21, 24, 40], "\uc678\uad00": [5, 16, 19], "\uc2e0\ub8b0\ub3c4\ub9cc": 5, "\uc5bc\uad74\uc774": 5, "\uc77c\uad00\ub418\uac8c": [5, 8, 17], "\uc720\uc9c0\ub418\ub294": [5, 11], "\ubb38\uc81c\uc5d0": [5, 13, 33], "\ubd09\ucc29\ub41c": 5, "\uc0c1\ud669": [5, 18], "\uc18d\uc5d0\uc11c": 5, "\ub300\ube44": [5, 18, 27, 51], "\uc2dc\uac04\ub3d9\uc548": 5, "apper": 5, "design": [5, 20, 38, 48, 56], "\ud6a8\uacfc\uc131": 5, "\uc99d\uba85\uc744": [5, 15], "encoder\ub9cc": 5, "\uacb0\ub860": [5, 39, 42], "referencenet\ub97c": 5, "\uc88b\uc558\ub2e4": [5, 42], "\uac83\uc5d0": [5, 9, 26, 27, 31, 40, 44], "\uce21\uba74\ub9cc": 5, "\ubcf4\uc774\uae30": 5, "\ubcf4\uc774\uc9c0": 5, "\ubd80\ubd84\uc5d0": [5, 8, 20, 26, 38, 39, 43, 56], "ill": 5, "problem\uc73c\ub85c": [5, 25], "\ubd88\uc548\uc815": [5, 36], "\ud65c\uc6a9\uc5d0": 5, "non": [5, 6, 11, 15, 20, 23, 31, 32, 42, 46, 52, 55], "oper": [5, 29, 43], "effici": [5, 9, 11, 12, 28, 32, 42, 49, 52], "translat": [6, 27, 32], "brownian": [6, 43], "bridg": 6, "cvpr": [6, 10, 18, 20, 23, 27, 31, 38, 47, 50, 52, 57], "2205": [6, 49], "07680": 6, "xuekt98": 6, "seonhoon": [6, 11, 42, 57], "relat": [6, 11, 30, 42, 57], "youtub": [6, 11, 28, 45], "nov": [6, 8, 15, 29, 30, 41, 42, 54], "\ub3c4\uc785\ud55c": [6, 22, 27, 33, 35], "\ucd5c\ucd08\uc758": [6, 11, 32], "\ud55c\uacc4\ub97c": [6, 20, 29, 31, 37, 44], "\uadf9\ubcf5\ud568": 6, "\uc774\ud574\ud558\uae30": 6, "\uc774\ud574\ud574\uc57c\ud568": 6, "stochast": [6, 11, 12, 21, 34, 43, 48, 51, 53, 54], "\ud574\ub2f9\ud568": 6, "\uc2dc\uac04\uc758": 6, "\ud750\ub984\uc5d0": 6, "\ubd88\ud655\uc2e4\uc131\uc744": 6, "\ubcc0\ud558\ub294": [6, 15], "\ubcc0\uc218\ub4e4\uc758": 6, "\uc9d1\ud569": [6, 13, 17, 24, 32], "x_t": [6, 11, 13, 15, 21, 24, 25, 27, 33, 36, 42, 43, 46, 51, 55], "\ubcc0\uc218\ub97c": [6, 26], "\ubcc0\uc218\uac00": 6, "\uad00\ucc30\ub41c": [6, 32], 
"\uc2dc\uac04\uc744": [6, 11, 19, 27, 33], "\ub098\ud0c0\ub0c4": [6, 13, 17, 18, 39], "discret": [6, 31, 36, 43, 45, 54], "\uad6c\ubd84\ud560": 6, "variabl": [6, 15, 18, 48, 54, 56], "wiener": 6, "\uc18c\uac1c": [6, 10, 11, 18], "\uc720\uccb4\uc758": 6, "\ubbf8\uc18c\uc785\uc790\uac00": 6, "\ubd88\uaddc\uce59\ud558\uac8c": 6, "\uc6b4\ub3d9\ud558\ub294": 6, "\ud604\uc0c1": [6, 17, 20, 27, 32, 47], "\uad74\ub69d\uc5d0\uc11c": 6, "\ud37c\uc838\ub098\uac04": 6, "\uc5f0\uae30": 6, "\uc624\ub978\ucabd\uc73c\ub85c": 6, "90\ub3c4": 6, "\ud68c\uc804\uc2dc\ud0a8": 6, "\uc0ac\uc9c4\uc73c\ub85c\ubd80\ud130": 6, "\uc9c1\uad00\uc801\uc73c\ub85c": [6, 36], "\uc774\ud574\ud574\ubcfc": 6, "\uc5f0\uc18d": [6, 19], "\uacfc\uc815\uc73c\ub85c": [6, 41, 42, 47], "\ubaa8\ub378\ub9c1\ud55c": [6, 45], "w_0": [6, 10, 25, 28], "max": [6, 8, 25, 28, 51], "1000": [6, 12, 13, 32, 35, 39], "\uc778": [6, 7, 11, 15, 27, 30, 42, 43, 51, 54], "100\ubc88": 6, "w_t": [6, 17, 43, 47], "\ub098\ud0c0\ub0b8\ub2e4": [6, 15, 40], "\uc774\ud574\ud574\ubcf4\uc790": 6, "\uac00\uc815\ud574\ubcf4\uc790": 6, "\uc774\ub77c\uace0": [6, 9, 11, 15, 40, 43], "\ud558\uc790": [6, 26], "\ud558\ub2e4\uace0": 6, "\uc815\uc218": [6, 35], "\ubd80\uc5ec\ub418\uc5b4\uc57c": 6, "\uac04\uaca9\uacfc": 6, "\ubcc0\ud654\ub7c9\uc774": [6, 10, 54], "\ube44\ub840\ud574\uc57c": 6, "\uc624\ub798": [6, 33, 40], "\uc9c0\ub0ac\uc744\uc218\ub85d": 6, "\ubcc0\ud55c\ub2e4": 6, "notat": [6, 41, 54], "www": [6, 11, 14, 28, 45], "com": [6, 11, 14, 28, 33, 36, 40, 44, 45, 47], "watch": [6, 11, 28, 45], "ld0rxwajpkm": 6, "ab_channel": [6, 11], "finrgb": 6, "delta": [6, 10, 15, 22, 25, 28, 43, 52, 53], "\uac04\uaca9": 6, "\uc0b4\ud3b4\ubcf4\uace0\uc790": 6, "\uac04\uaca9\uc758": [6, 32], "epsilon_t": [6, 15, 21], "\uc2dc\uc810\uc5d0\uc11c": [6, 17], "\uac04\uaca9\uae4c\uc9c0": 6, "\uc99d\uac00\ud55c": [6, 26, 39], "\uac12": [6, 14, 18, 20, 24, 32, 36, 43, 45], "w_": [6, 17, 35, 47], "\uc774\ud574": [6, 11, 26, 29], "\ub77c\uace0": [6, 10, 11, 15, 
22, 25, 28, 30, 31, 40, 42, 49, 51, 56], "\uc815\uc758\ud574": 6, "\uadfc\uac70\ub97c": 6, "\ucc3e\uc544\ubcf4\uba74": 6, "\ubcc0\uc218": 6, "\ub3c4\uc785\ud568\uc73c\ub85c\uc368": 6, "\uac04\uaca9\ub3c4": 6, "\uace0\ub824": [6, 46], "\uadf8\ub807\ub2e4\uba74": [6, 11], "\uc65c": [6, 9, 11, 14, 15, 23, 30, 42], "\ud558\ud544": 6, "\uacf1\ud588\uc744\uae4c": 6, "\uac00\uae4c\uc6cc\uc9c8": 6, "\ucc9c\ucc9c\ud788": 6, "\uc218\ub834\ud568": 6, "\ub9cc\uc57d": [6, 39], "\ud558\ub2e4\uba74": 6, "\ub77c\uba74": 6, "\uc791\uc544\uc9d0": 6, "\ucee4\uc9c8": 6, "\ucee4\uc9d0": 6, "\uc8fc\uc758\ud560": 6, "\uc0ac\ud56d": [6, 18], "\uc774\ubbc0\ub85c": [6, 11, 15, 32, 43, 46, 54], "w_1": 6, "\uc11c\ub85c": [6, 8, 11, 15, 17, 26, 30, 38, 42, 44], "\ub3c5\ub9bd\uc778": 6, "\ub9de\uc9c0\ub9cc": 6, "\ub3c5\ub9bd\uc774\ub77c\ub294": 6, "\ub9d0\uc740": [6, 44], "\uc544\ub2d8": 6, "epsilon_0": 6, "var": 6, "\uacf5\ubd84\uc0b0\uc740": 6, "\ud30c\ub780\uc0c9": [6, 39], "\uc810\ub4e4\uc740": 6, "1\ubc88": [6, 44], "\uacb0\uacfc\uc784": [6, 27], "\uae4c\uc9c0": [6, 26, 28, 30, 32, 41, 42, 50, 51, 54], "\uc218\ud589\ud558\uba74": 6, "\ub9cc\ud07c": [6, 15, 35, 42, 45], "t_2": [6, 25, 43, 51], "t_1": [6, 25, 43, 51], "5\ubd84": [6, 47], "10\ubd84\uc73c\ub85c": 6, "\uc9c4\ud589\ud558\uba74": 6, "w_5": 6, "\uc544\ub2d0": 6, "\uc788\uc73c\ub098": [6, 24, 27, 29], "\ubcc0\ud654\ub7c9": 6, "t_": [6, 15, 17, 20, 25, 32, 34, 43, 51], "t_5": 6, "\ub530\ub978\ub2e4": [6, 33], "standard": [6, 12, 14, 20, 38, 42, 43, 44, 53, 54], "\uc2dc\uc810\uacfc": 6, "\uc2dc\uc810\uc758": [6, 12, 19, 46], "\uc77c": [6, 11, 18, 48, 51, 54], "\uc810\uc744": [6, 15, 19, 26, 33, 34, 37], "\uc120\ud615\uc73c\ub85c": 6, "\uc5f0\uacb0\ud558\ub294": 6, "\uc774\ud574\ub97c": [6, 15, 24], "probabl": [6, 18, 25, 34, 43], "start": [6, 18], "state": [6, 24, 33, 54, 55], "end": [6, 7, 13, 19, 36, 38, 53], "\ub418\uc5b4": [6, 11, 14, 17, 22, 26, 27, 31, 38, 43, 48, 55], "\uc815\uc758\ub420": 6, "\uc2dc\uc791\uac12\uacfc": 6, 
"123": 6, "\ubd84\uc0b0\uc740": 6, "\uc2dc\uc791\ud574\uc11c": [6, 13, 38], "\uc99d\uac00\ud558\ub2e4\uac00": 6, "\ucd5c\ub300\uac00": 6, "\ub418\uc5c8\ub2e4\uac00": 6, "\uc774\ud6c4\ub85c\ub294": 6, "\uac10\uc18c\ud558\uc5ec": [6, 26], "\ub9c8\uc9c0\ub9c9\uc5d4": 6, "\uc218\ub834\ud558\uac8c\ub41c\ub2e4": 6, "w_1000": 6, "100\uac1c\uc758": [6, 19], "\uc0d8\ud50c\ub9c1\ud55c": [6, 17, 43], "abstrcat": 6, "\uae30\uc874\uc758": [6, 9, 11, 15, 19, 25, 29, 30, 32, 34, 39, 44, 51, 53, 55], "\ub4e4\uc740": [6, 11, 30, 49], "\ubcc0\ud658\uc744": [6, 17, 27, 36], "gener": [6, 9, 11, 16, 18, 20, 21, 22, 23, 24, 25, 26, 29, 30, 31, 35, 36, 37, 38, 40, 42, 44, 45, 46, 47, 48, 49, 53, 54, 55, 57], "\ub2e4\ub8f8": [6, 32], "\uc774\ub85c": [6, 22, 49], "\uc0c1\uc774\ud55c": 6, "\ubcc0\ud658": [6, 14, 17, 32, 45], "\uc5d0\ub294": [6, 11, 13, 30, 44, 49, 51], "\uc5b4\ub824\uc6c0": [6, 10, 14, 16, 17, 18, 20, 33, 48], "\uae30\ubc18\ud55c": [6, 7, 12, 18, 32], "\ubaa8\ub378\ub9c1\ud558\ubbc0\ub85c": 6, "bidirect": 6, "\uc784": [6, 42], "\ubcc0\ud658\uc5d0": 6, "\uc811\ubaa9\ud55c": 6, "\ub17c\ubb38\uc784": 6, "\ud6cc\ub96d\ud55c": [6, 42, 45, 49], "\uc2e4\ud5d8\uc801\uc73c\ub85c": [6, 15, 36, 38, 43, 49], "\uc99d\uba85\ud568": [6, 14, 49], "introduct": 6, "i2i": 6, "\ubcc0\ud658\uc5d0\uc11c": 6, "pix2pix": [6, 44, 53], "fideltii": 6, "\ub192\uc558\uc73c\ub098": 6, "output": [6, 7, 9, 11, 13, 14, 16, 17, 18, 20, 28, 32, 36, 41, 43, 44, 45, 47, 50, 53, 54], "\uc0dd\uc131\ud615": [6, 11, 48], "\uc548\ub098\uc624\uace0": 6, "applic": 6, "\uc2dc\ud0b4\uc73c\ub85c\uc368": [6, 30], "desir": [6, 29], "\ucd94\ub860\ud574\ub0b8\ub2e4\ub294": 6, "\uba85\ub8cc\ud55c": 6, "\uc774\ub860\uc801": 6, "\uadfc\uac70\uac00": 6, "\uc548\ub418\ubbc0\ub85c": 6, "domain": [6, 15, 44, 53, 54], "\uba87\uba87": [6, 15, 21, 23, 33], "\uc5d0\uc11c\ub9cc": [6, 32], "\ud65c\uc6a9\ub420": [6, 11], "inpaint": [6, 24, 30, 31, 43, 50, 55], "super": [6, 12, 13, 20, 29, 38, 39, 41, 43, 44, 48, 50, 53, 55, 56], 
"\uac1c\uc120": [6, 20, 23, 32, 39, 46, 55], "\ud558\uae34": 6, "\ud588\uc73c\ub098": 6, "mechan": [6, 10, 29], "multi": [6, 7, 10, 11, 16, 19, 20, 24, 26, 40, 49, 53, 55], "modal": [6, 7, 11, 26, 40, 49], "\uc8fc\uc5b4\uc9c0\ubbc0\ub85c": 6, "\uc81c\uc2dc\ud558\uae30\uac00": 6, "\ud798\ub4e6": 6, "\uc544\ud0a4\ud14d\uccd0": [6, 11, 42, 45, 46, 49], "\uac00\uc18d\uc744": 6, "\uc218\ud589\ud568": [6, 29], "work": [6, 11, 42, 52, 57], "duffus": 6, "simplifi": 6, "\uc7a0\uae50": 6, "\uac19\uc74c": [6, 30, 40], "\uc0bd\uc785\ub41c": 6, "\ub4dc\ub7ec\ub098": 6, "\uc788\uc9c0": [6, 22, 23, 33, 41], "\uc54a\uc73c\ubbc0\ub85c": [6, 33], "\ub3c4\ub2ec\ud560": [6, 33], "\ubcf4\uc7a5\uc774": 6, "\ub3d9\uc548\uc758": 6, "\ubd84\ud3ec\uac00": [6, 32, 34, 44, 56], "\uc2dd": [6, 18, 19, 26, 46], "\ubcf4\uc558\ub358": 6, "\ubd84\uc0b0\uc744": [6, 13, 39, 49], "\uad6c\ud574\ubcf4\uc790": 6, "\uc2dd\uacfc": [6, 26, 36, 43], "\uc758\ubbf8\uc784\uc744": 6, "method": [6, 11, 12, 26, 32, 38, 42, 43, 45, 46, 54], "\ud5a5\ud574": 6, "vqgan": 6, "\uc601\uc0c1\uc758": [6, 51], "total": [6, 55], "\u03b4_t": 6, "\ubd84\uc0b0": [6, 13, 39, 46, 56], "\ub098\ud0c0\ub09c": [6, 17, 33], "\uc0ac\uc6a9\ud558\uac8c": 6, "\ucd5c\ub300": [6, 8, 28, 32, 45], "\ubd84\uc0b0\uac12": 6, "\uc5d0\uc11c\uc758": [6, 17, 20, 41, 42, 43, 52, 53, 54, 55], "\ubd84\uc0b0\uac12\uc778": 6, "\u03b4_": 6, "\ucee4\uc9c0\uba74": 6, "\ubd84\uc0b0\uac12\ub3c4": 6, "\ucee4\uc9c0\ub294\ub370": 6, "\ub2e4\ub8e8\uae30\uc5d0": 6, "\ud07c": [6, 10], "\uc774\uba74\uc11c": 6, "\ub3c5\ub9bd\uc77c": 6, "schedul": [6, 13, 17, 23, 31, 42, 43, 44, 46, 47, 54, 55], "\ud574\ubcfc": [6, 27, 47], "\uc815\uc218\uc758": 6, "\ucd5c\ub313\uac12\uc778": 6, "\uc774\ub77c\uba74": [6, 11], "delta_t": [6, 36], "\uadf8\ub9bc\uacfc": [6, 9, 24, 25, 31, 36, 38, 45, 48, 49, 56], "\uac19\uac8c": 6, "\uc2dc\uac01\ud654\ud55c": 6, "m_t": 6, "overt": 6, "\uc2dc\uc791\ud558\ub294": 6, "\uc5d0\uc11c\ub294": [6, 8, 11, 14, 15, 21, 23, 24, 26, 31, 41, 42, 47, 
48, 49, 52, 53], "m_0": 6, "\ud3c9\uade0\uc740": 6, "\ub05d\ub098\ub294": 6, "\ubd84\uc0b0\uc774": [6, 27], "\uc911\uac04": [6, 8, 19, 23, 33, 40, 46], "\uc9c0\uc810\uae4c\uc9c0\ub294": [6, 26], "\uc99d\uac00": [6, 14, 17, 20, 27, 46], "\ud558\ub2e4\uac00": 6, "\uc9c0\uc810\ubd80\ud130": 6, "\uac10\uc18c": [6, 17, 18, 32], "\ubd84\uc0b0\uac12\uc5d0": 6, "\uc758\ud574": [6, 8, 17, 19, 31, 32, 33, 36, 38, 43, 44, 47], "\uacb0\uc815": 6, "\uc2a4\ucf00\uc77c\ub9c1\ud558\ub294": 6, "\ub450\uc5b4": [6, 24, 44], "\uc870\uc808": 6, "\uc870\uc808\uc744": 6, "\uacc4\uc218": 6, "\ud3ec\ud568\ub41c": [6, 26, 32, 33], "\ub514\ud3f4\ud2b8": 6, "margin": [6, 18, 48, 56], "\ub9cc": [6, 10, 11, 13, 15, 24, 28, 30, 31, 41, 42, 44, 47, 51, 53, 54], "\uc81c\uacf5": [6, 11, 17, 30], "\uc11c\ub294": 6, "transit": [6, 18], "q_": [6, 15, 45, 51, 56], "bb": 6, "x_": [6, 10, 12, 13, 15, 17, 18, 20, 21, 25, 36, 37, 43, 46, 47, 51, 54], "\uc54c\uc544\uc57c\ud568": 6, "\ub54c\uc758": [6, 11, 14, 15, 19, 35, 39], "\uc4f8": [6, 11, 15, 24, 48], "m_ty": 6, "m_": 6, "\uc4f0\ub294": 6, "\uc633\uc74c": 6, "\ud558\uac8c": [6, 7, 11, 20, 26, 30, 35, 41, 42, 46, 48, 50, 52, 54, 55, 56], "\ub300\uccb4": [6, 11, 14, 20, 28, 32], "\uc720\ub3c4\ub428": 6, "delta_": [6, 13], "\ub300\uc785": 6, "\uad6c\ud558\uba74": [6, 26], "\uc778\ub370": [6, 9, 30], "\uc544": 6, "\ub3c4\uba54\uc778\uc73c\ub85c\ubd80\ud130": 6, "\ub3c4\uba54\uc778\uc73c\ub85c\uc758": 6, "fix": [6, 12, 13, 22, 24], "\uc815\uc758\ud558\ub294\uad6c\ub098": 6, "\ub85c\ubd80\ud130": [6, 7, 8, 9, 11, 17, 21, 29, 30, 32, 43, 44, 47, 48, 52, 53, 54, 55, 56], "\uc2dc\uc791": [6, 9, 29], "\uc2a4\ud15d\ub9c8\ub2e4": [6, 35], "\uc870\uae08\uc529": [6, 11, 30, 39], "\uc81c\uac70\ud574\ub098\uac10": 6, "\ub460\uc73c\ub85c\uc368": 6, "\uc790\uccb4\uc5d0\uc11c": 6, "mu_": [6, 13, 15, 21], "\uc608\uce21\ub41c": [6, 14, 18, 19, 22, 31, 32, 33], "\ub178\uc774\uc988": [6, 17, 19, 22, 29, 33, 35, 36, 47], "\ud3c9\uade0\uac12\uc774\uba70": 6, "tild": [6, 13, 
15, 19, 20, 22, 23, 25, 41, 51, 54, 56], "\ub178\uc774\uc988\uc758": [6, 41], "\uc2e0\uacbd\ub9dd": [6, 9, 22, 33], "\uc608\uc804": 6, "\uac19\uc558\uc74c": 6, "\uc774\ub85c\ubd80\ud130": [6, 52], "elbo": [6, 13, 45], "term": [6, 7, 23, 48, 56], "\uace7": [6, 11], "\ub418\ubbc0\ub85c": [6, 9, 14, 24, 36], "\ubca0\uc774\uc988": 6, "\uc774\ub860\uacfc": 6, "markov": [6, 12, 13, 24, 48], "chain": [6, 11, 13, 46, 47, 48], "properti": [6, 31, 43, 47], "\ub3c4\ucd9c": 6, "markovian": [6, 13, 15, 31, 46, 54], "\uc131\ub9bd\ub428\uc744": 6, "\uc815\ub9ac\ub428": 6, "\ud1b5\ud569\ud558\uace0": 6, "reparameter": [6, 45, 51], "mu_t": 6, "\ubcc0\ud615\ud560": 6, "\ubcc0\ud615": [6, 29], "\uc608\uce21\ud558\ub294": [6, 7, 13, 15, 21, 22, 31, 36, 41, 51, 54], "\uc608\uce21\ud558\ub3c4\ub85d": [6, 13, 19, 23, 31, 32, 54], "\ud559\uc2b5\ub428": 6, "\ub0b4\uc6a9\uc744": [6, 8, 11, 16, 22, 29], "\uc2dd\uc5d0": [6, 43, 51], "\uba85\uc2dc\ud558\uae30": 6, "\uba85\uc2dc\ub41c": 6, "\uc368\ubcfc": 6, "\uc368\ubcf4\ub294": 6, "\uac83\uc784": 6, "\uadf8\ub7f0\ub370": [6, 9, 38], "\ucc38\uace0\ud574\ubcf4\uba74": 6, "\uc6b0\ub9ac\ub294": [6, 11, 32, 56], "\uadfc\uc0ac\ud558\ub3c4\ub85d": 6, "\ud559\uc2b5\uc2dc\ucf1c\uc57c\ud568": 6, "mu": [6, 13, 18, 25, 38, 43, 46, 51, 54, 56], "_t": [6, 15, 19, 24, 26, 43, 51, 55], "\uc815\ub9ac\ub41c": 6, "\ud559\uc2b5\ub418\uc5b4\uc57c\ud558\ub294": 6, "kl": [6, 13, 18, 27, 36, 48, 56], "arg": [6, 42, 47, 56], "min_": [6, 56], "c_": [6, 8, 10, 17, 19, 20, 25, 35, 37, 43, 47, 51], "\ub2e8\uc21c\ud654\ub420": 6, "\uc815\ub9ac": [6, 33, 44], "\ub9c8\uce58": [6, 11, 26], "\uadf8\ub7ec\ud588\ub4ef\uc774": 6, "\ube60\uc9c4": 6, "acceler": [6, 12, 46, 47], "\uac00\uc18d\uc2dc\ud0ac": 6, "\uae38\uc774\ub97c": [6, 29], "\ub450\uc5c8\uc744": 6, "varibal": 6, "subset": [6, 19, 52, 53], "\uc815\uc758\ub428": 6, "\uac12\uc758": [6, 14, 19, 23, 51], "\ub450\uc5c8\uc74c": 6, "setup": [6, 17, 34, 52], "\ud558\uc774\ud37c\ub9c8\ub77c\ubbf8\ud130": 6, 
"\ud504\ub808\uc784\uc6cc\ud06c\ub294": 6, "\uc774\ub8e8\uc5b4\uc9d0": 6, "\uc0ac\uc6a9\ub41c": [6, 7, 8, 17, 19, 33, 44, 54], "stage": [6, 11, 16, 26, 30, 36, 52, 53, 55], "fid": [6, 7, 11, 13, 14, 21, 23, 27, 31, 33, 35, 38, 43, 45, 46, 49, 51, 53, 54, 55], "\uc0dd\uc131\ubb3c\uc758": [6, 8], "\ud3c9\uac00\ud558\uae30": [6, 11, 26, 29, 33, 36, 39], "5\uac1c\uc758": [6, 8, 19, 20, 40], "\uc0d8\ud50c\uc744": [6, 7, 17, 21, 22, 29, 32, 33, 40, 44, 56], "\ub9c8\ub2e4\uc758": 6, "\ud45c\uc900\ud3b8\ucc28\uc758": 6, "\ub300\ud574\uc11c": [6, 11, 18, 19, 21, 22, 26, 27, 29, 32, 38, 39, 40, 42, 43, 44, 47, 48, 52, 54], "\ud3c9\uade0": [6, 7, 8, 13, 16, 29, 36, 47, 56], "\ub0c4": [6, 31, 45], "\uc2e4\ud5d8\ud568": 6, "\ub2a5\ub825": [6, 11, 24, 30], "celebamask": 6, "hq": [6, 15, 22, 27], "layout": [6, 26, 27, 53], "\uc8fc\uace0": [6, 16, 21], "photorealist": [6, 11, 21, 29, 39, 49], "\uc0dd\uc131\ud574\ub0b4\ub294": [6, 10, 11, 23, 34], "\ud3c9\uac00": [6, 7, 10, 17, 18, 27, 29, 31, 32, 33, 34, 36, 41, 47], "sketch": [6, 24, 55], "edges2sho": 6, "edges2handbag": 6, "realist": [6, 18, 26, 32, 34, 36, 50, 53], "faces2com": 6, "\uc2e4\ud5d8\uc740": [6, 11, 27], "\ud3c9\uac00\ud588\ub2e4\uba74": 6, "qualit": [6, 10, 22, 24, 45, 51, 52, 55], "comparison": [6, 11, 13, 21, 28, 34, 37, 46, 47, 49, 52, 54], "\ucd94\ub860": [6, 11, 19, 42, 43, 45, 51], "\uc9c0\ub3c4": [6, 44], "\ud559\uc2b5\ud558\ubbc0\ub85c": [6, 24], "cyclegan": [6, 53], "\uc2a4\ucf00\uc77c\uc758": 6, "\ub5a8\uc5b4\uc9d0": 6, "drit": 6, "\ubaa8\ub378\ub4e4": [6, 36, 49], "\ub0c8\uc73c\ub098": 6, "\ubcc0\ud658\ub41c": 6, "oversmooth": 6, "\uc788\uc5c8\uace0": [6, 44], "ground": [6, 13, 16, 19, 31, 32, 36, 43, 44, 48], "truth": [6, 13, 19, 26, 31, 32, 43, 44, 48], "\uacfc\ub294": 6, "\uac70\ub9ac\uac00": [6, 44, 51], "\uba40\uc5c8\uc74c": 6, "cde": 6, "\ubaa8\ub378\ub4e4\ubcf4\ub2e4\ub294": 6, "\uc131\ub2a5": [6, 10, 11, 13, 17, 19, 20, 25, 26, 27, 28, 32, 33, 39, 42, 43, 44, 45, 46, 47, 48, 51, 54, 55], 
"\uc601\ud5a5": [6, 46], "\ubc1b\uc74c": [6, 49], "\uc904": [6, 15, 21, 35, 36, 44, 46, 49], "rregular": 6, "occlus": [6, 19], "\ub098\ud0c0\ub098\ub294\ub370": 6, "\uc9c1\uc811\uc801\uc778": [6, 48], "\ud558\ubbc0\ub85c": [6, 13, 15, 36, 56], "\ubb38\uc81c\ub85c\ubd80\ud130": 6, "\uc790\uc720\ub85c\uc6c0": 6, "\ud2b9\uc131\uc73c\ub85c": 6, "\uc0dd\uc131\ud574\ub0c4": 6, "\uc2e4\ud5d8\uc5d0\uc11c": [6, 21, 36, 44, 56], "\uae30\ub85d\ud588\uc73c\uba70": 6, "\uae30\ub85d\ud568": [6, 45], "\uc131\ub2a5\uc740": [6, 19, 23, 39, 42], "\ub6f0\uc5b4\ub0a8": 6, "\uc810\uc218\uac00": [6, 7, 14, 45, 49], "\ub6f0\uc5b4\ub0ac\uc74c": [6, 14], "\uc2e4\ud5d8\ud588\uc74c": 6, "\uc5d0\uc11c\ub3c4": [6, 44, 47, 52], "campar": 6, "\uae30": 6, "\ub85d\ud568": 6, "\uc0c9\uc0c1\ud654": 6, "\ub4f1\uc758": [6, 13, 19, 22, 24, 32, 40, 43, 44, 51, 53, 54], "studi": [6, 8, 10, 17, 22, 37, 52, 54], "factor": [6, 14, 27], "\ud588\uc74c": [6, 30, 46], "\ub450\uace0": [6, 20, 23, 25], "\uc791\uc744": [6, 55], "\uc870\uae08\ub9cc": 6, "\ub298\ub824\ub3c4": 6, "\ud06c\uac8c": [6, 8, 19, 22, 24, 25, 26, 27, 34, 36, 39, 40, 42, 44, 48, 50, 51, 52, 54, 55, 56], "\uc774\uc0c1\uc758": [6, 19, 39, 43], "\ud0a4\uc6cc\ub3c4": 6, "\ubcc0\ud654\uac00": [6, 15, 17, 41, 44, 49], "\ubbf8\ubbf8\ud568": 6, "maximum": [6, 7, 27], "varianc": [6, 12, 13, 15, 21, 32, 39, 43, 54, 56], "\uacc4\uc218\uc5d0": 6, "\ub418\uc5c8\uc74c": 6, "\uac83\ucc98\ub7fc": [6, 30, 36, 44], "conclus": 6, "futur": 6, "\ubc29\ubc95\uacfc": [6, 19, 21, 33, 35, 39, 48], "\ub2ec\ub9ac": [6, 17, 18, 19, 21, 33, 34, 39, 48, 56], "\uc5d0\ub3c4": [6, 43], "\uc801\uc6a9\ud574\ubcfc": 6, "\uc608\uc815": 6, "\uc790\ub8cc": [6, 44], "sine": 6, "qua": 6, "none": [6, 9, 13, 29, 38, 42, 53, 55], "tistori": [6, 40], "158": 6, "autoregress": [7, 14, 24, 27, 31, 48], "scontent": 7, "gmp1": 7, "xx": 7, "fbcdn": 7, "t39": 7, "2365": 7, "358725877_789390529544546_1176484804732743296_n": 7, "_nc_cat": 7, "108": 7, "ccb": 7, "_nc_sid": 7, "3c67a6": 7, 
"_nc_ohc": 7, "plfu_ur_vyaax_nagu8": 7, "_nc_ht": 7, "oh": 7, "00_afdrhahxv1pcf0lqicjiynmorpvcgeq0emv5_ve2_tncvg": 7, "oe": 7, "652ff632": 7, "jun": [7, 31, 35, 36, 41], "hyoung": [7, 31, 35, 41], "lee": [7, 14, 17, 31, 32, 35, 41, 44, 48, 56, 57], "oct": [7, 21, 22, 25, 34, 39, 55], "\ubcf5\uc7a1\ud558\uac8c": 7, "\uad6c\uc131\ub41c": [7, 17, 19, 27, 29, 32, 33, 53, 55, 56], "\uac1d\uccb4": [7, 11, 17, 26, 27, 32, 33, 50, 52], "\uc190": 7, "\ud14d\uc2a4\ud2b8\uc640": [7, 11, 19, 21, 22, 29, 36, 40], "\ub458": [7, 15, 21], "\ub2e4": [7, 15, 19, 26, 30], "\uac00\uc9c4": [7, 17, 21, 23, 27, 32, 35, 36, 37, 39, 40, 43, 44, 45, 47, 53, 55, 56], "\uac80\uc0c9": 7, "\uc99d\uac15": [7, 17], "\ud1a0\ud070": [7, 14, 17, 33], "\ub514\ucf54\ub354": [7, 8, 14, 19], "\uc804\uc6a9": [7, 33, 49], "\uba40\ud2f0": [7, 19], "\ubaa8\ub2ec": 7, "\ubaa8\ub378\uc774\ub2e4": [7, 19, 21, 24, 25, 27, 35, 36, 51], "cm3": 7, "\uc544\ud0a4\ud14d\ucc98\ub97c": [7, 19, 33, 35, 53], "\uc2a4\ud0c0\uc77c": [7, 8, 19, 44], "tun": 7, "\uac00\uc84c\ub2e4": 7, "\ub9de\ub3c4\ub85d": [7, 24, 35], "scale\uc758": [7, 20], "\ub2e8\uacc4\ub97c": [7, 15, 22, 35, 48, 52], "\ud3ec\ud568\ud55c\ub2e4": [7, 33, 35], "\ub370\uc774\ud130\ub294": [7, 29, 32], "\ub77c\uc774\uc13c\uc2a4\uac00": 7, "shutterstock\uc758": 7, "scale\ub85c": 7, "sft": 7, "\uc785\ub825\uacfc": [7, 14], "\ucd9c\ub825": [7, 17, 19, 32, 33, 43, 44], "\ud1a0\ud070\uc744": [7, 8, 14, 19, 31, 33, 40], "\uc11e\uc744": 7, "\ud504\ub86c\ud504\ud2b8\uc5d0": [7, 8, 26, 33], "\ub9de\ub294": [7, 8, 17, 29, 31, 33, 36, 44, 47], "\uc0dd\uc131\ud558\ub294\ub370": [7, 8, 18, 36, 52, 55], "cm3leon\uc740": 7, "output\uc744": [7, 9, 21, 35], "contain": 7, "\uc18c\uac1c\ud55c\ub2e4": [7, 15, 19, 21, 29, 35], "iamg": 7, "segmentation\uae4c\uc9c0": 7, "3\uc5b5": 7, "\uac1c\uc758": [7, 19, 22, 28, 29, 31, 32, 33, 35, 38, 40, 41, 42, 43, 44, 45, 47, 52], "\ud1a0\ud070\uc73c\ub85c": [7, 14, 31, 45], "\ud559\uc2b5\ud588\ub294\ub370": 7, "generation\ub3c4": 7, 
"\uc218\ud589\ud55c\ub2e4": [7, 24, 42, 51], "5\ubc30\ub85c": 7, "shot": [7, 11, 18, 21, 26, 29, 31, 37, 39, 45, 49, 53], "coco\ub85c": [7, 49], "fid\ub97c": [7, 12, 27, 33], "\uce21\uc815\ud55c": 7, "88": [7, 31], "\uc810\uc73c\ub85c": 7, "google\uc758": 7, "parti": 7, "\uc131\ub2a5\uacfc": [7, 34, 39], "\uc218\uc900\uc744": 7, "\ub2ec\uc131\ud588\ub2e4": [7, 24, 51], "ra": 7, "cm3\ub97c": 7, "\ub3c4\uba54\uc778\uc5d0\uc11c": [7, 8], "\uc5f0\uad6c\ud588\ub2e4": 7, "gafni\uc758": 7, "tokenizer\ub97c": [7, 24, 40], "tokenizer\ub294": 7, "256x256": [7, 14, 16, 21, 24, 29, 31, 35, 36, 39, 43, 50, 51], "8192\uac1c\uc758": 7, "vocabulary\uc5d0\uc11c": 7, "1024\uac1c\uc758": [7, 33, 36], "\uc778\ucf54\ub529\uc744": 7, "\uc9c4\ud589\ud55c\ub2e4": [7, 15, 26, 41, 42], "\ud14d\uc2a4\ud2b8\uc5d0\uc11c\ub294": 7, "zhang\uc758": 7, "\ucee4\uc2a4\ud140": 7, "56320": 7, "vocabulari": 7, "\uc2a4\ud398\uc15c\ud55c": 7, "\ud1a0\ud070\uc778": 7, "break": [7, 53], "figure_8_9": 7, "modality\uac04": 7, "transition\uc744": 7, "\ubaa9\uc801": [7, 17, 19, 27, 36], "sequence\uc5d0": 7, "\uad00\ub828\uc131\uc774": 7, "\ubb38\uc11c": 7, "bank": 7, "\uac80\uc0c9\ud558\ub294": 7, "dens": [7, 19, 28, 33, 36, 52], "strategy\uc744": 7, "\ucffc\ub9ac": [7, 32], "\uc608": [7, 17, 19, 27], "\ud6c4\ubcf4": 7, "\uad00\ub828\uc131": [7, 8], "\uc810\uc218": [7, 49], "return": [7, 9, 10, 12, 13, 28, 29, 38, 42, 48, 53, 55, 56], "retriv": 7, "\uae30\ubc18\uc778": 7, "bi": 7, "\ub530\ub790\ub2e4": 7, "karpukhin": 7, "\ubb38\uc11c\ub97c": 7, "\ud30c\ud2b8\ub85c": [7, 20], "\ubd84\ub9ac\ud558\uace0": 7, "\uc778\ucf54\ub354": [7, 8, 19], "\ubb38\uc11c\uc758": 7, "representation\ub85c\uc368": 7, "\uac1c\ub97c": [7, 32, 41], "\ub0b8\ub2e4": [7, 29, 36, 40], "\uac80\uc0c9\uc740": 7, "\uc810\uc218\uc5d0": [7, 49], "\uc815\ub82c\ub41c": [7, 17, 19, 41], "\ubaa9\ub85d\uc744": 7, "inner": [7, 28, 30], "product": [7, 11, 30], "search\ub85c": 7, "generator\ub97c": [7, 18, 20, 38, 48], "\uc720\uc6a9\ud55c": [7, 19], 
"\ucd94\ucd9c\ud558\uae30": 7, "\uac00\uc9c0": [7, 8, 9, 11, 17, 18, 19, 22, 26, 33, 35, 38, 40, 41, 43, 44, 47, 52, 53, 54], "\uc694\uc18c\ub97c": [7, 22, 29, 39, 53], "\uace0\ub824\ud588\ub2e4": 7, "relev": [7, 12], "\uac80\uc0c9\ub41c": 7, "\ubb38\uc11c\ub294": 7, "\uad00\ub828\uc788\uc5b4\uc57c": 7, "\ud14d\uc2a4\ud2b8\ub85c": [7, 9, 17], "\ubb38\uc11c\ub85c": 7, "\ub2e4\uc591\uc131\uc740": 7, "\ubb38\uc11c\uc5d0\uc11c": 7, "\uc911\ubcf5\uc131\uc744": 7, "\ud544\uc218\uc801\uc778": 7, "\uc808\ucc28\ub2e4": 7, "\ub2e8\uc21c\ud558\uac8c": 7, "\uae30\ubc18\ud574": [7, 28], "top": [7, 8, 11, 35, 42], "\ubb38\uc11c\ub9cc": 7, "\uac00\uc838\uc628\ub2e4\uba74": 7, "\uc911\ubcf5\uc774": 7, "downstream": [7, 24, 28, 36, 53], "\uc548\uc88b\uc740": 7, "\ub07c\uce60": 7, "\uc774\ud558\ub85c": 7, "dropout": [7, 19], "\uac80\uc0c9\uc5d0": 7, "\ucffc\ub9ac\uc758": 7, "\uc0ad\uc81c": [7, 13], "\ub2e4\uc591\uc131\uacfc": [7, 39], "\uc815\uaddc\ud654\ub97c": [7, 38], "\ud14d\uc2a4\ud2b8\ub97c": [7, 11, 17, 19, 29, 33, 38, 39, 40], "\uac80\uc0c9\ud55c\ub2e4": 7, "\ud559\uc2b5\uc5d0\uc11c\ub294": 7, "\ucea1\uc158": [7, 21, 33], "\uc30d\uc5d0": [7, 8, 17, 19, 33, 43], "\uc0d8\ud50c": [7, 11, 17, 19, 27, 29, 32, 33, 36, 38, 39], "3\uac1c\ub97c": 7, "\ubb34\uc791\uc704\ub85c": [7, 18, 29, 32, 33, 40], "\uc0ac\uc2e4\uc0c1": [7, 26, 54], "\ud559\uc2b5\uc5d0\uc11c": [7, 19], "4\ubc30\uc774\ub2e4": 7, "chameleon": 7, "\ubcc0\ud615\uc2dc\ucf1c": 7, "infil": 7, "\ud45c\ud604\ud55c\ub2e4": 7, "\ucd94\uac00\ub418\uc5c8\uace0": 7, "\ub2e8\uc5b4\uc758": 7, "\uc7ac\ubc30\uce58\uac00": 7, "\uc9c4\ud589\ub410\ub2e4": 7, "\ud559\uc2b5\uc5d0\ub294": 7, "\ub2e4\uc6a9\ub3c4": 7, "\uac00\uc838\uc654\ub2e4": [7, 35, 41], "cm3\uac00": 7, "\ud504\ub86c\ud504\ud2b8\ub85c": [7, 29, 33, 39], "cm3\ub294": 7, "\ud65c\uc6a9\ud55c\ub2e4": [7, 26, 27], "\ub514\ucf54\ub354\ub9cc": 7, "\uc544\ud0a4\ud14d\uccd0\ub97c": [7, 11, 43, 53, 54], "zhang\uc5d0": 7, "bia": [7, 11, 13, 14, 29, 32, 43, 53], "norm\uc758": 7, 
"\uc81c\uac70\ud588\ub2e4": [7, 33, 35], "length\ub97c": [7, 15, 16], "2048": [7, 41, 54], "4096\uae4c\uc9c0": 7, "\ud655\uc7a5\ud588\ub2e4": [7, 36], "\ud45c\uc900": [7, 19, 32, 33, 44], "\ud3b8\ucc28": 7, "006": 7, "truncat": [7, 20, 54], "3\uc73c\ub85c": [7, 49], "\uc798\ub9b0": [7, 35], "0002\ub85c": 7, "\ucd08\uae30\ud654\ud55c\ub2e4": [7, 19], "metaseq": 7, "\ud559\uc2b5\ub410\ub2e4": 7, "\uc0ac\uc774\uc988": [7, 41], "350m": 7, "760m": 7, "7b": 7, "4t": [7, 41], "trillion": 7, "9t": 7, "\uc8fc\uc694\ud55c": [7, 22, 41], "\ud558\uc774\ud37c": 7, "size\ub85c": 7, "\uba40\ud2f0\ubaa8\ub2ec": [7, 24], "\uc124\uc815\ud588\ub2e4": 7, "perplex": 7, "ppl": [7, 47], "\uc5b8\uc5b4": [7, 17, 42], "\ud558\ub098\uc774\ub2e4": 7, "\ud5f7\uac08\ub9ac\ub294": 7, "\ub0ae\uc744": [7, 9, 13], "\uc218\ub85d": [7, 13, 14], "\uc88b\ub2e4": [7, 11, 28, 46], "\ubaa8\ub378\uc5d0\uc11c": [7, 14, 18, 24, 29, 33, 35, 39, 40, 42, 46, 49, 54], "\uc54c\uace0\ub9ac\uc998\uc5d0": 7, "\uc9c4\ud589\ub418\uc5b4": [7, 31, 43], "\uc654\ub2e4": [7, 19, 23], "e\ub294": [7, 33, 36, 45], "\uc544\uc6c3\ud48b\uc758": 7, "\ud5a5\uc0c1\ub418\ub294": [7, 39], "\uc0d8\ud50c\ub9c1\uacfc": 7, "512\uac1c": [7, 45], "\uc804\ub7b5\uc744": [7, 14, 19, 22, 24, 29], "\ucc44\ud0dd\ud588\ub2e4": [7, 36], "make": [7, 24, 28, 36, 41], "guidance\ub85c": 7, "ranking\uc5d0": 7, "\uc624\uc9c1": [7, 8], "\uc0d8\ud50c\ub9cc": [7, 27], "\ud544\uc694\ud558\uac8c": 7, "\ub428\uc73c\ub85c\uc368": [7, 50], "\ud6c4\ubcf4\uc758": 7, "\uc218\ub97c": [7, 14, 19, 24, 29, 32, 35, 41, 45], "\ud655\ub960\uc801": 7, "\uae30\uc220\ub85c": [7, 40], "\uc0d8\ud50c\ub9c1\uc5d0\uc11c": 7, "softmax\uc758": 7, "temperature\ub97c": 7, "\uc218\uc815\ud574": [7, 11], "\uc608\uce21": [7, 11, 12, 13, 19, 22, 31, 32, 33, 45, 51], "\ubb34\uc791\uc704\uc131\uc744": 7, "\uc81c\uc5b4\ud55c\ub2e4": 7, "nucleu": 7, "\uc0d8\ud50c\ub9c1\uc73c\ub85c\ub3c4": 7, "\ubd88\ub9ac\uace0": 7, "\ubbf8\ub9ac": [7, 19, 40], "\uc815\uc758\ud55c": [7, 23, 43, 52], 
"\uc784\uacc4\uac12\uc744": [7, 49], "\ucd08\uacfc\ud558\ub294": 7, "\ub204\uc801": 7, "\uc0c1\uc704": [7, 33], "\uc138\ud2b8\uc5d0\uc11c": [7, 29], "\uc0d8\ud50c\ub9c1\uc744": [7, 32, 33, 39, 48], "begin": [7, 13, 19, 55], "operatornam": 7, "logit": [7, 31], "_": [7, 8, 9, 15, 17, 19, 20, 22, 25, 26, 27, 28, 29, 36, 38, 42, 43, 44, 47, 48, 51, 54, 55, 56], "cond": [7, 35], "t_y": 7, "mid": [7, 13, 28, 34], "t_x": 7, "uncond": [7, 33], "mathrm": [7, 13, 15, 17, 19, 32], "cf": [7, 29], "alpha_c": [7, 16], "cfg\ub294": [7, 18, 51], "uncondit": [7, 13, 18, 20, 21, 24, 27, 31, 36, 46, 51, 54], "\uc0d8\ud50c\uc5d0": [7, 29, 33, 35, 39, 43], "text\ub97c": [7, 18, 21, 24, 45], "\ubaa9\ud45c\uc758": 7, "\ub9c8\uc2a4\ud06c": [7, 21, 29, 31], "\ub300\uccb4\ud55c\ub2e4": 7, "\ud559\uc2b5\uc758": [7, 20, 25, 43, 44, 51], "\uc774\uc810": [7, 20], "\ud558\ub098\uc774\uba70": 7, "\uc218\ud589\ud560": [7, 11, 18, 27, 42, 43], "\ucd94\ub860\uc5d0\uc11c\ub294": 7, "stream\uc744": 7, "\ud14d\uc2a4\ud2b8\uc5d0": [7, 11, 19], "\ub2ec\ub77c\uc9c0\ub294": [7, 47], "stream\uacfc": 7, "\ud1a0\ud070\uc5d0": [7, 14, 31], "condition\ub41c": 7, "stream": 7, "cfg\uc5d0\uc11c": 7, "logit\uc758": [7, 20], "\ube84\uc148": 7, "\uc5f0\uc0b0\uc774": [7, 14, 27, 51], "\ud14d\uc2a4\ud2b8\uc5d0\uc11c": [7, 22, 29, 33], "\ubc29\ubc95\uc758": [7, 8, 26, 33, 40], "probability\ub97c": 7, "\ube84\uc148\ud558\ub294": 7, "\uc5f0\uc0b0\uacfc": 7, "\ube44\uc2b7\ud558\ub2e4": [7, 35], "ms": [7, 20, 21, 45, 49], "coco": [7, 18, 21, 31, 36, 45, 49, 55], "30k": 7, "\uce21\uc815\ud588\ub2e4": [7, 42], "\ud6a8\uc728\uc131\uc774": 7, "\ucd94\ub860\uc5d0\uc11c": 7, "1\uac1c": [7, 45], "2\uac1c\ub85c": 7, "\uc608\uc81c\ub85c": 7, "\ub3d9\uc791\ud560": [7, 39], "\uc6b0\uc218\ud55c": [7, 12, 14, 17, 19, 22, 29, 35, 44, 45], "\uae30\ub85d\ud588\ub2e4": [7, 37], "\uace0\ud488\uc9c8": [7, 11, 16, 19, 22, 33, 35], "\uac80\uc0c9\uc758": 7, "\uc911\uc694\uc131\uc744": [7, 14, 22, 44], "figure5": 7, "llm\uc5d0\uc11c": 7, 
"\ub2e8\uacc4\uc774\ub2e4": [7, 26], "\uba85\ub839\uc5b4": [7, 18], "\uc774\ud574\ud558\ub294": 7, "\ub3c4\uc640\uc8fc\uba70": 7, "task\uc5d0\uc11c\ub3c4": 7, "\ud29c\ub2dd\uc774": 7, "\ub208\uc5d0": [7, 19], "\ub744\uac8c": [7, 19], "\uc99d\ud3ed\uc2dc\ud0a4\ub294": 7, "cm3leon\uc744": 7, "task\ub97c": [7, 28, 34, 40, 45, 51], "\uc11e\uc5b4": 7, "\ubc94\uc704\uc5d0\uc11c": 7, "\uacfc\uc815\uc740": [7, 17, 33, 36, 56], "\ub530\ub974\uba70": 7, "instruction\uacfc": 7, "\ucd9c\ub825\uc744": [7, 19, 32, 33, 36], "figure6": 7, "image\ub97c": [7, 10, 13, 15, 20, 21, 22, 24, 25, 26, 27, 29, 34, 39, 40, 45], "\uc218\uc815\ud558\ub294": [7, 40], "task\uc774\ub2e4": 7, "instructpix2pix": [7, 19, 53], "\ud558\ub298\uc758": 7, "\uc0c9\uc744": [7, 33, 36], "\ud30c\ub780\uc0c9\uc73c\ub85c": 7, "\ubcc0\uacbd\ud574\uc918": 7, "\ud3b8\uc9d1\uc774": 7, "\uc774\uac83\uc740": [7, 11, 22, 39], "cm3leon\uc774": 7, "\uc774\ud574\ud558\uace0": [7, 27], "\uc788\uc5b4\uc11c": [7, 15], "\uc0dd\uc0b0\ud558\ub294": 7, "\uacf5\uac04\uc801": [7, 29, 32], "\uc815\ubcf4": [7, 17, 19, 27, 29, 31, 32, 37, 53], "\ud1b5\ud569\uc2dc\ud0ac": [7, 11], "figure16": 7, "flamingo": 7, "1000\uc5b5": 7, "openflamingo": 7, "400\uc5b5": 7, "30\uc5b5": 7, "\ud1a0\ud070\uc784\uc5d0\ub3c4": 7, "\ubd88\uad6c\ud558\uace0": [7, 11, 17, 19, 21, 26, 39, 41, 44, 54], "\ub3d9\ub4f1\ud55c": 7, "06949": [8, 22], "site": 8, "hyoungseo": [8, 22, 57], "cho": [8, 22, 27, 57], "generation\uc758": [8, 21], "\uc77c\ud658\uc73c\ub85c": 8, "\uc0c8\ub86d\uace0": 8, "\ucc3d\uc758\uc801\uc778": 8, "\uac1c\ub150\uc744": [8, 17, 40, 51], "\ub2e4\ub8f9\ub2c8\ub2e4": 8, "\uae30\uc220\uacfc": [8, 33, 35], "\ubfd0\ub9cc": [8, 24, 35, 41, 44, 53], "\uac1c\uc778\ud654\ub41c": [8, 16, 17, 22, 40], "\uc0dd\uc124\ud560": 8, "\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 8, "\uac15\ub825\ud55c": [8, 21, 24, 25, 27, 30, 33], "\uc124\uba85\ub418\uc9c0": 8, "\ucc3d\uc758\uc801": 8, "\uc788\uc744\uae4c\uc694": 8, "sysnthesi": 8, "\uae30\uc220\uc740": 8, 
"\uc778\ucf54\ub354\uc5d0\uc11c": [8, 19], "\ucd94\ucd9c\ud55c": [8, 10, 20, 24, 42], "\uc784\ubca0\ub529\uc744": [8, 19, 33, 35], "conditioning\ud569\ub2c8\ub2e4": 8, "\ucc98\ub9ac\ud558\uc5ec": 8, "\ud1b5\ud569\ud558\ub294": [8, 19], "\ubc29\uc2dd\uc785\ub2c8\ub2e4": [8, 43, 54], "\ud65c\uc6a9\ud574\uc11c": [8, 11, 46, 56], "generation\uc5d0\uc11c\uc758": 8, "\uc774\uc810\uc744": [8, 27, 32], "\ubcf4\uc785\ub2c8\ub2e4": [8, 44], "\uc785\ub825\ub41c": [8, 17, 19, 36, 40, 50], "\ud574\ub2f9\ud558\ub294": [8, 9, 19, 21, 24, 31, 32, 50, 52], "\uc784\ubca0\ub529\uc73c\ub85c": [8, 19], "\ub9e4\ud551\ud569\ub2c8\ub2e4": 8, "\ub514\ucf54\ub354\ub294": 8, "\uc784\ubca0\ub529\uc5d0": [8, 19, 33, 35], "condition\uc774": [8, 24], "\ubd80\uc5ec\ub41c": 8, "\uc0dd\uc131\ud558\uae30": [8, 17, 22, 24, 33, 36, 40, 43, 44, 51], "\ud6c8\ub828\ub429\ub2c8\ub2e4": 8, "personalization\uc740": 8, "\ub9e5\ub77d\uc5d0\uc11c": 8, "\uc0ac\uc6a9\uc790\uac00": [8, 17, 24, 25, 34, 35, 40], "\uc8fc\uc81c\ub098": 8, "\uc2a4\ud0c0\uc77c\uc744": [8, 19, 22, 24, 37, 39], "\uc77c\ubc18\uc801\uc73c\ub85c": [8, 17, 18, 19, 24, 32, 33, 36, 39, 40, 43, 51], "\ud559\uc2b5\uc2dc\ud0a4\uae30": [8, 18, 36], "\ucd5c\uc801\ud654\ud558\uac70\ub098": 8, "\ud65c\uc6a9\ud569\ub2c8\ub2e4": [8, 17, 43], "\ucd08\ucca8\uc744": [8, 40], "\ub9de\ucd94\uace0": 8, "\uae30\ubc1c\ud55c": 8, "\uc811\uadfc": [8, 17, 18, 19, 22, 32, 33, 40], "\ubc29\ubc95\uc774": [8, 15, 17, 18, 19, 21, 22, 26, 29, 30, 33, 39, 44, 51], "xu": 8, "set": [8, 13, 16, 17, 18, 20, 28, 42, 44, 47, 49, 53, 54], "evolut": 8, "\ud65c\uc6a9\ud574": [8, 11, 16, 24, 30, 35, 41, 42, 44, 46, 55], "\ud615\ud0dc\uc758": [8, 24, 32, 33, 35, 36, 40, 47, 52, 53], "\uc81c\uc548\ud588\uc2b5\ub2c8\ub2e4": [8, 38], "elgamm": 8, "gan\uc758": [8, 15, 27, 40], "\ud0d0\uad6c\ud558\uba70": 8, "\uc2a4\ud0c0\uc77c\uc5d0\uc11c\uc758": 8, "\ud3b8\ucc28\ub97c": 8, "\uadf9\ub300\ud654\ud558\ub294": 8, "\ud559\uc2b5\ud588\uc2b5\ub2c8\ub2e4": [8, 11], "sbai": 8, "\uc190\uc2e4": 
[8, 17, 45], "\ub3c4\ub9bd\ud588\uc2b5\ub2c8\ub2e4": 8, "\uce74\ud14c\uace0\ub9ac\uc640": 8, "\uc77c\uce58\ud558\ub3c4\ub85d": [8, 33, 40], "\ucd5c\uc801\ud654\ud558\uba74\uc11c\ub3c4": 8, "\uce74\ud14c\uace0\ub9ac\uc758": 8, "\uac1c\ub150\ub4e4\uacfc": 8, "\ucc3e\ub294": [8, 15, 19, 30, 40], "\uc811\uadfc\ud588\uc2b5\ub2c8\ub2e4": 8, "\uac1c\ub150\ub4e4\uc740": 8, "\ud63c\ud569\ub420": 8, "\uc720\uc5f0\ud55c": 8, "\uac16\uac8c\ub429\ub2c8\ub2e4": [8, 38], "bottom": 8, "model\uc5d0\uc11c\ub294": [8, 24, 40], "\uc624\ud1a0\uc778\ucf54\ub354\uc758": [8, 19], "\ub0b4\uc5d0\uc11c": [8, 22, 32, 35, 40, 41, 50], "\uc9c4\ud589\ub429\ub2c8\ub2e4": [8, 54], "\uba3c\uc800": [8, 9, 17, 22, 26, 29, 32, 33, 38, 39, 42, 45, 48], "\uc778\uace0\ub354": 8, "\ub9e4\ud551\ud558\ub294": [8, 44], "d\ub294": [8, 15], "\uc7ac\uad6c\uc131\ud558\ub3c4\ub85d": 8, "ddpm\uc758": [8, 12, 21, 23, 37, 46], "\uc190\uc2e4\uc744": [8, 17, 19, 22], "\ucd5c\uc18c\ud654\ud558\ub3c4\ub85d": [8, 22, 43], "\ud559\uc2b5\ud569\ub2c8\ub2e4": [8, 44, 48, 52, 54, 55], "\uc7a0\uc7ac": [8, 26, 33, 39], "\ucf54\ub4dc": [8, 29, 38, 42, 53], "zt": 8, "\uc7a1\uc74c": 8, "\uc81c\uac70\ud569\ub2c8\ub2e4": 8, "\ud604\uc7ac": [8, 11, 12, 18, 30, 37, 39], "t\uc640": 8, "\uc870\uac74": [8, 17, 19, 21, 24, 33], "\ubca1\ud130": [8, 14, 19, 27, 32], "c\ub3c4": 8, "\uace0\ub824\ub429\ub2c8\ub2e4": 8, "model\uc740": [8, 13, 15, 20, 21, 24, 26, 27, 28, 31, 36, 40, 51], "\uc778\ucf54\ub529\uc5d0\uc11c": 8, "\ud30c\uc0dd\ub41c": [8, 19, 40], "\ud504\ub86c\ud504\ud2b8": [8, 17, 18, 26, 29], "decompos": [8, 22], "\ud504\ub86c\ud504\ud2b8\ub85c\ubd80\ud130": 8, "\uc608\uce21\ud569\ub2c8\ub2e4": [8, 22, 44, 52], "\ub2e4\uc74c\uc73c\ub85c": [8, 22, 33], "\ubd80\uc5ec\ud558\uc5ec": 8, "decoder\ub85c": 8, "\ubcf4\ub0b4\uc9d1\ub2c8\ub2e4": 8, "\ub3c5\ub9bd\uc801\uc778": [8, 12, 43], "\uc774\ub8e8\uc5b4\uc9d1\ub2c8\ub2e4": 8, "l_": [8, 18, 23, 24, 25, 34, 36, 41, 43, 46, 51, 53, 55], "\ub2e4\uc591\uc131\uc744": [8, 16, 17, 39], 
"\ud5a5\uc0c1\uc2dc\ud0a4\uba70": 8, "\uc811\uadfc\ud558\uace0": 8, "\uacf5\uac04\uc5d0\uc11c": [8, 14, 17, 32, 40], "conceptlab\uc740": 8, "\ub300\ud45c\ud558\ub294": 8, "v_": [8, 20], "\ucd5c\uc801\ud654\ud569\ub2c8\ub2e4": [8, 52], "\uce74\ud14c\uace0\ub9ac\uc5d0": [8, 29, 33], "\uc720\uc0ac\ud558\uba74\uc11c\ub3c4": [8, 40], "\uba64\ubc84\ub4e4\uacfc": 8, "\uac00\uc9c0\ub3c4\ub85d": [8, 36, 39], "\uc9d1\ud569\uc744": [8, 17, 24, 32], "\uacc4\uc0b0\ud569\ub2c8\ub2e4": [8, 43, 47], "\ud6c8\ub828\ud558\ub294": [8, 19, 26], "\ubc14\ud0d5\uc73c\ub85c": [8, 24, 27, 32, 44, 54], "contraints\ub97c": 8, "\ub354\ud558\uae30": 8, "blip": [8, 11], "vqa": 8, "\uae0d\uc815\uc801": 8, "\uc81c\uc57d": 8, "po": 8, "\uac00\uc9c0\ub97c": 8, "\uc870\uac74\uc740": 8, "\uc815\uc758\ub429\ub2c8\ub2e4": 8, "\ub450\uac00\uc9c0": [8, 36, 46], "\uce21\uc815\ud569\ub2c8\ub2e4": [8, 11, 54], "\uc6b0\uc120": [8, 9, 11, 21, 26, 35, 47, 52, 53, 54], "\ub2e8\uc5b4": [8, 29, 37, 40], "\ubb34\uc791\uc704": [8, 22], "\uc0d8\ud50c\ub9c1\ub41c": [8, 32, 33], "y\uc5d0": 8, "\ud1b5\ud569\ud569\ub2c8\ub2e4": 8, "\ubb38\uc7a5\uc740": 8, "\uc778\ucf54\ub529\ub418\uba70": 8, "\uc774\uac83\uc774": [8, 11, 44], "\uc815\uc758\ud569\ub2c8\ub2e4": [8, 43, 47, 50, 52, 53, 54, 55, 56], "\ud1b5\uacfc\uc2dc\ud0a4\uba74": 8, "\ud504\ub86c\ud504\ud2b8\uc758": [8, 19], "\uc778\uc2a4\ud134\uc2a4\uac00": 8, "\uc870\uac74\uacfc": [8, 30], "\uae0d\uc815": 8, "\ubd80\uc815": 8, "\uad11\ubc94\uc704\ud558\uac8c": 8, "\uc720\uc9c0\ud558\uace0\uc790": [8, 47], "\ud1b5\uacfc\ud558\uc9c0": 8, "\uc54a\uc2b5\ub2c8\ub2e4": [8, 11, 43, 44, 53], "\uc5f0\uad6c\uc5d0\uc11c\uc758": 8, "\ud568\uc218\ub294": [8, 27, 32], "langl": 8, "rangl": 8, "lambda": [8, 13, 17, 24, 26, 43, 44, 47], "v\uc5d0\uc11c": 8, "\uc870\uac74\uc5d0\uc11c": 8, "\uba40\uc5b4\uc9c0\uace0": 8, "\uc81c\uc57d\uc870\uac74\uc5d0": 8, "\uac00\uae4c\uc6cc\uc9c0\ub3c4\ub85d": [8, 15], "\uc815\uaddc\ud654\ub294": 8, "\uc9d1\ud569\uc774": 8, "\ud074": [8, 39], 
"\uba64\ubc84\ub85c\uc758": 8, "collapsing\uc744": 8, "\ubc29\uc9c0\ud558\ub294": [8, 17, 18, 19], "\uc0ac\uc6a9\ub429\ub2c8\ub2e4": [8, 9, 22], "\uc81c\uc57d\uc5d0": 8, "s_": [8, 18, 19, 20, 37, 43], "max_": 8, "\ubc29\uc2dd\uc740": [8, 10, 22, 24, 25, 28, 32, 33, 36, 40, 51, 53, 54, 55], "\ud568\uc218\uc5d0": 8, "\ud1b5\ud569\ub418\uba70": 8, "\ub0c4\uc73c\ub85c\uc368": 8, "\uc870\uac74\uc5d0": 8, "\ud328\ub110\ud2f0\ub97c": [8, 44], "\ubd80\uc5ec\ud569\ub2c8\ub2e4": 8, "\uacfc\uc815": [8, 10, 13, 23, 29, 31, 32, 33, 36, 40], "\uac1c\ub150\uc5d0": [8, 17, 40], "\ub2e8\uc5b4\ub97c": [8, 40], "\ucd94\ub860\ud558\uace0": [8, 42], "\uac70\uce69\ub2c8\ub2e4": 8, "\uc218\ub3d9\uc73c\ub85c": [8, 30], "\uc801\uc6a9\ud558\ub294": [8, 13, 15, 25, 28, 43, 47, 51, 53, 54, 55], "\ud798\ub4e4\uace0": 8, "\uad11\ubc94\uc704\ud55c": [8, 17], "\uba64\ubc84\ub4e4\uc744": 8, "\ub300\ud45c\ud558\uc9c0": 8, "\ubabb\ud560": [8, 22, 35], "\ud655\uc7a5\ud558\ub294": [8, 29], "scheme\uc744": 8, "\ud6c8\ub828\ub41c": [8, 9, 17, 19, 22, 24, 26, 39, 40], "\uc9c8\uc758\ud558\uc5ec": 8, "\uc774\ubbf8\uc9c0\uc5d0": [8, 10, 12, 13, 17, 19, 20, 23, 24, 29, 31, 32, 33, 34, 37, 39, 40, 42, 44, 45, 52, 53], "\uc874\uc7ac\ud558\ub294": [8, 11, 17, 32, 42, 52, 53], "\uba64\ubc84\uac00": 8, "\ubb34\uc5c7\uc778\uc9c0": [8, 27, 47, 56], "\uc2dd\ubcc4\ud558\ub3c4\ub85d": 8, "\ub098\uc628": [8, 15, 21, 22, 43, 45, 46, 49, 51, 53, 54, 55, 56], "\uc778\uc2a4\ud134\uc2a4\ub97c": 8, "\ud6c8\ub828\uc758": 8, "\ucd94\uac00\ud569\ub2c8\ub2e4": [8, 53], "\ub2e8\uacc4\uc5d0": [8, 43], "\uac78\uccd0": [8, 17, 29, 33, 40], "\ubcf4\uc5ec\uc90d\ub2c8\ub2e4": [8, 9, 44, 47, 55, 56], "\uc870\uac74\uc774": [8, 19, 22], "\uc9c0\uc18d\uc801\uc73c\ub85c": [8, 27, 39], "\uc870\uc815\ub418\uace0": 8, "\ud655\uc7a5\ub418\uc5c8\uc74c\uc744": 8, "\uac1c\ub150": [8, 40, 47, 51], "\uc14b\uc5d0": [8, 19], "\ud63c\ud569": [8, 39], "\uac1c\ub150\uc5d0\uc11c": 8, "im": 8, "\ub9cc\ub4ed\ub2c8\ub2e4": [8, 44, 52], 
"\ud1b5\uacfc\ud558\uc5ec": [8, 36], "\uc0dd\uc131\ud569\ub2c8\ub2e4": [8, 11, 22, 33, 43, 52, 55, 56], "\uc801\uc6a9\ud569\ub2c8\ub2e4": [8, 11, 44], "\uac1c\ub150\uc774\ub098": 8, "\uc801\uc6a9\ub420": 8, "\uacc4\uce35\u3147\uc801": 8, "\ubc18\ubcf5\uc801\uc73c\ub85c": [8, 19, 24, 32], "\uacb0\uacfc\ubb3c\uc5d0": 8, "\uac1c\ub150\uc758": 8, "\uc81c\uc5b4\ud558\uae30": [8, 16, 17, 22], "\uac00\uc911\uce58": [8, 17, 19, 22, 25, 33, 44, 49, 53], "\ud56d\ubaa9\uc774": [8, 17], "\ucd94\uac00\uc801\uc73c\ub85c": [8, 9, 11, 15, 17, 21, 24, 26, 35, 36, 39, 43, 52, 53, 54], "\uadf8\ub9bc\uc5d0\ub294": 8, "\uac1c\ub150\uc774": 8, "\uc67c\ucabd\uc5d0": [8, 29], "\ud45c\uc2dc\ub418\uc5b4": 8, "\uc0dd\uc131\ud588\ub294\uc9c0\ub97c": 8, "\ud65c\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [8, 22, 54], "conceptlab\uc774": 8, "\uc81c\uc548\ud55c": [8, 15, 19, 27, 38, 39], "\ud504\ub86c\ud504\ud2b8\uc640": [8, 17, 29, 33], "\uc801\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [8, 55], "\uac1c\ub150\ub4e4\uc744": 8, "\ud63c\ud569\ud558\uc5ec": [8, 19, 33, 51], "\ub3c5\ud2b9\ud55c": 8, "\ucc3d\uc870\ubb3c\uc744": 8, "\uc717\uc904\uc5d0\uc11c\ub294": 8, "\uc774\uc5b4\uc9c0\ub294": [8, 40], "\uc904\uc5d0\uc11c\ub294": 8, "\uc5bb\uc5b4\uc9c4": [8, 17], "conceptlab\uc758": 8, "\uc785\uc99d\ud558\uae30": 8, "\uc815\uc131\uc801": [8, 10, 17, 19, 27, 45], "\uc815\ub7c9\uc801": [8, 10, 17, 19, 22, 31, 45], "\ud3c9\uac00\ub97c": [8, 18, 19, 22, 26, 29, 33, 35, 44, 45, 48, 49, 51, 53], "\uadf8\ub9bc\ub4e4\uc5d0\uc11c": 8, "\uc788\ub4ef\uc774": [8, 15, 39], "negative\ub97c": 8, "\uc801\uc6a9\ud558\uc600\uace0": 8, "\uc2dc\ub4dc\ub97c": 8, "\ub2ec\ub9ac\ud558\uba70": 8, "\ub2a5\ub825\uc774": [8, 11, 17, 26, 42], "\uc7a5\uba74\uc5d0": [8, 32, 40], "\ubc30\uce58\ud560": 8, "\uc0dd\uc131\ubb3c\ub4e4\uc740": 8, "\ubc30\uacbd": [8, 17, 18, 19, 20, 30, 32, 33, 35, 40], "\ucc3d\uc870\ub4f1": 8, "\ub2e4\uc591\ud558\uac8c": [8, 25, 27, 50, 54], "\uac00\ub2a5\ud569\ub2c8\ub2e4": [8, 11, 22, 43, 47, 52, 54, 55], 
"conceptlab\uc744": 8, "mixing\uc758": 8, "mixing\uc740": 8, "\uac1c\ub150\ub4e4\uc758": 8, "\ud569\uccd0": 8, "\ud558\uc774\ube0c\ub9ac\ub4dc": 8, "\ud615\uc131\ud558\ub294": 8, "\uc870\uac74\ub9cc\uc744": 8, "\uc904\uc5d0\ub294": 8, "\ub78d\uc2a4\ud130\uc758": 8, "\uc0dd\uc0c1\uacfc": 8, "\uc9d1\uac8c\ubc1c": 8, "\uac70\ubd81\uc774\uc758": 8, "\ub4f1\uaecd\uc9c8": 8, "\uc735\ud569\ud558\ub294": 8, "conceptlab\uc5d0": 8, "\uac1c\ub150\ub4e4\uc774": 8, "\uc138\ub300": 8, "\uc5b4\ub5bb\uac8c": [8, 9, 11, 13, 27, 29, 30, 31, 39, 40], "\ubc1c\uc804\ud558\ub294\uc9c0": 8, "diffusion2\uc640": 8, "kandinski": 8, "\ud3c9\uac00\ud588\uc2b5\ub2c8\ub2e4": [8, 48], "kandinsky\uc758": 8, "\uc720\ub9ac\ud55c": 8, "\ud504\ub86c\ud504\ud2b8\ub294": [8, 22, 29], "model\uc774": [8, 15, 18, 21, 23, 24, 26, 27], "\ud1a0\uadfc\uacfc": 8, "\ubaa8\ub450\uc5d0": 8, "\ub9de\ucdb0\uc9c8": 8, "\ub2e4\uc911": [8, 17, 35], "\ucc98\ub9ac\ud558\uace0": 8, "\ud45c\ud604\uc744": [8, 16, 17, 19, 32, 33], "\uac16\ucd94\uace0": [8, 11, 15, 33], "\ud3ec\ud568\ud558\uba70": 8, "\uac1c\ub150\uacfc": 8, "\ub2ee\uc9c0": 8, "\uce21\uc815\ud588\uc2b5\ub2c8\ub2e4": 8, "\ud3c9\uac00\uc5d0\ub294": 8, "\uc560\uc644\ub3d9\ubb3c": 8, "\uc2dd\ubb3c": 8, "\uacfc\uc77c": 8, "\uac00\uad6c": 8, "\uc545\uae30\uc758": 8, "5\uac00\uc9c0": [8, 29, 55], "\ub3c4\uba54\uc778\uc5d0": 8, "\uace0\ub824\ud558\uace0": [8, 19], "\uc870\ud569\uc5d0": 8, "\ub79c\ub364": [8, 13, 17, 18, 19, 24, 29, 36], "\uc2dc\ub4dc\ub85c": 8, "\ud6c8\ub828\ud558\uc5ec": 8, "\ucd1d": [8, 11, 14, 16, 20, 27, 32, 37, 38, 39, 40, 44, 54], "75\uac1c\uc758": 8, "\uc5bb\uc5c8\uc2b5\ub2c8\ub2e4": 8, "32\uac1c\uc758": 8, "\uc0dd\uc131\ud588\uc2b5\ub2c8\ub2e4": [8, 50], "diffusionr\uacfc": 8, "\ubaa8\ub378\uc5d0\uc11c\ub294": [8, 22, 31, 33], "160\uac1c\uc758": 8, "\uae30\uc900\uc73c\ub85c\ub294": [8, 39], "\ud0c0\uac9f": [8, 19, 35], "\uce74\ud14c\uace0\ub9ac\uc640\uc758": 8, "\uacf5\uac04": [8, 14, 22, 29, 32, 36, 40], "\uc720\uc0ac\uc131": [8, 17], 
"\uacc4\uc0b0\uc744": [8, 18, 20], "\ud2b9\uc815\ub429\ub2c8\ub2e4": 8, "\ub2e4\uc74c\uc73c\ub85c\ub294": [8, 39], "\uc81c\uc57d\uacfc": [8, 29], "\uac70\ub9ac\ub97c": [8, 13, 20, 25, 27, 36, 40, 51], "\uc720\uc0ac\uc131\uc5d0\uc11c": 8, "\uc6b0\uc6d4\ud55c": [8, 43], "\ubcf4\uc600\uace0": [8, 23, 26], "\uc2e0\ub8b0\uc131": [8, 22], "\uac70\ub9ac": 8, "\uce21\uc815\uc5d0\uc11c": 8, "\uce74\ud14c\uace0\ub9ac\uc5d0\uc11c": [8, 30], "4\uac00\uc9c0": [8, 26, 51], "kandinsky\ub97c": 8, "\ub2a5\uac00\ud588\uc2b5\ub2c8\ub2e4": 8, "personalization\uacfc": [8, 16], "\ud3ec\ud568\ud558\ub294": [8, 10, 19, 26, 29, 33, 40, 44, 50], "\ud56d\uc0c1": [8, 33], "\uc720\uc9c0\ud558\uc9c0\ub294": 8, "\ubabb\ud569\ub2c8\ub2e4": [8, 22], "\uac00\uc838\uc624\uc9c0\ub294": 8, "\ube44\ud589\uae30": 8, "\ubb3c\uace0\uae30": 8, "2\uc5d0": [8, 26, 35], "\uad00\ub828\uc774": 8, "\uc18c\uac1c\ud588\uc2b5\ub2c8\ub2e4": [8, 22], "\uc0ac\uc6a9\uc744": [8, 26], "constraints\ub77c\ub294": 8, "\uc870\uac74\ub4e4\uc744": 8, "\ucd9c\ub825\uc5d0": [8, 19], "\uacfc\uc815\uc5d0\uc11c\ub294": [8, 31], "\ub3c5\ud2b9\ud558\uba74\uc11c\ub3c4": 8, "\uba64\ubc84\ub4e4\uacfc\uc758": 8, "\uba85\ud655\ud55c": [8, 21, 29], "\uad6c\ubcc4\uc744": 8, "\ubcf4\uc7a5\ud588\uc2b5\ub2c8\ub2e4": 8, "\ud6a8\uacfc\uc131\uc744": 8, "\uc785\uc99d\ud588\uc73c\uba70": 8, "\ub2e4\uc591\ud558\uace0": [8, 33], "\ub9e4\ub825\uc801\uc778": 8, "2302": [9, 55], "05543": 9, "lllyasviel": 9, "mai": [9, 19, 26, 27, 32, 35, 40, 44, 46, 47, 51], "28": [9, 56], "\ubaa8\ub378\ub4e4\uc740": [9, 10, 11, 26, 33, 46, 53], "prompt\ub85c": [9, 24, 26, 37, 40], "\uc774\ub7f0": [9, 11, 17, 18, 24, 30, 38, 39], "control\ub9cc\uc73c\ub85c": 9, "\uc870\uc808\ud558\ub294\ub370": 9, "\uc918\uc11c": 9, "controlnet\uc774\ub77c\ub294": 9, "profession": 9, "prompt\uc640": [9, 10, 21, 24], "canni": [9, 24], "edge\ub97c": 9, "\ubc1b\uc544\uc11c": [9, 11, 22, 38, 56], "\uc624\ub978\ucabd\uc758": 9, "\uc2dd\uc73c\ub85c": [9, 15, 36], 
"\uadf8\ub9bc\uc5d0\uc11c\ub294": [9, 31], "controlnet\uc774": 9, "\uc5ed\ud560\uc785\ub2c8\ub2e4": 9, "conrolnet": 9, "\uadf8\ub7ec\uba74": [9, 24, 26, 38], "\ud588\uc744\uae4c\uc694": [9, 11], "\uc774\uc81c\ubd80\ud130": 9, "\uc54c\uc544\ubcf4\ub3c4\ub85d": [9, 38], "\ud558\uaca0\uc2b5\ub2c8\ub2e4": [9, 22, 38, 56], "controlnet\uc758": [9, 16, 24], "\uad6c\uc870\ub294": [9, 38, 39, 55], "\uac00\uc9d1\ub2c8\ub2e4": [9, 52], "lock": 9, "copy\uc640": 9, "trainabl": [9, 12, 22, 23, 24, 25, 28, 30, 53], "copy\ub97c": 9, "\uc124\uacc4\ud588\ub294\uc9c0": 9, "\uc54c\uc544\ubd05\uc2dc\ub2e4": 9, "\uae30\uc874\uc5d0": [9, 10, 19, 25, 27, 28, 34, 38, 39, 52, 53], "\ubc29\ub300\ud55c": 9, "\uc704\ud574\uc11c\uc785\ub2c8\ub2e4": 9, "\uc591\uc774": [9, 39], "\uacbd\uc6b0\uc5d0": [9, 19, 39, 44, 55], "\uc624\ubc84\ud53c\ud305\uc744": [9, 19], "\ud53c\ud560": [9, 33], "\ud6a8\uacfc\ub3c4": 9, "convolution\uc774\ub780": 9, "weight\ub791": 9, "\ucd08\uae30\ud654\ud55c": 9, "1x1": 9, "convolution\uc744": [9, 29], "\ub9d0\ud569\ub2c8\ub2e4": [9, 44, 52], "\ud6c8\ub828\uc774": [9, 19, 29], "\uc2dc\uc791\ub418\uae30": 9, "\uc804\uc5d0\ub294": 9, "input\uc5d0": [9, 27, 45], "output\uc774": [9, 44], "\ub611\uac19\uc544\uc9d1\ub2c8\ub2e4": 9, "\ubaa8\ub378\uc774\ub791": 9, "\ub611\uac19\uc740": [9, 26], "\uac00\uc9c0\uac8c\ub418\ubbc0\ub85c": 9, "\ube44\uc2b7\ud558\ubbc0\ub85c": 9, "scratch\ubd80\ud130": [9, 25, 51], "\ud6c8\ub828\uc2dc\ud0ac": 9, "\uc788\uac8c\ub429\ub2c8\ub2e4": 9, "convolution\uc740": 9, "\ud558\ub294\uc9c0": 9, "\uc790\uc138\ud788": [9, 11, 22, 38, 47, 54], "\uadf8\ub9bc\uc5d0\uc11c": [9, 11, 15, 26, 38, 39, 44, 47], "\uc218\uc2dd\uc73c\ub85c": [9, 25, 43, 51], "\ud45c\ud604\ud558\uaca0\uc2b5\ub2c8\ub2e4": 9, "mathbf": [9, 17, 18, 24, 32, 36, 38], "neural": [9, 12, 17, 23, 25, 41, 43, 44, 52], "\uc758\ubbf8\ud569\ub2c8\ub2e4": [9, 22, 39, 48], "\ud45c\ud604\ud558\uae30\uc704\ud574": 9, "\ub9cc\ub4e4\uc5b4\uc11c": [9, 11, 40], "parameter\ub97c": [9, 10, 13, 18, 20, 
25, 36, 39, 45, 46], "theta_": [9, 18, 25], "\ub77c\uace0\ud558\uace0": 9, "\uace0\uc815\uc2dc\ucf1c\ub450\uaca0\uc2b5\ub2c8\ub2e4": 9, "\ud45c\ud604\ud558\uace0": [9, 26], "convolution\uc758": 9, "z1": 9, "z2": 9, "\ub450\uaca0\uc2b5\ub2c8\ub2e4": 9, "\ud45c\ud604\ud560": [9, 22, 25, 27, 36, 43, 51, 54], "weight\uc640": [9, 39], "bias\uc758": 9, "\ucd08\uae43\uac12\uc774": 9, "0\uc774\ubbc0\ub85c": 9, "\uc9c4\ud589\ub418\uc9c0": 9, "\uc54a\uc558\uc744": [9, 39], "\uc785\ub2c8\ub2e4": [9, 11, 22, 39, 43, 44, 48, 54, 56], "\ub0b4\ubbc0\ub85c": 9, "\ubcf4\uc874\ud560": [9, 19], "\uc804\ubd80": 9, "\ucd08\uae30\ud654\ub418\uc5b4\uc788\uc73c\uba74": 9, "gradient\uac00": 9, "0\uc774\ub77c\uc11c": 9, "\uc548": [9, 11, 16, 18, 30, 44], "\ub418\ub294\uac70": 9, "\uc544\ub2d0\uae4c\uc694": 9, "\uacbd\uc6b0\ub97c": [9, 22, 26], "\uc0dd\uac01\ud574\ubcf4\uc8e0": 9, "wx": [9, 25], "gradient\ub294": 9, "partial": [9, 10, 13, 32, 52], "0\uc774\uace0": [9, 39], "neq0": 9, "gradient": [9, 12, 13, 18, 19, 20, 23, 26, 28, 32, 43, 44, 46, 48, 52], "step\uc5d0\uc11c": [9, 13, 15, 23], "0\uc774": [9, 12, 13, 24], "\uac12\uc73c\ub85c": [9, 15, 21, 23, 25, 27, 28, 31, 51, 56], "\uac00\uac8c\ub418\uace0": 9, "\ud575\uc2ec\uc801\uc778": [9, 21, 32], "\uac00\uc815\uc774": 9, "\uc704\ubc30\ub420": 9, "\uac00\ub2a5\uc131\uc774": [9, 19, 23], "\uc9c0\uae08\uae4c\uc9c0": [9, 13, 32], "\uc598\uae30\ud55c": 9, "diffusion\uc5d0": [9, 25, 27, 51], "\uc801\uc6a9\ud55c": [9, 15, 21, 23, 26, 27, 29, 34, 35, 41, 43, 51, 54, 55], "diffusion\uc5d0\uc11c": 9, "\ud615\ud0dc\uc785\ub2c8\ub2e4": [9, 38], "training\uc744": [9, 15, 21, 26], "50": [9, 13, 27, 30, 39, 40, 42], "\ud655\ub960\ub85c": [9, 19, 30], "empti": [9, 21, 52], "string\uc73c\ub85c": 9, "\ubc14\uafd4\uc8fc\uc5c8\ub2e4\uace0": 9, "prompt\uac00": [9, 10, 18], "\uc8fc\uc5b4\uc9c0\uc9c0\uc54a\uc744": 9, "semantics\ub97c": 9, "\ubc30\uc6b0\ub294": [9, 29], "\uacbd\ud5a5\uc774": [9, 10, 11, 20, 37], "\ud5a5\uc0c1\uc2dc\ucf1c\uc904": 9, 
"\uc788\ub2e4\uace0": [9, 11, 13, 15, 21, 25, 35, 36, 39, 40, 43, 45, 52, 53, 54, 55, 56], "training\uc774": 9, "\ubc29\ubc95\ubcf4\ub2e4": [9, 19, 29], "\ud6a8\uc728\uc801\uc774\ub77c\ub294": [9, 30], "\uacb0\uacfc\ub4e4\uc740": 9, "\uacb0\uacfc\ub4e4\uc785\ub2c8\ub2e4": 9, "\ub17c\ubb38\uc5d0": [9, 13, 29, 39, 56], "\uc788\uc73c\ub2c8": 9, "\ucc38\uace0\ud558\uc2dc\uae30": 9, "\ubc14\ub78d\ub2c8\ub2e4": 9, "limitation\uc774\ub77c\uace0": 9, "\uc774\ubbf8\uc9c0\uc785\ub2c8\ub2e4": [9, 11], "\uc8fc\uc5c8\uc74c\uc5d0\ub3c4": 9, "\uc0dd\uc131\ub418\uc9c0": 9, "\uc54a\ub294": [9, 11, 20, 22, 29, 32, 33, 39, 43, 44, 47, 54, 56], "\ubc1c\uc0dd\ud588\uc2b5\ub2c8\ub2e4": 9, "limit": [9, 20, 26], "\ucf54\ub4dc\ub294": [9, 36], "\uacf5\uc2dd": 9, "\uad6c\ud604": [9, 42, 48, 56], "\uac00\uc838\uc654\uc2b5\ub2c8\ub2e4": 9, "\ucd08\uae30\ud654\ud558\ub294": [9, 53], "\ucf54\ub4dc\ub85c": [9, 35], "\ub9cc\ub4e4": [9, 11, 24, 40, 45], "def": [9, 12, 13, 28, 29, 38, 42, 48, 53, 55, 56], "zero_modul": 9, "modul": [9, 13, 16, 20, 28, 29, 38, 48, 53, 55, 56], "detach": [9, 42, 48], "zero_": 9, "\uae30\ubcf8\uc801\uc73c\ub85c": [9, 15, 19, 20, 28, 37, 49, 55], "nn": [9, 13, 28, 29, 36, 38, 48, 53, 55, 56], "sequential\uacfc": 9, "\uac19\uc740\ub370": 9, "step\uac19\uc740": 9, "input\uc744": 9, "\ubc1b\uc544\uc904": 9, "\ub9cc\ub4e0": [9, 36, 38, 39, 44], "timestepembedsequenti": 9, "sequenti": [9, 13, 27, 38, 48, 55], "timestepblock": 9, "pass": [9, 11], "children": 9, "support": [9, 26], "extra": [9, 11, 20], "emb": [9, 13], "context": [9, 11, 13, 14, 17, 24, 26, 28, 35, 36, 37, 41, 45, 47], "isinst": 9, "elif": [9, 13, 42, 47, 53], "spatialtransform": 9, "els": [9, 12, 13, 28, 29, 38, 42, 47, 53, 55], "github\uc758": 9, "cldm": 9, "py\uc5d0": 9, "class\uc785\ub2c8\ub2e4": 9, "init": [9, 28, 53], "\uae38\uc5b4\uc11c": 9, "\uc0dd\ub7b5\ud588\uc2b5\ub2c8\ub2e4": 9, "__init__": [9, 12, 13, 29, 38, 48, 53, 55, 56], "make_zero_conv": 9, "conv_nd": 9, "dim": [9, 13, 29, 38, 42, 47, 55], 
"hint": [9, 10], "kwarg": 9, "t_emb": 9, "timestep_embed": 9, "model_channel": 9, "repeat_onli": 9, "fals": [9, 12, 13, 28, 29, 42, 48, 53, 55], "time_emb": 9, "guided_hint": 9, "input_hint_block": 9, "type": [9, 47, 48], "dtype": [9, 13, 42, 47], "zero_conv": 9, "zip": [9, 12, 13], "input_block": 9, "append": [9, 13, 38, 48, 53, 55], "middle_block": 9, "middle_block_out": 9, "customizi": 10, "To": [10, 11, 12, 31], "2212": [10, 14, 33, 50], "04488": 10, "seunghwan": [10, 12, 20, 23, 34, 37, 57], "ji": [10, 12, 20, 23, 34, 37, 57], "aug": [10, 17, 23, 37], "user\uc758": 10, "private\ud55c": 10, "concept\uc744": [10, 24, 40], "\uc0dd\uc131\ud558\uace0\uc790\ud558\ub294": 10, "\uc695\uad6c\ub294": 10, "\ud480\uc9c0": 10, "\ubabb\ud568": 10, "diffusion\uc740": [10, 20, 24], "partial\ud55c": 10, "\ubd80\ubd84\ub9cc\uc744": 10, "\ud559\uc2b5\uc2dc\ud0b4\uc73c\ub85c\uc368": 10, "\uae30\uc874\ubcf4\ub2e4": 10, "\ubfd0": 10, "concept\uc5d0": [10, 40], "compress\ud558\ub294": 10, "\ud65c\ubc1c\ud558\uac8c": 10, "\ub418\uc5b4\uc9d0": 10, "\uc785\ub825\ub9cc\uc73c\ub85c": 10, "\uc218\uc900\uae4c\uc9c0": [10, 23], "\uc774\ub984": [10, 15], "general\ud55c": [10, 15], "\uc0dd\uc131\ud558\uc9c0\ub9cc": [10, 19, 22, 26, 33, 48, 53], "user\uac00": 10, "concept\uc758": [10, 40], "\ud589\ubcf5\ud55c": 10, "\uc6b0\ub9ac": [10, 40], "\uac00\uc871": 10, "\uc6b0\ub9ac\uc9d1": 10, "\uac15\uc544\uc9c0": 10, "\ubf40\uc090\uac00": 10, "\ud30c\ub9ac\ub85c": 10, "\uc5ec\ud589\uc744": 10, "\ub5a0\ub098\ub294": 10, "\uacfc\uc815\uc911\uc5d0": 10, "\ubcf4\uc9c0": [10, 47], "\ubabb\ud588\uae30\ub54c\ubb38\uc5d0": 10, "model\uc5d0\uac8c\ub294": 10, "\ub2f9\uc5f0\ud55c": 10, "\uba87\uc7a5\uc758": 10, "\uc774\ubbf8\uc9c0\ub9cc\uc73c\ub85c": [10, 37], "finetuning\ud558\ub294": [10, 22], "\ud559\uc2b5\ud558\uace0\uc790\ud558\ub294": 10, "\uc0dd\uc131\ud574\ub0b4\uc57c\ud568": 10, "\ud559\uc2b5\ub418\uc5c8\ub358": 10, "finetuning\ud55c": 10, "\ud6c4\uc5d0\ub3c4": [10, 22], "customization\uc774": 10, 
"\uc9c4\ud589\ud558\ub2e4\ubcf4\uba74": 10, "\ud559\uc2b5\ud588\ub358": 10, "\uc78a\uc5b4\ubc84\ub9ac\uac70\ub098": 10, "\uc65c\uace1\ud574\ubc84\ub9bc": 10, "draft": 10, "overfit": [10, 47], "\ub418\uc5b4\uc11c": 10, "\uacb0\uacfc\ubb3c\uc758": [10, 35], "variation\uc774": [10, 38], "\ub0ae\uc544\uc9d0": 10, "\uc880\ub354": [10, 12, 20, 23, 37, 51], "\ub098\uc544\uac00": 10, "text\ub85c": 10, "\uc720\uc9c0\ub97c": 10, "caption\uc744": [10, 36], "regular": [10, 13, 18, 27, 32, 36, 42, 53, 56], "tuning\ub3d9\uc548": 10, "augment": [10, 14, 25, 30, 37, 39, 41, 42], "\ubc29\uc2dd\uc758": [10, 12, 18, 34, 35, 46], "\ubcf4\uc5ec\uc8fc\uace0\uc788\uc74c": 10, "\uac8c\ub2e4\uac00": [10, 11, 21, 35], "control\ub3c4": 10, "\uac00\ub2a5\ud568": [10, 14, 28, 31, 40, 45, 49], "general\ud558\uc9c0": 10, "\uc0dd\uc131\uc740": [10, 31], "\ubd88\uac00\ub2a5\ud568": 10, "global\ud55c": 10, "\ud3ec\ud568\ud55c": [10, 16, 17, 25, 29, 31, 32, 34], "\uc18c\ub7c9\uc758": [10, 13], "learning\uc740": 10, "\uc0dd\uac01\ubcf4\ub2e4": 10, "\ud6a8\uacfc\uc801\uc774\uace0": 10, "\uc720\uc6a9\ud568": 10, "\uc804\uccb4\ub97c": [10, 15, 24, 30, 36, 50], "\ud559\uc2b5\ud558\uac70\ub098": [10, 33], "\ucd94\uac00\ud574": [10, 13, 15, 20, 34, 35, 41, 51], "\uc7ac\ud559\uc2b5": [10, 12, 46], "\uc704\uc5d0\uc11c": [10, 21, 22, 29, 34, 35], "customization\uc758": 10, "\uc77c\uc73c\ud0a4\uae30": 10, "\uc26c\uc6c0": 10, "etc": [10, 17, 20, 34], "\uc544\uc8fc": [10, 15, 24], "\uc77c\ubd80\ub9cc\uc744": 10, "\ub300\uc0c1\uc73c\ub85c": [10, 44], "\ucee8\uc149\uc73c\ub85c": 10, "vs": [10, 12, 15, 17, 21, 32, 35, 44, 45, 46, 49, 50, 52, 54, 55], "compress\ud560": 10, "finetuning\ud568\uc73c\ub85c\uc368": 10, "resourse\ub97c": 10, "backbone\uc73c\ub85c": 10, "dm\uc758": 10, "\uc2dc\uc810\uc5d0": [10, 17], "noise\uac00": [10, 13, 15, 21, 23, 38], "\uc11e\uc778": 10, "text\ub098": 10, "\uc0ac\uc6a9\ud558\uc9c0\uc54a\uace0": 10, "space\ub85c": [10, 15, 20, 26, 34, 40], "embedding\ub41c": 10, "\u03b5": [10, 12], 
"\u03b5_": 10, "\u03b8": [10, 17], "\ub080": 10, "\u03b5\ub97c": 10, "\uc608\uce21\ud574\ub0b4\ub294": [10, 13], "tuning\ud560\ub54c\ub294": 10, "layer\uc5d0\ub300\ud574": 10, "update\ud558\ub294\uac8c": 10, "\uae30\ubcf8": [10, 19, 27, 32, 33, 35, 39], "resource\uac00": [10, 20], "\ube44\ud6a8\uc728\uc801\uc73c\ub85c": 10, "\ub9ce\uc774\ub4e4\uace0": 10, "overfitting\ub418\uae30": 10, "\ubcc0\ud654\ub7c9\uc744": [10, 25, 51], "\uccb4\ud06c": 10, "while": 10, "\ubd80\ubd84\uc5d0\ube44\ud574": 10, "\uc5f0\uc0b0\uc758": [10, 14, 24, 27], "wegith": 10, "latent\uc5d0": [10, 19], "\uc8fc\uc785\ud558\ub294": [10, 15], "kei": [10, 17, 24, 25, 27, 28, 31, 34, 37, 41], "parameter\uc5d0": 10, "\ub2e8": [10, 22, 33, 34, 37, 43, 47], "\ucc28\uc9c0": 10, "\ud3ec\ud568\ub418\ub294": 10, "\ub098\uba38\uc9c0\ub294": [10, 14, 16, 30, 37], "\uc4f0\uc9c0\uc54a\ub294": 10, "\ub2e8\uc5b4\ub85c": 10, "\ud615\uc2dd\uc73c\ub85c": 10, "captioning\ud55c": 10, "finetuning\uc911\uc5d0": 10, "\uc78a\uc5b4\ubc84\ub9ac\ub294": 10, "\ud604\uc0c1\uc774": [10, 19, 24, 38, 44, 50], "\uc788\uc744\uc218\uc788\uc74c": 10, "moon": 10, "\uc0dd\uc131\ud558\uba74": [10, 44], "finetuning\ud588\ub358": 10, "moongat": 10, "\uc0dd\uc131\ud574\ubc84\ub9bc": 10, "\ubc29\uc9c0\ud558\uae30\uc704\ud574": 10, "world\uc758": 10, "image\uc5d0\uc11c": [10, 15, 26, 45], "200\uc7a5\uc758": [10, 37], "regul": 10, "\uc720\uc0ac\ud558\ub2e4": 10, "clip\uc5d0\uc11c": [10, 39], "space\uc0c1\uc758": 10, "vector\uac00": [10, 18], "similar\ud558\ub2e4": 10, "joint": [10, 18, 21, 24, 36, 45, 46], "trane": 10, "rare\ud55c": 10, "\ubd80\uc5ec\ud574": [10, 55], "constrain": 10, "optim": [10, 12, 14, 16, 18, 22, 24, 25, 30, 31, 33, 37, 40, 45, 47, 56], "merg": [10, 28], "concept\uc73c\ub85c": 10, "weight\ub97c": [10, 20, 25, 28, 36, 39], "appendix": [10, 18, 54], "a\uc5d0\ub294": 10, "\ub098\uc640\uc788\ub294\ub370": 10, "\uc624\ud0c8\uc790\uc77c": 10, "\uac00\ub2a5\uc131": 10, "reg": [10, 27], "caption\uc758": 10, "\ubf51\uc544": 
[10, 46], "\uacf1\ud55c": 10, "\uac12\uacfc\uc758": 10, "norm\uc744": 10, "\uacc4\uc0b0\ud588\uc744\ub54c": 10, "n\uac1c\uc758": [10, 17, 20, 45], "attention\uc774": [10, 26], "\ucc3e\uc544": [10, 30, 40], "\ud558\ub098\ub9cc": 10, "\uc0ac\uc6a9\ud558\uc790": 10, "250": [10, 42], "two": [10, 11, 19, 26, 30, 36, 38, 40, 45, 47, 51, 52, 54, 55], "veri": 10, "small": [10, 17, 40, 50, 55], "far": [10, 32], "awai": 10, "techniqu": [10, 13, 20, 33, 34, 49], "quant": [10, 40], "\ub300\uc751\ub418\ub294": [10, 53], "\uc0dd\uc131\ud574\ub0c8\ub294\uac00": 10, "\ud45c\ud604\ud574\ub0c8\ub294\uac00": 10, "baselin": [10, 17, 26, 30, 45, 53, 54], "customdiffus": [10, 22], "all": [10, 11, 28], "\uc120\ud638": 10, "inversion\uc740": [10, 40], "alignment\ub294": 10, "\uc120\ud638\ub3c4\uc640": 10, "\ube44\uc2b7\ud558\uc9c0\ub9cc": [10, 39], "alignment\uc218\uce58\ub97c": 10, "diffusion\uc774": 10, "overfitting\ub41c": [10, 37], "\u314cgen": 10, "generate\ub41c": 10, "\uc218\uce58\ub294": [10, 23, 46], "regulat": 10, "customizing\uc774": 10, "resourse\uac00": 10, "Of": 10, "category\uc758": 10, "object\uc5d0": [10, 26, 29], "\ub3d9\uc791\ud558\uc9c0": [10, 19, 23], "hierarch": 11, "2204": 11, "06125v1": 11, "sep": [11, 17, 24, 33, 49, 50, 52, 53], "18": [11, 15, 36, 43, 47], "dalle2": [11, 20], "2022\ub144\uc5d0": 11, "\uacf5\uac1c\ub418\uc5b4": 11, "\uc138\uc0c1\uc744": 11, "\ub180\ub77c\uac8c": 11, "\ub2a5\ub825\ub3c4": 11, "\ub6f0\uc5b4\ub0ac\uace0": 11, "\uc0ac\uc6a9\uc790": 11, "\uc785\ub9db\uc5d0": 11, "\uc870\uc791\ud560": 11, "\ub418\uc5c8\uc8e0": 11, "\uc774\ub984\uc740": 11, "\uc77c\uae4c\uc694": 11, "\ucd08\ud604\uc2e4\uc8fc\uc758": 11, "\ud654\uac00": 11, "salvador": 11, "dali": 11, "wall": 11, "\ud569\uc131\uc5b4\uc785\ub2c8\ub2e4": 11, "\uc0dd\uc131\ud574\ub0b8": 11, "\uacb0\uacfc\ubb3c\uc774": [11, 35, 45], "\uc5b4\ub5bb\uae38\ub798": 11, "\uacb0\uacfc\ubb3c": [11, 17, 18, 45], "\uc0dd\uc804": 11, "\ubaa8\uc2b5": [11, 17, 31], "vibrant": 11, "robot": 11, "half": [11, 
42], "\ubaa8\uc2b5\uc774": [11, 44], "\ubcf4\uc774\ub124\uc694": 11, "\ucd08\ud604\uc2e4\uc8fc\uc758\uc801": 11, "\uac19\uae30\ub3c4": 11, "corgi": 11, "\uc5b4\ub5a4\uac00\uc694": 11, "depict": 11, "explos": 11, "nebula": 11, "\ubaa8\uc2b5\uc744": [11, 25, 26, 32, 36, 40, 51], "\uc131\uc6b4\uc758": 11, "\ud3ed\ubc1c\ub85c": 11, "\ubb18\uc0ac\ud574\ub2ec\ub77c\uace0": 11, "\ud588\uc744": [11, 15, 42, 47, 51], "\uadf8\ub9bc\uc785\ub2c8\ub2e4": [11, 56], "nasa": 11, "\ucd08\uc2e0\uc131": 11, "\ud3ed\ubc1c\uc758": 11, "\uc794\ud574\uc785\ub2c8\ub2e4": 11, "\uadf8\ub7f4\ub4ef\ud558\uc9c0": 11, "\uc54a\ub098\uc694": 11, "mosaic": 11, "largest": 11, "ever": 11, "taken": 11, "hubbl": 11, "telescop": 11, "crab": 11, "six": 11, "light": [11, 17, 18, 32, 36], "wide": 11, "expand": [11, 46, 55], "remnant": 11, "star": 11, "supernova": 11, "\uc8fc\uc758\uc0ac\ud56d": 11, "\ube44\uc120\ud615\uc801\uc73c\ub85c": 11, "\uc0b4\ud3b4\ubd05\ub2c8\ub2e4": 11, "\uc624\ud508\uc6d4\ub4dc": 11, "\uac8c\uc784\ucc98\ub7fc": 11, "\ub9d0\uc774\uc8e0": 11, "\ud575\uc2ec\uc774": 11, "\ub418\ub294": [11, 15, 18, 19, 25, 26, 30, 36, 45, 46, 47, 48, 50, 55], "\uc9c8\ubb38\ub4e4\uc744": 11, "\ub358\uc9c0\uba70": 11, "\ud30c\ud5e4\uccd0": 11, "\uac81\ub2c8\ub2e4": 11, "\ud3ec\uc2a4\ud305\uc740": 11, "openai": [11, 33, 36, 43], "blog": [11, 40], "assemblyai": 11, "eden": 11, "meyer": 11, "\ucc38\uace0\ud588\uc2b5\ub2c8\ub2e4": 11, "\ubcf8\uaca9\uc801\uc73c\ub85c": 11, "\uc804\uc5d0": [11, 14, 33, 36, 47], "\uc54c\uc544\uc57c\ud560": 11, "\ubaa8\ub378\uc785\ub2c8\ub2e4": [11, 38, 48], "fundament": 11, "principl": 11, "quit": 11, "simpl": [11, 23, 24, 36, 46, 50, 55], "first": [11, 12, 28, 43], "associ": 11, "caption": [11, 21, 26, 41, 42, 45, 49, 53], "through": [11, 21, 33, 40, 52], "respect": [11, 18, 46, 49], "dimension": [11, 13, 36], "Then": [11, 28], "cosin": [11, 15, 31, 40, 46, 47, 54], "similar": [11, 30, 41, 43, 45, 47], "each": [11, 38, 47], "simultan": 11, "maxim": [11, 45], "correct": [11, 
47], "minim": [11, 25], "incorrect": [11, 22, 47], "\ud1b5\ud569\uc2dc\ucf30\uc2b5\ub2c8\ub2e4": 11, "\ucd5c\ucd08\ub294": 11, "\uc815\ub2f5\uc740": 11, "\uc544\ub2d9\ub2c8\ub2e4": [11, 38], "22\ub144": 11, "5\uc6d4": 11, "\uc5d0\uac8c": [11, 42], "sota": [11, 19, 20, 29, 30, 31, 32, 33, 37, 39, 41, 45, 49, 55], "\ub0b4\uc8fc\uc5c8\uc2b5\ub2c8\ub2e4": 11, "\ucc0d\uba39\ud558\uae30": 11, "\ub0b4\uc758": [11, 14, 26, 35, 36, 53], "\ud3ec\ucc29\ud574\ub0bc": 11, "\ud45c\ud604": [11, 13, 14, 19, 27, 32, 36, 41, 54], "\ub04c\uc5b4\uc62c\ub9ac\uae30": [11, 30], "\uc778\ub370\uc694": 11, "unclip": [11, 24], "\ubd80\ub985\ub2c8\ub2e4": [11, 54], "\ubcf5\uc7a1\ud574\ubcf4\uc774\ub2c8": 11, "assembl": 11, "\ub2e8\uc21c\ud654\ub41c": 11, "\uc0b4\ud3b4\ubcfc\uac8c\uc694": 11, "f1x4fhzf4mq": 11, "360": [11, 18, 32, 52], "\ubaa8\ub378\uc778": [11, 34, 42, 44, 49], "\uac19\ub124\uc694": 11, "\ucea1\uc158\uc744": [11, 29], "\uc0c1\uc751\ud558\ub294": [11, 18], "autogregress": 11, "\ube44\uad50\ud558\ub294": [11, 15, 22, 25, 51], "\uc218\ud589\ud588\uc2b5\ub2c8\ub2e4": [11, 44], "computation": [11, 52, 55], "\ud6c4\ubc18\ubd80\uc5d0\ub294": 11, "\uc2e4\ud5d8\ud569\ub2c8\ub2e4": 11, "\ubaa8\ub378\ub9cc": 11, "\uc774\ub791": [11, 45, 56], "\uc0ac\uc6a9\ud588\uc744\uae4c\uc694": 11, "\ud559\uc2b5\ud558\ub294\ub370": [11, 27, 40, 47, 52, 53], "\uc131\uacf5\uc744": [11, 14], "\uac70\ub450\uace0": 11, "shift": [11, 14, 15, 42], "capabl": [11, 24], "\ub6f0\uc5b4\ub0ac\uc2b5\ub2c8\ub2e4": 11, "\ub2ec\uc131\ud574\ub0c8\uc2b5\ub2c8\ub2e4": 11, "tak": 11, "\uac31\uc2e0\ud558\ub294": 11, "\uc911\uc774\uc5c8\uc8e0": 11, "determinist": [11, 12, 46, 54, 56], "\ub355\ubd84\uc5d0": [11, 36], "\uc874\uc7ac\ud558\uc9c0": [11, 29, 44], "essenti": 11, "\ubcc0\uc8fc\ud558\uba74\uc11c": 11, "\uc788\uc8e0": 11, "variat": [11, 13, 24, 30], "\uc67c\ucabd\uc758": 11, "\ubcf4\uc874\ub429\ub2c8\ub2e4": 11, "\uadf8\ub4e4\uc774": 11, "\ud45c\ud604\ub418\ub294": 11, "\ubc29\uc2dd\uc774\ub098": 11, 
"\ubc14\ub01d\ub2c8\ub2e4": 11, "\uadf8\ub7fc\uc5d0\ub3c4": [11, 19, 26, 41, 44], "\ud2b9\uc720\uc758": 11, "\ud654\ud48d\uc740": 11, "\ubcc0\uc8fc\uace1\ucc98\ub7fc": 11, "\ub9e4\ubc88": [11, 15, 28, 45], "\uc5f0\uc8fc": 11, "\ud574\ub0bc": 11, "\uc788\ub294\uac81\ub2c8\ub2e4": 11, "\ud30c\ud5e4\uce58\uae30": 11, "\uc774\ubc88\uc5d0\ub294": [11, 44], "\uc0b4\ud3b4\ubcf4\uc8e0": 11, "\uc790\uccb4\uc758": [11, 30], "\uc124\uba85": [11, 15, 29, 33, 36, 46], "\uc0ac\uc2e4": [11, 23, 26], "\uc870\uac74\uc73c\ub85c": [11, 17, 19, 24, 33, 36], "\uc790\uccb4\ub3c4": 11, "\ubc1b\uc2b5\ub2c8\ub2e4": 11, "\ubb3c\ub860": [11, 39], "\ubc1b\uaca0\uc8e0": 11, "1\ub3001": 11, "\ub300\uc751\ub418\uae30": 11, "duel": 11, "\ubb38\uc81c\ub420": 11, "\uc5c6\ub2e4\uace0": [11, 14, 38], "\ubcc0\ub860\ud569\ub2c8\ub2e4": 11, "\ub192\uc774\uae30": [11, 23, 24, 31, 51], "2\uac1c\uc758": [11, 24, 32, 36, 51, 55], "dot": [11, 15, 20], "\uc0ac\uc6a9\ud588\ub2e4\uace0": [11, 21, 39, 47, 54], "modifi": 11, "\uc8fc\uc7a5\ud569\ub2c8\ub2e4": [11, 53, 54, 55], "\ud1b5\ud569\uc2dc\ud0a4\ub0d0\ud558\uba74": 11, "\ud558\ub294\uac70\uc8e0": 11, "\ubc29\ubc95\uc73c\ub85c": [11, 19, 26, 39, 40, 44, 46, 48, 52, 54], "\uc0ac\uc6a9\ud568\uc73c\ub85c\uc368": [11, 36, 46, 50], "\uc788\ub358": 11, "\ud65c\uc6a9\ud560": [11, 18, 24, 40, 43], "\ud544\uc694\ud560\uae4c\uc694": 11, "obtain": 11, "full": [11, 13, 19, 23, 27, 29, 37, 50, 53], "possibl": [11, 54], "given": [11, 15, 16, 17, 18], "\ub531\ud788": 11, "\uc640\ub2ff\uc9c0\ub294": 11, "\uc2e4\ub9dd\ud558\uae34": 11, "\uc774\ub985\ub2c8\ub2e4": 11, "\uc720\ubb34\uc5d0": 11, "\uc218\ud589\ud588\ub2e4\uace0": [11, 21], "\uc0b4\ud3b4\ubcfc\uae4c\uc694": 11, "\ubaa8\ub378\ucc98\ub7fc": [11, 42], "\uc8fc\uc5b4": [11, 42, 44], "\ud6cc\ub96d\ud588\uc2b5\ub2c8\ub2e4": 11, "\uacbd\uc6b0\uc758": [11, 19, 44], "\uc544\ud0a4\ud14d\uccd0\uc5d0": 11, "\uadf8\ub807\uc9c0\ub9cc": [11, 49], "\uc758\ubb38\uc774": [11, 39], "\ub9d0\ub054\ud788": 11, 
"\ud574\uc18c\ub418\uc9c0\ub294": 11, "\uc65c\ub0d0\ud558\uba74": [11, 15], "95": [11, 33], "\ubc29\uc2dd\uc5d0": [11, 27, 36, 40], "\uc801\uc6a9\ud574": [11, 35, 51], "\uc2e4\ud5d8\ud588\uc2b5\ub2c8\ub2e4": [11, 54], "\uacf5\uc815\ud55c": [11, 36], "\uc2e4\ud5d8\uc774\ub77c\uace0": 11, "\ubcf4\uae34": 11, "\uc5b4\ub824\uc6b8": [11, 31, 36], "\ud559\uc2b5\uc2dc\ucf30\uc744": 11, "\uc5c6\uc2b5\ub2c8\ub2e4": [11, 53, 55], "\uac1c\uc778\uc801\uc73c\ub85c": [11, 38, 39], "\uc800\ub294": [11, 38], "\ubcf4\uace0": [11, 13, 24, 25, 39], "\ubc18\ub4dc\uc2dc": [11, 34], "\uc368\uc57c\ud558\ub294": 11, "\uadfc\uac70\uc5d0": 11, "\uc124\ub4dd\ub825\uc774": 11, "\ub5a8\uc5b4\uc9c4\ub2e4\uace0": 11, "\uc0dd\uac01\ud588\uc2b5\ub2c8\ub2e4": 11, "\uc368\uc57c\ud560\uae4c\uc694": 11, "\uac1d\uccb4\ub97c": [11, 26, 31, 33, 52], "\ubb18\uc0ac\ud55c": 11, "\uac1d\uccb4\uc758": [11, 26, 33], "\ubc1c\ud604": 11, "\uc758\ubbf8\ub860\uc801": 11, "\uad00\uacc4\ub97c": [11, 12, 23, 51], "\uc911\uc694\ud558\ub2e4\uace0": [11, 47], "manipul": [11, 26, 34, 37, 43], "diff": 11, "appli": [11, 41], "interpol": [11, 20, 23, 43, 53], "normalis": 11, "produc": 11, "descript": [11, 47], "\ud558\ub294\uc9c0\ub294": 11, "\uc0b4\ud3b4\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": [11, 48, 55], "\uadf8\ub798\uc11c": [11, 20, 21, 26, 39, 48, 54], "\ubb50\uac00": [11, 31], "\uc88b\uc740\uac00\uc694": 11, "\uc0dd\uc131\ubb3c\uacfc": 11, "\uc0dd\uc131\ubb3c\uc744": 11, "\uc0ac\ub78c\ub4e4\uc5d0\uac8c": 11, "\uc81c\uc2dc\ud558\uace0": [11, 17], "photor": [11, 21, 49], "\ub9e4\uae30\ub3c4\ub85d": 11, "when": [11, 38, 48, 49], "guidanc": [11, 14, 16, 18, 24, 25, 26, 30, 33, 36, 39, 41, 50, 53, 55], "both": [11, 15, 42], "versu": 11, "\uacb0\ub860\uc740": 11, "compar": [11, 25, 26, 40], "\ud6e8\uc52c": [11, 14, 15, 18, 21, 22, 23, 25, 26, 29, 30, 39, 42, 44], "bipartit": 11, "\uc778\ucf54\ub529": [11, 27, 31, 32, 41], "\uc5bb\uc73c\uba70": 11, "\ubcf5\uc6d0\ud558\ub294\ub370": 11, "\ud544\uc694\ud55c": [11, 17, 31, 32, 40, 
41, 42, 44], "\uc794\uc5ec": 11, "\uc815\ubcf4\ub4e4\uc744": [11, 26, 55], "\uc9c0\ub2d9\ub2c8\ub2e4": 11, "\ubcc0\uc8fc\ud558\uae30": 11, "\u03b7": [11, 12], "\ud574\uc9c0\uace0": 11, "\ubcf5\uc6d0\ud574\ub0c5\ub2c8\ub2e4": 11, "\ucee4\uc9c8\uc218\ub85d": [11, 12, 23, 37], "\uc0dd\uae30\uace0": 11, "\uadfc\ucc98\uc5d0\uc11c": 11, "perceptu": [11, 15, 36, 44, 53], "centere": 11, "\ub9cc\ub4e4\uc5b4\ub0bc": [11, 18, 21], "\ud0a4\uc6b0\uba74": [11, 36], "\uc874\uc7ac\ud558\uace0": [11, 43], "\uc720\uc2e4\ub418\uc5c8\ub294\uc9c0": 11, "\ud0d0\uc0c9": 11, "\ud0d0\uc0c9\ud574\ub0bc": 11, "\uc788\ub294\uac70\uc8e0": 11, "\uac83\ub3c4": [11, 13, 30, 39, 56], "\ud574\uc11c": [11, 20, 25, 30, 31, 40, 46, 48, 51], "\uc900\ub2e4\uba74": 11, "\ucea1\uc158\uc774": 11, "\uc8fc\uc5b4\uc838\uc788\uc744": 11, "\uc6b0\ub9ac\uac00": [11, 13, 27, 44], "z_t0": 11, "current": [11, 12, 13], "embd": 11, "\uc870\uc791\ub429\ub2c8\ub2e4": 11, "typograph": 11, "attak": 11, "attack": 11, "\uc0ac\ubb3c": 11, "\uc704\uc5d0": [11, 18, 21, 32, 54], "\uae00\uc528\uac00": 11, "\uc4f0\uc5ec": 11, "\uacbd\uc6b0\uc785\ub2c8\ub2e4": [11, 38], "multimod": [11, 24, 42, 49], "\uc0ac\ubb3c\uc744": 11, "\ud310\ub2e8\ud558\ub294": 11, "ipod": 11, "\uc885\uc774\uac00": 11, "\ubd99\uc740": [11, 44], "\uc0ac\uacfc\ub97c": 11, "\ubd84\ub958\ub97c": [11, 20], "\uc218\ud589\ud574\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 11, "\uc5ed\uc2dc": [11, 26, 30, 32, 56], "granni": 11, "smith": 11, "\uac00\uae5d\ub2e4\uace0": 11, "\ud310\ub2e8\ud588\uc2b5\ub2c8\ub2e4": 11, "\uc0ac\uacfc\uc758": 11, "\uc0ac\uc9c4\uc73c\ub85c": 11, "recov": [11, 30], "\ud574\ub0c5\ub2c8\ub2e4": 11, "\uc774\ucc98\ub7fc": [11, 43, 56], "\ub2e8\uc810\uc740": 11, "\uc5c6\ub098\uc694": 11, "cube": [11, 32, 33], "\uadf8\ub4e4\uc758": [11, 40], "\uc18d\uc131": [11, 26, 31, 44], "color": [11, 17, 18, 30, 32, 36, 37, 43, 47, 50, 52, 55], "\ub9e4\uce6d\uc2dc\ud0a4\ub294": 11, "\ub5a8\uc5b4\uc9d1\ub2c8\ub2e4": 11, "red": [11, 45], "\ud30c\ub780": [11, 42], 
"\ud050\ube0c": 11, "\ube68\uac04": [11, 17, 39], "\ud050\ube0c\ub97c": 11, "\uadf8\ub824\ub2ec\ub77c\uace0": 11, "\ud050\ube0c\uc640": 11, "\ud050\ube0c\uc5d0": 11, "attribut": [11, 15, 26, 38, 50], "\ubd80\uc5ec\ud574\uc57c\ud560\uc9c0": 11, "\ud5f7\uac08\ub824\ud569\ub2c8\ub2e4": 11, "sign": [11, 33, 52], "sai": 11, "deep": [11, 23, 29, 32, 39, 43, 44, 49, 54], "\ub9cc\uc758": 11, "\uc5b4\ub824\uc6cc\ud558\ub294": 11, "\ubb38\uc81c\uc785\ub2c8\ub2e4": 11, "\uc0c1\ud669\uc5d0\uc11c": [11, 13], "\ub514\ud14c\uc77c\uc744": [11, 19, 31, 40], "\ubb18\uc0ac\ud558\ub294": 11, "show": [11, 49], "some": [11, 18], "complex": [11, 53, 55], "\ub124\uc628": 11, "\uc0ac\uc778\ub4e4\uc758": 11, "\ub514\ud14c\uc77c\ub4e4\uc774": 11, "\ud655\uc778\ud558\uc2e4": 11, "\uc218\ud559\uc801": 11, "justifi": 11, "\ub77c": [11, 18, 54, 55], "\ud569\uc2dc\ub2e4": [11, 38], "\uadf8\uc5d0": [11, 34, 44, 56], "\uc800\uc790\uc758": 11, "\uc8fc\uc7a5": [11, 14, 30, 46, 49], "\uc0d8\ud50c\ub9c1\ud560": [11, 33, 43], "equal": 11, "hold": 11, "becaus": 11, "function": [11, 14, 17, 18, 20, 25, 26, 31, 32, 40, 43, 48, 51, 53, 54, 56], "\ud3ec\uc2a4\ud305\uc744": 11, "\ubd80\uac00": 11, "\uacf5\uc2dd\uc744": 11, "\ud480\uc5b4\uc11c": 11, "\ud574\uc124\ud574\ubcf4\uba74": 11, "\uc0d8\ud50c\ub9c1\ud558\uace0": [11, 14, 33, 36, 42, 56], "\uc0d8\ud50c\ub9c1\ud568\uc73c\ub85c\uc368": 11, "\uc0d8\ud50c\ub9c1\uc774": [11, 27], "\uac00\ub2a5\ud574\uc9c0\ub294": 11, "\uc5c6\ub294\uc9c0": 11, "\uad81\uae08\ud574\uc11c": 11, "\uacf5\ubd80\ud574\ubd24\uc2b5\ub2c8\ub2e4": 11, "\ud574\uc18c\ud558\uae30": 11, "\ub178\ub825\uc744": [11, 18], "\ud558\uace0\uc788\ub294\uc9c0": 11, "\uc815\ub7c9\uc801\uc73c\ub85c": [11, 19], "\ud3c9\uac00\ud560": [11, 50], "\uc870\uc0ac\ud574\ubd24\uc2b5\ub2c8\ub2e4": 11, "\uacb0\uacfc\ubd80\ud130": 11, "\ub9d0\uc500\ub4dc\ub9ac\uba74": 11, "\uc6f9\ud06c\ub864\ub9c1": 11, "\uc874\uc7ac\ud55c\ub2e4\uace0": 11, "\ud558\uace0\uc788\ub294\uc9c0\ubd80\ud130": 11, "preview": 11, "safeti": 
11, "\ub178\ub825": 11, "\ub370\uc774\ud130\uc5d0\uc11c": [11, 33, 36, 39], "violent": 11, "hate": 11, "adult": 11, "\uc81c\uac70\ud568\uc73c\ub85c\uc368": 11, "\ub178\ucd9c\ub418\ub294": 11, "\ucd5c\uc18c\ud654\ud588\ub2e4\uace0": 11, "polici": 11, "\uc704\ubc18\ud55c": 11, "\uc790\uc815\ud558\ub294": 11, "\uc2dc\uc2a4\ud15c\uc744": [11, 33], "\ubcf4\uc720\ud558\uace0": 11, "\uc2e0\ub8b0\ud560": 11, "\uc804\ubb38\uac00\ub4e4\uacfc": 11, "\uac80\ud1a0\ub97c": 11, "\uc9c4\ud589\ud588\ub2e4\uace0": [11, 21, 50], "\uae30\ubc95\uc774": [11, 21, 43], "2202": [11, 54], "04053": 11, "j": [11, 13, 20, 32, 36], "min": [11, 12, 25, 28, 51], "dallev": 11, "contribut": [11, 19, 20, 32, 46, 52], "3\uac00\uc9c0\ub97c": 11, "\uc81c\uacf5\ud569\ub2c8\ub2e4": [11, 22, 43, 52], "\ucd5c\uadfc\uc758": [11, 26, 44], "recognit": [11, 14, 39], "skill": 11, "\uc0c1\ub300\uc801\uc73c\ub85c": [11, 17, 18, 22, 46], "\ub6f0\uc5b4\ub098\uc9c0\ub9cc": [11, 20, 23], "count": [11, 50], "spaial": 11, "\ub2a5\ub825\uc740": [11, 42], "\ub5a8\uc5b4\uc9d0\uc744": 11, "gender": 11, "skin": 11, "tone": 11, "bias": 11, "\ubd84\uc11d": [11, 17, 27, 40, 46], "web": 11, "\ud559\uc2b5\ud588\uc74c\uc744": 11, "\ubcf4\uc5ec\uc8fc\uc5c8\uc2b5\ub2c8\ub2e4": 11, "social": 11, "diagnost": 11, "who": 11, "nurs": 11, "252\uac1c\uc758": 11, "\ud0d0\uc9c0\ud569\ub2c8\ub2e4": 11, "autom": 11, "detect": 11, "verifi": 11, "reliabl": 11, "\uc8fc\uba74\uc11c": 11, "\uc0ac\ub78c\uc758": [11, 19, 29, 38], "\uc131\ubcc4\uc744": 11, "\ub9de\ucd94\uac8c": 11, "\ub2f5\ubcc0\uc744": 11, "\uc2e0\uacbd\ub9dd\uc73c\ub85c": 11, "facial": [11, 37], "landmark": 11, "\ucd94\ucd9c\ud558\uace0": [11, 33, 35, 53], "illumin": [11, 18], "\ubcf5\uc7a5\uc744": 11, "\ud0d0\uc9c0\ub41c": 11, "uniform": [11, 12, 42, 55], "skew": 11, "\ub418\uc5b4\uc788\ub294\uc9c0": 11, "expert": [11, 24], "per": [11, 24, 52], "profess": 11, "\ud3c9\uac00\ud558\ub294\ub370\uc5d0": 11, "\uc131\uacf5\ud588\uc2b5\ub2c8\ub2e4": 11, "satbl": 11, 
"\uc6f9\ud06c\ub864\ub9c1\uc744": 11, "\uc874\uc7ac\ud588\uc2b5\ub2c8\ub2e4": 11, "\ub178\ub825\uc774": 11, "\uc9c0\uc18d\ub418\uace0": 11, "\ubbf8\ub798\uc5d0\ub294": 11, "\uc548\uc804\ud558\uac8c": 11, "\uc788\uae30\ub97c": [11, 33], "\uae30\ub300\ud569\ub2c8\ub2e4": 11, "2010": 12, "02502": 12, "april": [12, 20], "\ub2e8\uc810\uc778": 12, "process\ub85c": [12, 13, 23, 27, 46, 51], "\uc815\uc758\ud568\uc73c\ub85c\uc11c": 12, "deterministic\ud55c": 12, "sampling\uc774": [12, 46], "adversari": [12, 20, 22, 27, 34, 38, 48, 53], "\ubcf4\uc5ec\uc8fc\uace0\uc788\ub2e4": 12, "gan\uc740": [12, 20, 38, 44], "\ubd88\uc548\uc815\uc131\uc744": [12, 44], "generator\uc640": 12, "discriminator\uc758": [12, 20], "imbalanced\uc5d0": 12, "mode": [12, 15, 27, 28, 55], "collaps": [12, 27], "\uadf8\ub7ec\ub358": 12, "ddpm\uacfc": [12, 15, 21, 34], "ncsn\uac19\uc740": 12, "training\uad6c\uc870\uac00": 12, "\ub4f1\uc7a5\ud558\uc600\uace0": 12, "\uc131\uacf5\uc758": 12, "\ubcf4\uc5ec\uc8fc\uc5c8\ub2e4": [12, 15, 24, 37, 42, 51], "ddpm\uc740": [12, 46], "\uac70\uce58\ub294\ub370": 12, "\uc774\ub54c\ubb38\uc5d0": 12, "gan\uc5d0": [12, 27], "\ub290\ub9b0": [12, 25, 51], "performance\ub97c": [12, 27], "50k": [12, 54], "less": 12, "than": [12, 52], "about": 12, "20h": 12, "256": [12, 14, 15, 19, 31, 32, 33, 35, 36, 44, 45, 48, 49, 52, 53], "1000h": 12, "ddim\uc740": [12, 15, 46], "chain\uc5d0": 12, "\ub300\uccb4\ud558\uc600\uace0": 12, "\uacb0\uad6d": [12, 15, 21, 23, 30, 31, 45, 46], "\ube60\ub974\uace0": [12, 15, 22, 51], "\uc0dd\uc131\ud574\ub0b4\uace0": [12, 37], "accel": 12, "ddpm\uacfc\ub294": 12, "consistency\ud55c": 12, "\ubcf4\uc5ec\uc90c\uc73c\ub85c\uc368": 12, "latent\uac04\uc758": 12, "interpolation\uc774": 12, "If": 12, "equival": 12, "process\ub294": [12, 15, 21, 51], "\ub3d9\uc791\ud55c\ub2e4": 12, "\ubbf8\ub798": 12, "\uc2dc\uc810\uc744": [12, 15, 17, 33], "\uc608\uce21\ud558\uae30\uc704\ud574": 12, "\uc774\uc6a9\ud55c\ub2e4": [12, 15, 21, 36], "\uc2dc\uc810\uc740": 12, 
"\uacfc\uac70": [12, 43], "\uac12\uc5d0\ub294": 12, "\uac16\ub294\ub2e4": [12, 25], "t\ub294": 12, "ddpm\uc5d0\uc11c": [12, 15, 21, 23, 46], "\uc88c\uc9c0\uc6b0\uc9c0\ud558\ub294": 12, "parameter\uc774\ub2e4": [12, 51], "\ub300\ucda9": 12, "\ubc88\uc758": [12, 31], "sequential\ud558\uac8c": [12, 36], "\uac70\uccd0\uc57c\ud558\uace0": 12, "\ud604\uc800\ud788": [12, 23, 54], "\uc18d\ub3c4\ub97c": [12, 22, 24, 29, 33, 51], "\uc694\uc18c\uac00": [12, 16], "\uad6c\ud558\uae30\uc704\ud574": 12, "\ucc38\uc870": [12, 18, 22, 36], "\uac12\ub9cc\uc744": 12, "process\uc758": [12, 18, 23], "stochastic\ud55c": [12, 15], "chap": 12, "And": 12, "unifi": [12, 24], "\uc2dd\uc744": [12, 15, 36, 46, 54], "\uc0d8\ud50c\ub9c1": [12, 17, 19, 20, 27, 29, 31, 32, 33, 39, 41, 46, 49], "\uad00\uacc4": [12, 31], "t\uc2dc\uc810\uc758": [12, 15], "\uc608\uce21\ud55c": [12, 15, 21, 22, 25, 36, 51], "\u03c3\uac00": 12, "\uac00\uc9c8": [12, 32], "\uc218\uc2dd\uacfc": 12, "\ub3d9\uc77c\ud558\ub2e4": 12, "explan": 12, "deterministic\ud558\uae30\ub54c\ubb38\uc5d0": [12, 46], "\uacc4\uc0b0\ud560": [12, 46], "subset\uc758": [12, 46], "\uc2dc\uc810\ub9cc\uc73c\ub85c": [12, 46], "method\ub294": [12, 40, 46], "\uc57d\uac04\uc758": [12, 19, 21, 26, 27, 46], "efficiency\ub97c": [12, 46], "\ucda9\ubd84\ud788": [12, 22, 26, 32, 43, 46, 51], "\uc99d\uac00\uc2dc\ud0ac": [12, 46], "ddim\uc758": [12, 46], "od": [12, 25, 33, 43, 53], "encoding\uc774": [12, 32], "\uc720\ub3c4\ud560": [12, 18], "table1": [12, 14, 20], "euqat": 12, "simple\ud558\uac8c": 12, "control\ud558\uae30\uc704\ud55c": 12, "\ud69f\uc218": [12, 22], "\u03b7\uac00": 12, "step\uc5d0": [12, 15, 23, 26, 51], "step\uacfc": 12, "time\uc774": 12, "linear\ud55c": 12, "step\uc5d0\uc11c\ub3c4": 12, "\uc5b4\ub290\uc815\ub3c4\uc758": [12, 51], "object\ub97c": 12, "kera": 12, "diffusionmodel": 12, "image_s": 12, "width": [12, 14, 20, 29, 46], "block_depth": 12, "get_network": 12, "denorm": 12, "convert": [12, 47], "pixel": [12, 24, 26, 27, 32, 36, 44, 45, 
51, 52, 53, 55], "back": [12, 18], "rang": [12, 32, 45, 47, 48, 55], "tf": 12, "clip_by_valu": 12, "diffusion_schedul": 12, "diffusion_tim": 12, "angl": [12, 18], "start_angl": 12, "aco": 12, "max_signal_r": 12, "end_angl": 12, "min_signal_r": 12, "diffusion_angl": 12, "signal_r": 12, "co": [12, 13, 32, 54], "noise_r": 12, "sin": [12, 13, 32], "note": 12, "squar": [12, 47, 54], "sum": [12, 15, 25, 28, 32, 33, 36, 56], "alwai": 12, "noisy_imag": 12, "move": [12, 14, 25, 35, 43, 51], "ema_network": 12, "compon": [12, 30, 52, 53], "calcul": 12, "pred_nois": [12, 13], "pred_imag": 12, "train_step": 12, "have": 12, "deviat": 12, "like": [12, 20], "shape": [12, 13, 14, 29, 30, 32, 33, 36, 38, 40, 47, 48, 50], "batch_siz": [12, 38, 42, 56], "minval": 12, "maxval": 12, "accordingli": 12, "gradienttap": 12, "tape": 12, "separ": [12, 29, 38, 47], "noisi": [12, 17, 19, 24, 55], "noise_loss": 12, "image_loss": 12, "trainable_weight": 12, "apply_gradi": 12, "noise_loss_track": 12, "update_st": 12, "image_loss_track": 12, "name": [12, 28], "reverse_diffus": 12, "initial_nois": 12, "diffusion_step": 12, "num_imag": 12, "step_siz": 12, "line": 12, "pure": [12, 54], "its": 12, "assum": 12, "nonzero": 12, "next_noisy_imag": 12, "ones": 12, "remix": 12, "next": 12, "next_diffusion_tim": 12, "next_noise_r": 12, "next_signal_r": 12, "generated_imag": 12, "probabilist": [13, 27, 28, 39], "neurip": [13, 46, 49], "2020": [13, 15, 23, 33], "2006": [13, 28], "11239": [13, 28], "pytorch": [13, 28, 29, 36, 38, 45, 48, 56], "review": [13, 25, 28, 40, 57], "pr": [13, 17, 28, 47], "409": [13, 28], "beomsoo": [13, 28, 57], "park": [13, 15, 21, 26, 28, 36, 57], "apr": [13, 28, 38, 43, 48, 56], "19": [13, 28], "velog": [13, 44, 45], "yetsyl0705": 13, "what": 13, "inference\ub85c": [13, 51], "markov\uac00": 13, "distribution\uc758": 13, "\ub54c\uae4c\uc9c0": [13, 15, 54], "\ub354\ud574\uac00\ub294": 13, "\uc5ed\uc73c\ub85c": [13, 33], "\uac70\uce58\uba70": [13, 51], "\uad6c\uc131\ub428": [13, 14, 
33], "\uc815\uc758\ud558\uae30": 13, "\uc27d\uace0": 13, "\ud559\uc2b5\uc2dc\ud0a4\ub294": [13, 21, 44], "\ud3b8\ub9ac\ud568": 13, "\uc0dd\uc131\uc774": [13, 16, 17, 18, 22, 31, 37, 41, 45, 46, 49, 52], "\ubcc0\ubd84\ucd94\ub860": [13, 56], "\uc0ac\ud6c4\ud655\ub960": 13, "posterior": [13, 18, 45, 56], "\ubd84\ud3ec": [13, 27, 33, 45], "\ub2e4\ub8e8\uae30": [13, 56], "\uc26c\uc6b4": [13, 56], "\ud655\ub960\ubd84\ud3ec": [13, 51], "\uadfc\uc0ac": 13, "approxim": [13, 18, 48, 51, 56], "\ud45c\ud604\uc2dd\uc5d0": 13, "\ubcf4\ud1b5": [13, 14, 22, 25, 28, 38, 39, 40, 44], "parameter\uc758": [13, 21], "\uc2dd\uc758": [13, 15], "\ucc28\uc218\ubcf4\ub2e4": 13, "\uc218\ub85c": 13, "3\ucc28": 13, "\ud45c\ud604\uc2dd": 13, "\ucc28\uc218\ub85c\uc758": 13, "\ud568\uc218": [13, 17, 19, 20, 32, 43, 44, 45, 46], "\uc0c1\ud0dc\uc5d0\uc11c": [13, 22, 44, 53], "\uc0c1\ud0dc\ub85c": [13, 17, 18, 22, 29, 36, 47, 55], "\ub118\uc5b4\uac08": 13, "\ub2e8\uacc4\uc758": [13, 19, 33, 40, 48], "\uc0c1\ud0dc\uc5d0\ub9cc": 13, "graphic": [13, 49, 52], "_0": [13, 43, 51], "prod_": [13, 15], "quad": [13, 26, 32, 43], "beta_t": [13, 15], "chain\uc73c\ub85c": 13, "\ucd94\uac00\ud560": [13, 17, 33], "beta_1": [13, 32], "\ub354\ud574\uc900\ub2e4": 13, "\uc774\uba74": [13, 34, 49], "mean\uc778": 13, "\uac16\uc9c0": 13, "\uc99d\uac00\ud568": 13, "\ub2e8\uc21c\ud788": [13, 19, 20, 26, 32, 34, 36, 40, 44], "noise\ub9cc\uc744": 13, "\ub354\ud574\uc8fc\ub294\uac8c": 13, "scaling\ud558\ub294": 13, "variance\uac00": 13, "\ubc1c\uc0b0\ud558\ub294": 13, "\ub9c9\uae30": 13, "\uc644\uc804": [13, 32], "destroy\ub41c": 13, "\uc0c1\ud0dc": [13, 14], "boldsymbol": 13, "sigma": [13, 15, 25, 32, 36, 38, 43, 46, 51, 54, 56], "\uac00\uc6b0\uc2dc\uc548": [13, 19, 27, 33, 41], "1994\ub144": 13, "process\uac00": [13, 34], "\uac00\uc6b0\uc2dc\uc548\uc774\uba74": 13, "process\ub3c4": 13, "\uac00\uc6b0\uc2dc\uc548\uc73c\ub85c": 13, "\uc4f0\uba74": 13, "\ub41c\ub2e4\ub77c\ub294": 13, "\uc99d\uba85\uc774": 13, "\ud574\uc57c": 
[13, 15, 25, 26], "sigma_": [13, 17, 46, 47, 54], "hierarach": 13, "vae\uc5d0\uc11c\uc758": 13, "\uacfc\uc815\uacfc": 13, "\ube44\uc2b7\ud568": [13, 40], "\ubaa9\uc801\uc740": 13, "\uc81c\uac70\ud560": 13, "\uac83\uc778\uac00": 13, "\ub4e4\uc5b4\uc654\uc744": [13, 19, 39], "\uc608\uce21\ud560": [13, 31, 33], "\uc608\uce21\uc774": 13, "\uac00\ub2a5\ud574\uc9d0": [13, 40], "leq": [13, 25, 54], "_q": [13, 24, 27], "geq": 13, "likelihood\ub97c": 13, "\ucd5c\uc18c\ud654": [13, 18, 32], "\uc218\uc2dd\uc744": [13, 15, 34, 43, 44, 46, 51, 56], "evid": [13, 45], "bound": [13, 18, 24, 26, 32, 33, 56], "\uc6b0\ud56d\uacfc": 13, "\uc815\ub9ac\ud558\uace0": 13, "\ud480\uc5b4\ub0b4\uba74": 13, "elbo\uc758": 13, "\uc5ed\ud560\uc740": 13, "\uad00\ucc30\ud55c": 13, "\ud798\ub4e0": 13, "\uc774\ub8e8\uace0": 13, "\uc870\uae08": [13, 30], "\ubd84\ud3ec\uc778": [13, 51], "\ud45c\ud604\ud558\ub824": 13, "\ucc28\uc774": [13, 14, 27, 34], "diverg": [13, 18], "underbrac": [13, 15], "_1": [13, 19, 36], "\ub098\uc628\ub2e4": [13, 46], "term\uc73c\ub85c": 13, "\ud559\uc2b5\uc2dc\ud0b4": 13, "\uc9c0\uc6b0\ub294": 13, "\uc9c0\uc6c0": 13, "ddpm\uc5d0\uc11c\ub294": [13, 15, 21, 23], "induct": [13, 14], "bias\ub97c": [13, 38, 40], "\ub298\ub824": [13, 39], "stable\ud558\uace0": 13, "\uc131\ub2a5\ub3c4": [13, 39, 42, 50], "\uac1c\uc120\ud560": [13, 23, 51], "\uc788\uc5c8\uc74c": [13, 14, 28, 40], "\ub9cc\ub098\ubcf4\uc9c0": 13, "\ubabb\ud588\ub358": [13, 47], "\uc815\ud655\ud55c": [13, 16, 18, 19, 26, 32, 40, 42, 43], "\uc608\uce21\uc744": [13, 19, 22, 24, 33, 36], "\ud480\ub824\ub294": 13, "\uace0\uc815": [13, 14, 16, 23, 40], "\ud588\ub354\ub2c8": 13, "\uc798\ub428": 13, "02\ub85c": 13, "linear\ud558\uac8c": 13, "\uac00\uae4c\uc6b8\uc218\ub85d": 13, "\uc801\uac8c": [13, 15, 45], "\uc8fc\ub294": [13, 15, 21, 24, 26, 29, 55], "parameter\uac00": [13, 24], "\ub418\uae30": [13, 25, 28, 32, 44], "beta": [13, 14, 22], "progress": [13, 43, 51], "posterior\ub97c": 13, "\ub9cc\ub4e4\uc5c8\uc744\ub54c": 13, 
"\ubcf5\uc6d0": [13, 18, 19, 33], "simplic": 13, "sjina0722": 13, "\ub9ac\ubdf0": [13, 28, 32], "\uac00\uc815\ud588\uace0": 13, "\ubc1b\uae30": [13, 33, 37], "\ud559\uc2b5\uc2dc\ud0a4\uc9c0": [13, 26], "\uc54a\uc544\ub3c4": [13, 48, 55], "\ub41c\ub2e4\uace0": 13, "\uc0dd\uac01\ud574": 13, "term\uc744": [13, 18], "\uad6c\ud558\uc9c0": [13, 48], "\uad6c\ud574": 13, "\uc815\ud655\ub3c4\ub97c": [13, 39], "\ub192\uc784": [13, 18], "int_": [13, 32], "sigma_1": 13, "arrai": 13, "ll": [13, 28, 47], "infti": 13, "255": 13, "case": [13, 20, 30, 50], "\uc0ac\uc774\ub85c": [13, 32], "linearli": [13, 15, 31, 54], "\ub2e8\uacc4\uc5d0\ub294": 13, "\ucd94\uac00\ud558\uc9c0": [13, 36], "divergence\ub97c": 13, "\uc88c\ud45c": [13, 32, 36], "\uc704\uc640": [13, 15, 28, 29, 36, 42, 43, 44, 46, 54], "\ub098\ud0c0\ub09c\ub2e4": [13, 19], "output\uac04": 13, "\uc904\uc774\ub294": [13, 22, 25, 36, 54], "denoising\uacfc": 13, "\ube44\uc2b7\ud574": 13, "ddpm\uc774\ub77c\ub294": 13, "\uc774\ub984\uc774": [13, 49], "\ubd99\uc74c": 13, "objective\uc744": 13, "\uc5d0\uc11c\ubfd0\ub9cc": 13, "t\uc5d0": [13, 24, 26, 51], "\uac00\ub2a5\ud558\uae30": [13, 17], "\ud6a8\uacfc\uc801": [13, 18], "psuedo": 13, "\ub354\ud574\ub098\uac00\ub294": 13, "\uc5bc\ub9c8\ub9cc\ud07c": 13, "\ub354\ud574\uc84c\ub294\uc9c0\ub97c": 13, "step\uc758": [13, 15, 21, 24, 25, 26, 27, 51], "\ucd94\uac00\ub418\uc5c8\ub294\uc9c0\ub97c": 13, "\ud559\uc2b5\ub41c\ub2e4": [13, 24, 33, 40], "\ucf54\ub4dc\uc5d0\uc11c\ub294": [13, 28], "\ub178\uc774\uc988\uc640": [13, 19], "t\ub85c": [13, 15, 21], "p_loss": 13, "x_start": 13, "default": [13, 28, 32], "torch": [13, 28, 42, 47, 53, 55, 56], "randn_lik": [13, 47], "q_sampl": 13, "do": [13, 38, 40, 55], "slow": [13, 52, 53], "25": [13, 28, 31, 35, 39, 42, 48, 52], "seem": 13, "significantli": [13, 49], "x_self_cond": 13, "self_condit": 13, "no_grad": 13, "model_predict": 13, "pred_x_start": 13, "detach_": 13, "take": 13, "model_out": 13, "pred_x0": 13, "pred_v": 13, "predict_v": 13, 
"rais": [13, 42, 47], "valueerror": [13, 47], "unknown": [13, 47], "loss_fn": 13, "reduct": [13, 42, 47], "reduc": [13, 47], "loss_weight": 13, "noise\uc5d0\uc11c": 13, "\uc21c\ucc28\uc801\uc73c\ub85c": [13, 45, 55], "p_sampl": 13, "int": [13, 36, 42, 48, 51, 55, 56], "devic": [13, 32, 42, 47], "batched_tim": 13, "model_mean": 13, "model_log_vari": 13, "p_mean_vari": 13, "clip_denois": 13, "pred_img": 13, "backbon": [13, 35, 41, 53], "\ub2e8\uacc4\ub294": [13, 26, 27], "convnext": 13, "\ube14\ub85d": 13, "groupnorm": [13, 46], "upsampling\uc73c\ub85c": 13, "block_klass": 13, "resnetblock": 13, "group": 13, "resnet_block_group": 13, "modulelist": [13, 55], "dim_in": 13, "time_emb_dim": 13, "time_dim": 13, "prenorm": 13, "linearattent": 13, "dim_out": 13, "is_last": 13, "conv2d": [13, 28, 53, 55], "init_dim": 13, "out_dim": 13, "dim_mult": 13, "learned_vari": 13, "learned_sinusoidal_cond": 13, "random_fourier_featur": 13, "learned_sinusoidal_dim": 13, "dimens": [13, 14, 28, 29, 36, 55], "input_channel": 13, "init_conv": 13, "in_out": 13, "list": [13, 42, 55], "random_or_learned_sinusoidal_cond": 13, "sinu_pos_emb": 13, "randomorlearnedsinusoidalposemb": 13, "fourier_dim": 13, "sinusoidalposemb": 13, "time_mlp": 13, "gelu": 13, "num_resolut": 13, "len": [13, 42, 48, 55], "ind": 13, "enumer": [13, 47, 48, 55], "mid_dim": 13, "mid_block1": 13, "mid_attn": 13, "mid_block2": 13, "default_out_dim": 13, "final_res_block": 13, "final_conv": 13, "zeros_lik": 13, "clone": [13, 55], "block1": [13, 55], "block2": [13, 55], "resolution\uc5d0": [13, 39], "conv\uc5d0\uc11c": 13, "3\ubc30\ub85c": 13, "\ub298\ub9ac\uace0": 13, "v\ub85c": 13, "\ubd84\ud574": [13, 22], "dim_head": [13, 29], "hidden_dim": 13, "to_qkv": 13, "to_out": [13, 53], "qkv": 13, "chunk": [13, 47, 55], "rearrang": [13, 29, 41], "einsum": 13, "layernorm": [13, 14], "block\uc5d0": [13, 20, 21, 46], "embedding\uc774": [13, 31, 40], "\ucd94\uac00\ub3fc\uc11c": 13, "\uad6c\ubd84\ub428": 13, "half_dim": 13, "math": 13, 
"10000": 13, "arang": 13, "score": [13, 17, 20, 21, 27, 29, 30, 31, 43, 44, 45, 48, 49, 51, 52, 53, 54, 55], "is\ub85c": 13, "model\uc778\ub370\ub3c4": 13, "model\ubcf4\ub2e4": [13, 21, 24, 34], "\uc6b0\uc6d4": 13, "codelength\uc5d0\uc11c": 13, "\uc5c6\uae30": [13, 30, 31, 33, 51], "overfitting\uc758": 13, "\uac00\ub2a5\uc131\ub3c4": 13, "\uc801\uc74c": 13, "incept": [13, 29, 33, 39, 45, 54], "v3\uc73c\ub85c": 13, "\uc9d1\ud569\uacfc": 13, "\ud074\ub798\uc2a4": [13, 29, 33, 39, 42], "\uc2e4\uc81c\uc640": 13, "IS": [13, 14, 21, 29, 33, 35, 41, 43, 45], "\ud074\ub798\uc2a4\uac00": [13, 42], "\ub098\ub204\uc5b4\uc9c0\ub294\uc9c0": 13, "\ud074\ub798\uc2a4\ub97c": [13, 42], "\uc0dd\uc131\ud558\ub294\uc9c0": 13, "\ud3c9\uac00\ud568": [13, 49], "\ub192\uc744": [13, 14], "\uc131\uc801\uc774": 13, "\uc88b\uace0": [13, 51], "variance\ub97c": [13, 23], "\uc0ac\uc6a9\ud588\uc744": [13, 26, 36, 39, 40, 43, 46], "\ub54c\uc5d0\ub3c4": [13, 15], "\uac10\uc18c\ud558\uc9c0": 13, "\uace0\uc815\ud588\uc744": 13, "\ub098\ub294": 13, "\uc0ac\uc6a9\ud558\uba74\uc11c": [13, 25, 28, 40], "\uac1c\uc120\ub428": 13, "scalabl": 14, "iccv": [14, 42, 44], "09748": 14, "facebookresearch": 14, "wpeebl": 14, "junhyoung": [14, 57], "juli": [14, 18, 36], "\ubc31\ubcf8\uc744": 14, "\ubc14\uafb8\uc5b4": 14, "depth": [14, 16, 24, 30, 32, 33, 40, 44, 46, 55], "\uc99d\uac00\ub85c": [14, 27], "\ub192\uac8c": [14, 18, 28], "\ub098\ud0c0\ub0ac\uc74c": [14, 33], "\ub0ae\uac8c": [14, 31, 46], "\uc720\uc9c0\ud55c": [14, 22, 29, 30, 47, 52], "\ubcf4\uc720\ud558\uba70": 14, "xl": [14, 26], "\ubca4\uce58\ub9c8\ud06c\uc5d0\uc11c": [14, 29], "27": [14, 49], "\ud2b8\ub80c\ub4dc\ub97c": 14, "\uc720\uc9c0\ud558\uace0": [14, 19, 23, 38], "\ub124\ud2b8\uc6cc\ud06c\uc778": 14, "\uc0ac\uc6a9\ub418\uace0": [14, 18, 24, 31], "\ucd08\uae30\uc5d0\ub294": [14, 29], "\ub808\ubca8\uc758": 14, "gan\uc5d0\uc11c": [14, 24], "\uc774\ub04c\uc5c8\uc74c": 14, "\ube14\ub7ed\uc774": [14, 31], "\uc8fc\uc694\ud558\uac8c": 14, 
"\uc800\ud574\uc0c1\ub3c4\uc5d0": 14, "\ud3ec\ud568\ub428": 14, "adm": [14, 15, 21, 43, 46], "\uc120\ud0dd\uc801\uc73c\ub85c": [14, 17, 37], "\uc544\ud0a4\ud14d\ucc98": [14, 33, 40], "\uad6c\uc131\uc758": 14, "\ubc1d\ud614\uc74c": 14, "\ub07c\uce58\uc9c0": 14, "\uc885\ub958\uc758": [14, 19, 25, 39], "dit\ub294": 14, "vit\ub97c": 14, "\uc900\uc218\ud558\uba70": 14, "\ub124\ud2b8\uc6cc\ud06c\ubcf4\ub2e4": 14, "\uc778\uc2dd": [14, 22, 29, 33], "\ud655\uc7a5\ub418\ub294": 14, "\ub098\ud0c0\ub0a8": 14, "\uc544\ud0a4\ud14d\ucc98\uc758": 14, "\ubcf5\uc7a1\uc131": 14, "\uce21\uc815\ud560": [14, 39], "\uce21\uc815\ud568": 14, "\uace0\ub824\ud558\uc9c0": [14, 32, 51], "\uc54a\uc544": [14, 36, 51, 53], "\ubcf5\uc7a1\uc131\uc744": [14, 19, 22], "\ubd84\uc11d\ud568": [14, 27], "\uc5f0\uc0b0\uc801\uc73c\ub85c": 14, "\ud574\uc0c1\ub3c4\ub85c": [14, 17, 29, 41], "\uc555\ucd95\ud558\ub294": [14, 27], "autoencod": [14, 27, 55, 56], "\uacf5\uac04\uc758": [14, 39], "\uc555\ucd95\ub41c": [14, 15, 25], "\ub514\ucf54\ub529\ud558\uc5ec": [14, 19], "\uc0dd\uc131\ud568": 14, "ldm\uc740": [14, 27, 37, 40, 41], "adm\uacfc": 14, "\ubaa8\ub378\ubcf4\ub2e4": [14, 17, 23, 39, 44, 45, 50, 53, 54], "gflops\ub97c": 14, "\ub2ec\uc131": [14, 17, 19, 24, 29, 31, 32, 39, 40, 41, 45], "dit\ub97c": 14, "\uc801\uc6a9\ud588\uace0": 14, "\uacf5\uac04\uc5d0\uc11c\ub3c4": 14, "\uc18d\uc131\uc744": [14, 32, 44], "\ub530\ub974\ub3c4\ub85d": [14, 20, 43, 56], "ddpm\uc744": [14, 23], "\ud328\uce58\ub97c": 14, "\uc5f0\uc0b0\ud558\ub294": [14, 25, 32, 46], "\uad6c\uc870\ub85c": [14, 22, 48], "\uc124\uacc4\ub428": 14, "fig4": 14, "256x256x3": 14, "\uc774\ubbf8\uc9c0\uc5d0\uc11c\ub294": [14, 18], "32x32x4": 14, "\uc785\ub825\ub428": 14, "\ucc28\uc6d0\uc758": [14, 20, 25, 27, 31, 41], "\uc218\uac00": [14, 24, 27, 32, 44, 45], "\ub2ec\ub77c\uc9d0": 14, "\ubc18\uc73c\ub85c": 14, "\uc904\uc774\uba74": 14, "4\ubc30\uac00": 14, "\ucd5c\uc18c": [14, 19, 35, 43], "\ub07c\uce68": [14, 49], "\uc815\ubcf4\ub85c": 14, 
"\uc2dc\ud000\uc2a4\uc5d0": [14, 19], "\ub354\ud574\uc90c": [14, 28], "\ucc98\ub9ac\ud558\ub294": [14, 24], "\uc9c4\ud589\ud568": [14, 40], "\ucd94\uac00\uc758": [14, 35], "\ub354\ud558\ub294": [14, 15], "\ud1a0\ud070\uacfc": [14, 31], "\ucc98\ub9ac\ud568": 14, "cl": 14, "\ube14\ub7ed\uc5d0\uc11c\ub294": 14, "\uc81c\uac70\ud568": 14, "\ubcc4\ub3c4\ub85c": [14, 32, 43], "\ub2e4\uc74c\uc5d0": [14, 29, 38], "\ud3ec\ud568\ud558\ub3c4\ub85d": 14, "\uc218\uc815\ud568": 14, "\uc624\ubc84\ud5e4\ub4dc\ub85c": 14, "\ucd94\uac00\ud568": [14, 49], "norm": [14, 44, 53], "adaln": 14, "\ube14\ub7ed\uc758": 14, "\uad50\uccb4\ud568": 14, "gamma": [14, 17, 32, 47, 53, 54], "\uac83\ubcf4\ub2e4": [14, 15, 19, 28, 29, 33, 42, 49, 53, 55], "\ud569\uc73c\ub85c\ubd80\ud130": 14, "\ud68c\uadc0\ud558\ub3c4\ub85d": 14, "\uc124\uacc4\ud568": [14, 29], "\ucd5c\uc18c\ud55c\uc758": [14, 15, 19, 44], "\ucd94\uac00\ud558\ubbc0\ub85c": 14, "\uc5f0\uc0b0\uc5d0": [14, 19], "\ud6a8\uc728\uc801\uc784": 14, "\uc801\uc6a9\ud568\uc73c\ub85c\uc368": [14, 50], "\uc88b\ub2e4\ub294": [14, 39, 50], "\ubc1c\uacac\ud568": 14, "\ucd08\uae30\ud654\ud568": 14, "\ud68c\uadc0\ud558\ub294": 14, "\uc678\uc5d0\ub3c4": [14, 28, 29, 36, 51], "\ube14\ub7ed": 14, "\uc801\uc6a9\ub418\ub294": [14, 15, 43], "\ubcc4": [14, 17, 32, 36, 42], "\ud68c\uadc0\ud568": 14, "\uc601\ubca1\ud130\ub97c": 14, "\ucd9c\ub825\ud558\ub3c4\ub85d": 14, "mlp\ub97c": [14, 18], "\ube14\ub85d\uc744": [14, 29], "\ucd08\uae30\ud654\ud558\uac8c\ub428": 14, "\ube14\ub85d\uacfc": [14, 35], "architecture2": 14, "\uc2dc\ud000\uc2a4\ub97c": [14, 19, 33], "\ub514\ucf54\ub529\ud574\uc57c\ud568": 14, "adaln\uc744": 14, "\ud559\uc2b5\uacfc": [14, 20, 44], "\uc5f0\uad00\ub428": 14, "\uc774\uc678\uc758": 14, "\ubd84\uc0b0\ub3c4": [14, 33], "\uc9c4\ud589\ud588\uc74c": 14, "vlb_loss": 14, "xlarg": 14, "config": [14, 47, 53], "512x512": [14, 16, 19, 24, 30, 31, 35, 42, 51, 52, 55], "\uc14b": [14, 41], "adamw": [14, 16, 24], "horizont": 14, "flip": [14, 30], "ema": 
[14, 25, 43, 51], "999": [14, 32], "table2": [14, 20], "table3": [14, 20], "\uc88b\uc74c": [14, 28, 45], "fig5": 14, "\ud559\uc2b5\uc5d0\ub3c4": [14, 46], "fig6": 14, "\ud074\uc218\ub85d": [14, 43, 51], "fig7": 14, "fig8": 14, "fig9": 14, "visual_result": 14, "\ud328\uce58": [14, 41], "\uc0ac\uc774\uc988\uc640": 14, "\ud06c\uae30\uc5d0": [14, 21, 50], "\ud328\uce58\uac00": 14, "\ud06c\uae30\uac00": [14, 24, 33, 35], "\ub6f0\uc5b4\ub098\uace0": 14, "\uc6b0\uc218\ud558\uac8c": [14, 22, 38], "\uc801\uc6a9\ud568": 14, "\ud6a8\uc728\uc131\ub3c4": 14, "\uac00\uc838\uc634": 14, "2210": 15, "10960": 15, "sehwan": [15, 21, 26, 57], "domain\uc5d0\uc11c": 15, "control\ud558\ub294": 15, "\ubd80\uc871\ud558\ub2e4": [15, 19, 26, 36, 41], "model\uc18d\uc5d0\uc11c": 15, "space\ub97c": [15, 35, 40], "\ubc1c\uacac\ud558\uae30": 15, "\uc81c\uc548\ud558\uace0": [15, 43], "space\ub77c\uace0": 15, "\uba85\uce6d\ud55c": 15, "space\uc758": [15, 27, 40, 51], "\ud2b9\uc131": [15, 36], "across": 15, "\ub4e4\uc744": [15, 18, 30, 42, 49, 56], "strength\uc640": 15, "deficiency\ub97c": 15, "\uae30\uc900\uc73c\ub85c": [15, 27, 28, 33, 39, 42, 43, 54], "\uc0bc\uace0": 15, "translation\uc744": 15, "design\uc744": 15, "guidance\ub294": [15, 19, 46], "unconditional\ud55c": [15, 19, 21], "variable\uc5d0": 15, "variable\uc744": 15, "\ud569\uce58\ub294": [15, 33], "\uc774\uc6a9\ud558\uba74\uc11c": 15, "\uba85\ud655\ud558\uac8c": [15, 48], "control\ud558\uae30\uac00": 15, "\uc27d\uc9c0": [15, 26], "\uc54a\ub2e4": [15, 19, 20, 24, 25, 34, 41, 51], "\uac70\uce58\ub294": [15, 33, 43], "variable\uc774": 15, "class\uc778\uc9c0": 15, "\ubd84\ub958\ud558\uace0": 15, "\ubd80\uc5ec\ud558\ub294": 15, "\uc791\ub3d9\ud55c\ub2e4": [15, 29], "variable\ub4e4\uc5d0": 15, "classify\ub97c": 15, "\uc2e4\ud589\ud574\uc57c": 15, "\ud558\uae30\uc5d0": 15, "\uc0ac\uc6a9\ud558\uae30\uac00": 15, "\ud798\ub4e4\uc5b4": 15, "\uc2dc\ucf1c\uc57c": [15, 30], "\uc2dc\uac04\uc801\uc73c\ub85c": [15, 19, 41], 
"\ube44\uc6a9\uc801\uc73c\ub85c": 15, "\ubd80\ub2f4\uc774": [15, 53], "diffusionclip": 15, "space\ub294": [15, 27], "origin": [15, 24, 30, 32, 34, 35, 36, 54], "edit\ud558\uae30": 15, "\ubc1c\uacac\ud558\uc600\uace0": 15, "\uce6d\ud55c\ub2e4": 15, "space\uc5d0\ub294": 15, "\ud2b9\uc131\ub4e4\uc774": [15, 24], "\uc874\uc7ac\ud55c\ub2e4": [15, 25], "editing\uacfc": 15, "boosting\uc744": 15, "design\ud558\uc5ec": 15, "space\ub85c\uc368\uc758": 15, "\ubc1c\uacac\uc0ac\ub840\uc774\ub2e4": 15, "\uaef4\uc788\ub294": [15, 21], "\uc5bc\ub9cc\ud07c\uc778\uc9c0": [15, 21], "\uc81c\uac70\ub41c": [15, 21, 27], "\uad6c\ud560": [15, 21, 24, 54, 56], "constant\ud55c": [15, 21], "\uace0\uc815\uc2dc\ud0a8\ub2e4": [15, 21], "ddpm\uc5d0\uc11c\uc758": [15, 21], "sigma_t": [15, 17, 18, 24, 47, 51, 54], "alpha_": [15, 17, 21, 47], "bigg": [15, 25, 51], "ddim\uc5d0\uc11c\ub294": 15, "\uad00\uc810\uc758": 15, "\uc81c\uc2dc\ud558\uc600\uace0": 15, "general\ud558\uac8c": 15, "process\uc5d0": [15, 40], "eta": [15, 22, 26], "1\uc778": 15, "ddpm\uc774": [15, 23], "\ub418\uace0": [15, 19, 20, 24, 26, 28, 29, 37, 43, 44, 47, 49, 54, 56], "stochastic\ud574\uc9c0\uba70": 15, "0\uc778": 15, "ddim\uc774": 15, "deterministic\ud574\uc9c4\ub2e4": 15, "cfrac": 15, "2i": [15, 43, 51, 54], "textrm": 15, "point": [15, 17, 18, 30, 32, 41, 43], "encoder\uc640": [15, 27, 31, 36], "text\uac04\uc758": [15, 21], "\ud3b8\uc9d1\ub41c": 15, "\ucd5c\uc18c\ud654\ud558\ub294": [15, 40, 48, 54, 55], "collapse\uc5c6\uc774": 15, "\uade0\uc77c\ud55c": [15, 32, 33], "editing\uc744": 15, "\ud588\ub2e4\uace0": [15, 21, 39, 46, 54], "_i": [15, 19, 24, 26, 32, 36], "editiing\uc744": 15, "naiv": 15, "approach\ub97c": 15, "\ud1b5\ud574\uc11c\ub294": 15, "editing\uc774": [15, 20, 21], "\uc774\ub8e8\uc5b4\uc9c0\uc9c0": 15, "chapter\uc5d0\uc11c\ub294": 15, "\uc54a\ub294\uc9c0\uc5d0": 15, "\uc124\uba85\uc744": [15, 39, 40], "\ud574\uacb0\ud558\ub294": [15, 26, 51], "controllable\ud55c": 15, "process\uc778": 15, "ddim\uc5d0\uc11c": 
[15, 46, 49], "\uc124\uba85\ud558\uc600\ub294\ub370": 15, "chapter\ubd80\ud130\ub294": 15, "\uc124\uc815\ud558\uace0": [15, 31, 33, 39, 44, 47], "\uc124\uc815\ud558\uc600\ub2e4": 15, "variable\ub85c": 15, "\ub2f4\ub2f9\ud558\uace0": 15, "\ub3cc\uc544\uac00\uae30\uc5d0": 15, "\ub2f4\ub2f9\ud55c\ub2e4": 15, "prompts\uc5d0": [15, 21], "manipulate\uc2dc\ud0a4\ub294": 15, "3\uc5d0\uc11c": [15, 33], "\uc18c\uac1c\ud55c": [15, 26, 35, 43, 54], "optimize\ud558\ub3c4\ub85d": 15, "update\ud558\ub294": 15, "\uc0dd\uc131\ud558\uac70\ub098": [15, 29], "manipulation\uc744": [15, 26], "\ud55c\ub2e4\uace0": [15, 30, 40, 53], "\ub300\uc548\uc73c\ub85c": [15, 24, 32], "manipulate\ud558\ub3c4\ub85d": 15, "shift\ud574\uc8fc\ub294": 15, "\uc81c\uc2dc\ub418\uc5c8\ub2e4": 15, "manipulate\ud558\uc9c0": 15, "\ub458\ub2e4": [15, 45], "shifted\ub41c": 15, "\uc0ac\uc6a9\ud558\uae30\uc5d0": 15, "cancel": 15, "out\ub418\uc5b4": 15, "variable\uc5d0\uc11c\ub294": 15, "\uae30\uc874\uacfc": [15, 51], "\ub2e4\ub984\uc774": 15, "\uc5c6\ub2e4\ub294": [15, 21], "\uc99d\uba85\uc740": 15, "proof": [15, 21], "theroem\uc744": 15, "theroem": 15, "beta_": 15, "ddim\uc5d0\uc11c\uc758": 15, "\uc2dd\uc774\uace0": 15, "\ud56d\ub9cc": 15, "\ub530\ub85c": [15, 24, 26, 31, 36, 40, 44, 48, 51], "\ubb36\uc5b4\uc11c": 15, "\ud45c\ud604\ud558\uba74": [15, 47, 56], "root\ub97c": 15, "\ub0b4\ubd80\ub97c": 15, "\uacc4\uc0b0\ud558\uba74": [15, 45], "\uc815\ub9ac\ud558\uba74": [15, 44], "therefor": 15, "epsilon\uc744": [15, 21], "\uacb0\uacfc\uc774\ub2e4": [15, 21, 26], "\ubd84\uc790\ub97c": 15, "\uc791\uae30\uc5d0": 15, "\uc218\ub834\ud558\uae30\uc5d0": 15, "\uc5c6\uc74c\uc744": [15, 23], "\ud6a8\uacfc\ub294": 15, "\uc88b\uc9c0": [15, 23, 32, 33, 35, 37, 39, 43, 52, 54, 55], "\uc54a\uc74c\uc744": [15, 17], "No": 15, "chapter": 15, "1\uc5d0\uc11c": [15, 29, 32, 35], "asyrp\ub97c": 15, "\ube44\ub300\uce6d\uc801\uc778": 15, "\uc0ac\uc6a9\ud55c\ub2e4\ub294": [15, 39], "\uac83\uc778\ub370": [15, 24], "\ub3cc\uc544\uac00\ub294": 
[15, 44], "\uc900\ub2e4\ub294": 15, "\ub9ccmodify\ud558\uace0": 15, "\uc720\uc9c0\ud55c\ub2e4": [15, 19], "loss\uc2dd": 15, "\uc7ac\uad6c\uc131\ud558\uc600\ub2e4": 15, "modify\ub97c": 15, "\ud558\uc9c0": [15, 17, 19, 26, 33, 39, 40, 42, 54], "modifiy\ub97c": 15, "loss\uc2dd\uc740": 15, "lambda_": [15, 17, 18, 54], "ref": [15, 18, 30], "recon": [15, 17], "\uc124\uacc4\uac00": 15, "\uc774\uc81c": [15, 39, 54], "epsilon\uc778": 15, "\uac83\uc778\uc9c0\uc5d0": 15, "result\ub97c": [15, 21], "nice": 15, "properties\ub97c": 15, "models\uc758": 15, "backbone\uc778": 15, "net\uc5d0\uc11c": [15, 49], "\ub3c4\ucd9c\ub41c\ub2e4": 15, "net\uc758": [15, 16, 19, 29], "bottleneck": [15, 28, 45, 55], "\uae4a\uc740": [15, 32, 44], "map\uc778": 15, "h_t": 15, "\uc815\ud558\uc600\ub2e4": 15, "\ubd80\ub978\ub2e4": 15, "space\ubcf4\ub2e4": 15, "resolutions\uc744": 15, "semantic\ub97c": 15, "\uac00\uc9c4\ub2e4": [15, 27, 33, 46, 51], "space\uc5d0\uc11c\ub294": 15, "\ubc1c\uacac\ud560": 15, "nice\ud55c": 15, "\ud2b9\uc131\ub4e4\uc744": 15, "\ud06c\uae30\ub294": [15, 28, 35, 36, 51, 55], "times512": 15, "times3": [15, 20], "control\uc774": [15, 26, 38], "\uc9c0\ubc30\uc801\uc774\uace0": 15, "robust\ud568\uc744": 15, "\ucd94\uce21\ud560": 15, "skip": [15, 25, 29, 32, 43, 47, 49, 52, 53], "connection\uc758": 15, "\ubc1b\uc9c0": [15, 36], "\uc54a\uc73c\uba70": [15, 17, 51], "\uacf5\uac04\uc774\uba70": 15, "control\ud558\ub294\ub370\uc5d0": 15, "\uc9c0\uc815\ud558\uae30": 15, "\uc124\uc815\ud574\ub450\uace0": 15, "\ud574\ubcf4\uc558\ub294\ub370": 15, "8th": 15, "layer\uc774\uc804\uc758": 15, "\uc9c0\uc815\ud55c": [15, 34, 49], "manipulaton\uc774": 15, "\uc774\ub8e8\uc5b4\uc84c\uace0": 15, "\uc774\ud6c4\uc758": [15, 23], "\uacfc\ud55c": 15, "manipulation\uc774": 15, "\uc774\ub8e8\uc5b4\uc9c0\uac70\ub098": 15, "\uc544\uc608": [15, 49], "\uc0dd\uc131\ub418\uc5c8\ub2e4": 15, "space\ub9cc\uc758": 15, "\ud2b9\uc131\uc740": 15, "chapter5\uc5d0\uc11c": 15, "\uc124\uba85\ud55c\ub2e4": [15, 36], 
"manipulating\ud558\ub294\ub370": 15, "\uc131\uacf5\ud588\uc74c\uc5d0\ub3c4": 15, "\uc218\ub9ce\uc740": [15, 30, 43], "timestep\uc5d0\uc11c": 15, "optimizing\ud558\uae30\ub780": 15, "\ub300\uc2e0\uc5d0": [15, 19, 32, 54], "\uc785\ub825\ubc1b\uc544": [15, 52, 54], "\ucd9c\ub825\ud574\uc8fc\ub294": 15, "optimizing\ud574\uc918\uc57c": 15, "\uc2dc\uac04\ub3c4": 15, "setting\uac12\ub4e4\uc5d0": 15, "robust\ud558\ub2e4": 15, "timestep\uacfc": 15, "feature\uc778": [15, 19], "\ucd9c\ub825\ud558\ub294": [15, 32, 48], "\ud559\uc2b5\ud558\uae30\uc5d0": 15, "unseen": [15, 17], "\uc77c\ubc18\ud654\ud560": 15, "accelerated\ud55c": 15, "\uacfc\uc815\uc5d0\uc11c\ub3c4": [15, 28], "\ubcf8\ub2e4": 15, "scheme\uc774": 15, "\uc5b4\ub5bb\ub4e0": 15, "\uac04\uc5d0": [15, 19], "\ubcf4\uc874\ub41c\ub2e4\uba74": 15, "\uc124\uacc4\ud574\ub3c4": 15, "manipulation\ud6a8\uacfc\ub97c": 15, "control\ud574\uc11c": 15, "\uc774\uc6a9\ud558\ub294": 15, "\uc2dd\uc740": [15, 23], "space\uc640": [15, 36], "\ube44\uad50\ud558\uc600\ub2e4": [15, 19, 51], "intuit": [15, 38], "choos": [15, 30], "interv": [15, 43], "percept": 15, "priorit": 15, "choi": 15, "earli": [15, 55], "stage\uc5d0\uc11c\ub294": 15, "context\ub97c": [15, 21, 40], "generate\ud558\uace0": 15, "later": 15, "impercept": [15, 27], "details\ub97c": 15, "generate\ud55c\ub2e4\uace0": 15, "stage\uc5d0\uc11c": 15, "\uc9c4\ud589\ud558\ub294": [15, 26, 43], "\uad6c\uac04\uc744": [15, 36], "\uc81c\uc2dc\ud55c\ub2e4": [15, 21], "process\uc5d0\uc11c\ub294": 15, "context\uac00": 15, "generate\ub418\uc5b4\uc57c": 15, "interval\uc744": 15, "\uacb0\uc815\ud558\uae30": [15, 53], "\uce21\uc815\uc9c0\ud45c\ub97c": 15, "t\uc2dc\uc810\uc5d0\uc11c": 15, "target\uc774": 15, "image\uac04\uc758": 15, "lpips\ub97c": 15, "\ub0a8\uc740": [15, 21, 42], "process\uc744": 15, "\uad6c\uc131\uc694\uc18c\ub97c": [15, 32], "\uc9c0\ud45c\ub77c\uace0": [15, 39], "t\uc758": 15, "lpips\ub85c": 15, "\uc2dc\uc810\uc5d0\uc11c\uc758": 15, "\ucc28\uc774\ub294": [15, 26, 34, 49], 
"\uc5bc\ub9cc\ud07c\uc758": 15, "change\ub97c": 15, "\uc8fc\uc5c8\ub294\uc9c0\ub97c": 15, "xi_t": 15, "interval\uc774": 15, "\uc791\uc73c\uba74": [15, 26, 30, 36, 56], "\uc791\uc544\uc9c0\uba70": 15, "\uc77c\uc5b4\ub098\uc9c0": 15, "\ud06c\uba74": [15, 19, 27], "\ucee4\uc9c0\uace0": 15, "\uc77c\uc5b4\ub09c\ub2e4": 15, "\ucda9\ubd84\ud55c": [15, 39], "\ud55c\uc5d0\uc11c": 15, "\ucd5c\uc18c\uc758": 15, "\uacb0\uc815\ud558\ub294": [15, 24], "\ucd5c\uace0\uc758": [15, 42, 46], "\ubc29\ubc95\uc774\ub2e4": [15, 26, 27, 46], "\uc2e4\ud5d8\uc801\uc778": 15, "33\uc778": 15, "t\uc2dc\uc810\uc744": 15, "\uacb0\uc815\ud558\uc600\ub2e4": 15, "variou": [15, 38, 46, 54, 55, 57], "proper": 15, "\ud2b9\uc131\ub4e4\uc740": 15, "\ud2b9\uc131\ub4e4\uc5d0": 15, "\uacbd\uc6b0\ub3c4": [15, 30, 36, 39, 44], "smile\ud55c": 15, "attribute\ub97c": 15, "\uacbd\uc6b0\ubcf4\ub2e4": 15, "pixar": [15, 22], "style\uc758": [15, 36], "attribute\uc744": 15, "\uae38\uac8c": 15, "\uc124\uc815\ud574\uc57c": 15, "33": 15, "t\ub97c": [15, 33], "33d": 15, "y_": [15, 28, 46], "\uc758\ubbf8\ud558\uba70": [15, 25], "\uc694\uad6c\ud558\ub294": [15, 26], "attributes\uc5d0": 15, "\uc791\uc74c": 15, "\uae40": 15, "flexibl": [15, 24, 55], "amount": 15, "chang": [15, 46], "\uc124\uc815\ud558\uba70": 15, "stochasticity\ub97c": 15, "\uc81c\uac70\ud558\uc5ec": [15, 33, 36, 51], "\uc644\ubcbd\ud55c": 15, "\uac00\ub2a5\ucf00": [15, 24], "elucid": 15, "diffusionbas": 15, "karra": [15, 33, 43], "stochasticity\uac00": 15, "\uc99d\uac00\uc2dc\ud0a8\ub2e4\uace0": 15, "\uc99d\uba85\ud558\uc600\ub2e4": [15, 19], "interval\uc740": 15, "interval\uc5d0": 15, "control\ud560": 15, "\uae38\uac8c\ub418\uba74": 15, "quality\ub294": [15, 40], "\uc99d\uac00\ud558\uc9c0\ub9cc": 15, "interval\ub3d9\uc548": 15, "\uacc4\uc18d\ud574\uc11c": 15, "\uc8fc\uc785\ud574\uc57c": 15, "content\uac00": [15, 32], "\ub2ec\uc131\ud558\uba74\uc11c\ub3c4": 15, "content\uc5d0": [15, 37], "\ubcc0\ud654\ub9cc\uc744": 15, "\uc124\uc815\ud558\ub294": [15, 54, 
56], "\ud574\uacb0\ud574\uc57c": 15, "\ubd80\ubd84\uc73c\ub85c": [15, 36, 56], "\ubcf4\uc558\uc73c\uba70": 15, "image\ub85c": [15, 21, 24, 27], "\uaef4\uc788\ub294\uc9c0\uc5d0": 15, "\uc9c0\ud45c\ub85c": [15, 34, 36, 39], "defici": 15, "gamma_t": 15, "\uc5ec\uae30\uc11c\ub294": [15, 39, 40], "strength\uc640\ub294": 15, "\ud310\ub2e8\ud558\ub294\ub370\uc5d0": 15, "semantics\ubcf4\ub2e4\ub294": 15, "actual": [15, 28], "\uace0\ub824\ud588\uae30\uc5d0": 15, "\uc124\uc815\ud558\uc600\ub2e4\uace0": [15, 52], "2\uc778": 15, "gamma_": 15, "presenc": 15, "model\uc5d0\uc11c\uc758": 15, "where": [15, 24, 32, 36, 46, 54, 55], "\ub418\uba70": [15, 29, 41, 47, 50], "\uc0ac\ub77c\uc838": 15, "\ud2b9\uc131\uc774": 15, "assymetr": 15, "ddim\uc744": [15, 25, 51], "control\ub41c": 15, "f_t": 15, "\ucc98\uc74c\ubd80\ud130": [15, 19, 29], "\uc2dc\uc810\uae4c\uc9c0\ub294": 15, "\uc9c4\ud589\ud558\ub2e4\uac00": 15, "\uc2dc\uc810\ubd80\ud130": 15, "\ub05d\ub0a0": 15, "celeba": [15, 22, 27], "2018": 15, "lsun": [15, 23, 27, 43, 51, 54], "bedroom": [15, 27, 43, 54], "church": [15, 27, 54], "2015": 15, "\ub370\uc774\ud130\uc14b\uc5d0\uc11c": [15, 22, 33, 40, 42, 49, 53, 54], "2020b": 15, "meng": [15, 43, 51], "afhq": 15, "dog": [15, 17, 26, 30, 47], "iddpm": 15, "dhariw": [15, 33, 54], "metfac": 15, "p2": 15, "\ud559\uc2b5\uc2dc\ucf30\ub2e4\uace0": [15, 21, 53], "model\ub4e4\uc740": [15, 28, 51], "checkpoint\ub97c": [15, 28, 37], "frozen\uc0c1\ud0dc\ub97c": 15, "\uc720\uc9c0\uc2dc\ucf30\ub2e4\uace0": 15, "attribute\ub4e4\uc758": 15, "\ubc18\uc601\ud574\uc11c": [15, 26], "manipulate\ud588\ub2e4\ub294": 15, "\uc2ec\uc9c0\uc5b4": 15, "depart": 15, "factori": 15, "templ": 15, "attribute\uc740": 15, "\ud3ec\ud568\uc774": 15, "\ub418\uc5b4\uc788\uc9c0": 15, "\uc54a\uc558\uc74c\uc5d0\ub3c4": [15, 42], "inference\ud558\ub294": 15, "control\ud558\uace0": 15, "\ub0c8\ub2e4\ub294": 15, "\uc7a5\uc810\uc774\ub2e4": 15, "model\ub4e4\uacfc": 15, "\uc9c4\ud589\ud558\uc600\ub294\ub370": 15, 
"tuning\ud558\uc5ec": [15, 24, 39], "image\uc744": 15, "editing\ud558\ub294": 15, "diffsionclip": 15, "asyrp\uc758": 15, "\uc88b\uc74c\uc744": 15, "seen": 15, "smile": 15, "\ucd94\uac00\ud558\uae30": [15, 53], "\ucd5c\uc801\ud654\ub41c": [15, 17, 33, 48, 52], "\uc801\uc6a9\uc2dc\ucf30\uc744": 15, "\ub098\ud0c0\ub0b4\uc5c8\ub294\ub370": 15, "\uc801\uc6a9\ud55c\uacbd\uc6b0": 15, "face\ub85c": 15, "\ubc14\ub00c\ub294": [15, 38, 43], "distortion\uc774": 15, "\ubc1c\uc0dd\ud568\uc744": 15, "delta_h": 15, "scaling\uc744": [15, 49], "\ud558\ub294\ub370\uc5d0": 15, "change\uc758": 15, "\uc591\uc5d0": 15, "\ubc18\uc601\ub41c\ub2e4": 15, "3\ubc30": [15, 35], "\ud568\uc5d0": 15, "\ubc18\uc601\ub418\ub294": [15, 44], "attribute\ub610\ud55c": 15, "\ubcc0\ud654\ud55c\ub2e4\ub294": 15, "\ud45c\ud604\ub418\uc5b4": 15, "scaling\uc5d0": 15, "\ub41c\ub2e4\ub294": 15, "\ud569\uccd0\uc11c": [15, 39], "\ubd80\uc5ec\ub97c": 15, "\uacbd\uc6b0\uc5d0\ub3c4": [15, 44], "attribute\ub4e4\uc774": 15, "\ubc18\uc601\uc774": [15, 17, 47], "\uc8fc\uc785\ud588\uc744": 15, "\ube44\uad50\ud55c": [15, 21, 22, 50, 54], "\ucd94\uac00\ub418\uc5c8\uc5b4\ub3c4": 15, "\uc5c6\uc73c\uba70": 15, "\ucd94\uac00\ub418\uc5c8\uc744": 15, "distortion\uc740": 15, "\uc5c6\uace0": [15, 18, 21], "change\ub9cc": 15, "\uc2ec\ud558\uac8c": 15, "robustness\ud55c\uc9c0": 15, "homogeneous\ud55c": 15, "\uc131\uc9c8\uc744": [15, 43], "attribute\uc5d0": 15, "\ub40c\uc744": 15, "\ud655\uc778\ud558\uc600\ub2e4": 15, "\ub4e4\uc5d0": 15, "\ud3c9\uade0\uc778": 15, "result\uac00": 15, "\ube44\uc2b7\ud568\uc744": 15, "chapter4\uc5d0\uc11c": 15, "\ube44\ucd94\uc5b4": 15, "\ubcf4\uc558\uc744": [15, 38, 44], "process\uc5d0\uc11c\ub9cc": 15, "\uc801\uc6a9\uc744": 15, "global": [15, 20, 24], "\uce6d\ud558\uba70": 15, "\uc801\uc6a9\ub41c\ub2e4\uace0": 15, "\uac00\uc815\ud588\uc744": 15, "t_e": 15, "sum_t": 15, "\uc591\ub9cc": 15, "\uac19\ub2e4\uba74": 15, "\ube44\ub85d": 15, "\uc0ac\uc6a9\ud558\uc600\uc9c0\ub9cc": [15, 25], 
"\uc5f0\uad6c\ub97c": [15, 22, 25, 29, 30], "\ud574": [15, 20, 41], "\uc5ec\uc9c0\uac00": [15, 51], "\ud310\ub2e8\ud55c\ub2e4": 15, "models\uc5d0\uc11c": 15, "space\uc778": 15, "\ubc1c\uacac\ud588\uace0": 15, "\uc131\uacf5\uc801\uc778": [15, 17], "semantic\ud55c": [15, 26], "\uc81c\uc548\uc744": 15, "\ub17c\ubb38\uc774\ub2e4": [15, 26], "\ub300\ud45c\uc801\uc778": [15, 18, 26, 48], "\ud2b9\uc131\uc73c\ub85c\ub294": 15, "timesteps\uc774": 15, "framework\uc778": 16, "identity\uc640": [16, 19, 37], "postur": 16, "sequence\uac00": [16, 36], "moving\uc774\ub098": 16, "controlling\uc744": 16, "preserving\uc744": 16, "t2v\uc758": 16, "\uc9c4\uc804\uc5d0\ub3c4": 16, "\uc778\uac04": [16, 21], "\uc911\uc2ec": [16, 17, 18], "\uacaa\ub294": [16, 19, 36], "\ubd80\uc871": [16, 18], "\ubb18\uc0ac\uc758": 16, "\uc5b4\ub824\uc6c0\uc73c\ub85c": 16, "\uacaa\ub294\ub2e4": 16, "\uc81c\uc5b4\ub97c": [16, 40], "lora": [16, 22, 53], "\uae30\uc220\ub4e4\uc740": [16, 22], "\uc81c\uc5b4\uac00": [16, 17], "\ubd80\ub2f4": 16, "\ubc29\ubc95\ub860\uc778": 16, "network\ub85c": 16, "animatediff\uc5d0\uc11c": 16, "1000\uc758": 16, "\uc601\uc0c1\uc73c\ub85c": [16, 51], "\ud6c8\ub828\uc740": 16, "\ubcc0\uc774\ub098": 16, "\ud2b9\ubcc4\ud55c": [16, 29, 33], "\uc5f0\uc18d\uc801": [16, 32], "frame\uc774": 16, "\ud544\uc694\ud558\uae30": 16, "video\ub85c": 16, "split\ud558\uc5ec": 16, "6000\uac1c\uc758": 16, "\uc9e7\uc740": [16, 27, 33, 39], "\ud68d\ub4dd\ud55c\ub2e4": 16, "description\uc744": [16, 40], "minigpt": 16, "v2": [16, 25, 50, 53], "captioner\ub85c": 16, "describ": 16, "manner": [16, 30], "\uba85\ub839\uc73c\ub85c": 16, "\ud68d\ub4dd": [16, 18, 42], "subject\uc640": 16, "background": [16, 26, 30], "\ub0b4\uc6a9\uc5d0": 16, "\uc815\ud655\ud788": [16, 18, 21, 44], "\ubb18\uc0ac": 16, "consistency\uc640": 16, "\ud5a5\uc0c1\uc744": [16, 20, 31, 44, 46], "net\uacfc": [16, 29, 36], "controlnet\ub97c": 16, "block\uc740": 16, "animatediff\ub85c": 16, "\ud655\uc7a5": [16, 29, 45], "length\ub294": 16, 
"64\ub85c": 16, "mm_sd_v15": 16, "ckpt": 16, "\uac1c\uc778": 16, "appearance\uc640": 16, "\ubc30\uacbd\uc744": [16, 26], "\uace0\uc548\ub428": 16, "prompt\uc774\uc9c0\ub9cc": 16, "\ubb18\uc0ac\uac00": 16, "prompt\ub97c": [16, 21, 24, 26, 37, 40, 51], "\uc678\uad00\uc5d0": 16, "\ubc30\uacbd\uc5d0": 16, "\uc5bc\uad74": [16, 22], "feature\ub294": [16, 24, 38], "embedding\uc5d0": [16, 24], "concat\ub41c": 16, "\ubcf4\ub0c4": 16, "attentino": 16, "cloth": 16, "c_t": [16, 24], "c_f": [16, 36], "c_c": [16, 36], "prime": [16, 17, 32], "openclip": [16, 20, 24, 35, 42], "h14": 16, "arcface\ub97c": 16, "\uc0c1\uad00": 16, "2b\uc5d0\uc11c": 16, "\uc218\uc9d1": [16, 17, 40, 50], "v100": [16, 24, 32, 55], "100k": [16, 30], "1\uc7a5": [16, 42], "valid": [16, 18, 28, 39, 42, 48], "16\uc5d0\uc11c": [16, 35], "\ud655\uc7a5\ud558\uae30": [16, 29, 33], "18\ucd08": [16, 42], "13000": 16, "module\ub9cc": 16, "\ud6c8\ub828\ud558\uace0": 16, "controlnet\uc774\ub098": 16, "10k": [16, 29, 54], "\uc885\ub8cc": 16, "block\uacfc": 16, "unfreez": 16, "\uc218\uc9d1\ud55c": 16, "6k": 16, "dwpose\ub098": 16, "zoedepth\ub97c": 16, "352x352": 16, "25k": [16, 54], "express": 16, "\ub0ab\uac8c\ud558\uae30": 16, "\uad6c\uc870\uc5d0\uc11c": [16, 39], "5e": [16, 19], "20k": 16, "depth\uc5d0\uc11c\ub9cc": 16, "\ub3d9\uc2dc": 16, "1\uc758": [16, 25], "alpha_f": 16, "styliz": [16, 17], "driven": [17, 21, 47], "2303": [17, 20, 37, 42, 43], "13508": 17, "jeongin": [17, 32, 57], "\ud53c\uc0ac\uccb4\uc758": [17, 19], "6\uac1c\uc758": 17, "\uce90\uc8fc\uc5bc\ud55c": 17, "\ub9de\ucda4\ud654": 17, "\uacb0\ud569": [17, 24, 45], "\ubc29\ubc95\ub860\uc744": [17, 25, 46], "\ub098\uc774\ube0c\ud558\uac8c": 17, "\uacb0\ud569\uc2dc": 17, "subject\uc758": 17, "viewpoint": [17, 32], "\uc624\ubc84\ud53c\ud305\ud558\ub294": 17, "\ubabb\ud55c": [17, 54], "\uae30\ub2a5\uacfc": 17, "nerf\uc758": [17, 36], "\uacf5\ub3d9\uc73c\ub85c": 17, "3\ub2e8\uacc4": 17, "\uc804\ub7b5": [17, 19, 33], "\uadf9\ubcf5": [17, 31, 32], 
"\uc774\ubbf8\uc9c0\uc5d0\uc11c": [17, 19, 27, 32, 33, 34, 39], "\ud3ec\uc988": [17, 19, 22, 30, 32], "\uc911\uc2ec\uc758": 17, "asset\uc0dd\uc131\uc740": 17, "vr": 17, "\uc601\ud654": 17, "\uac8c\uc784": 17, "\uc751\uc6a9": [17, 33], "\uac00\ub2a5\ud558\ub098": 17, "\ud504\ub86c\ud504\ud2b8\ub9cc\uc73c\ub85c": 17, "\uc815\uccb4\uc131": 17, "\uae30\ud558\ud559\uc801": [17, 18, 32], "\uc678\uad00\uc744": 17, "\ub2a5\ub825\uc5d0": 17, "\uac1c\ubc1c": 17, "\ud0dc\uc2a4\ud06c\uc5d0\uc11c": [17, 19, 29, 39], "\ubcf4\uc778": [17, 39, 45], "\ub9ce\uc9c0\ub9cc": 17, "\uc0dd\uc131\uc774\ub098": 17, "\uc81c\uacf5\ud558\uc9c0\ub294": 17, "\uc18c\uc218\uc758": [17, 33, 42], "6\uac1c": 17, "\uce90\uc8fc\uc5bc\ud558\uac8c": 17, "\ucd2c\uc601\ub41c": [17, 29], "\ucd5c\uc801\ud654\ud558\uc5ec": [17, 32], "\uc790\uc0b0\uc744": 17, "\uc0dd\uc131\ud558\uc790": 17, "\ubb38\uc81c\uc810": [17, 19, 26, 41], "subject\uc5d0": [17, 19], "\uc2e4\ud328": [17, 19, 32], "\uc0ac\ub840\uac00": 17, "\uc8fc\uc81c": [17, 22], "\ubdf0\ud3ec\uc778\ud2b8\uc5d0": 17, "\uacfc\uc801\ud569": 17, "\ubdf0\ud3ec\uc778\ud2b8\uc5d0\uc11c": 17, "\ucda9\ubd84\ud558\uc9c0": [17, 19, 24], "\ud574\uacb0\ucc45": 17, "\ubd80\ubd84\uc801\uc73c\ub85c": 17, "\ubbf8\uc138": [17, 32], "\uc870\uc815": [17, 18], "\uc870\uc815\ub41c": 17, "\ubdf0\uc5d0": [17, 32], "\ub418\uc9c0": [17, 39, 41, 42, 44, 56], "subject\ubcc4": 17, "\ucea1\ucc98\ud558\uc9c0": [17, 33, 40], "\uc790\uc0b0\uc740": 17, "\uc77c\uad00\uc131\uc774": [17, 19], "\ubc18\uc601\ud558\uc9c0\ubabb\ud568": 17, "\uc870\uc815\ud558\uc5ec": [17, 22], "\uc0ac\ud56d\uc744": [17, 22, 29, 33], "\ucea1\ucc98": [17, 32], "1\ub2e8\uacc4\uc5d0\uc11c": 17, "\ud22c\uc785": [17, 32], "\uac00\uc0c1": [17, 32], "1\ub2e8\uacc4\uc758": 17, "pseudo": [17, 25, 43], "\ucd5c\uc801\ud654\ud55c": [17, 52], "\ubcfc\ub968\uc744": 17, "\ucd5c\uc801\ud654\uc2dc": 17, "\uaddc\uc81c\ud56d\uc73c\ub85c": 17, "\uc138\ud2b8\uc5d0": 17, "weak": [17, 44], "3\ub2e8\uacc4\uc5d0": 17, "\uac78\uce5c": 
17, "\ud569\ub3d9": [17, 24], "\ucd5c\uc801\ud654\ub294": [17, 19, 27], "\uacfc\uc801\ud569\ub418\ub294": 17, "\uc815\uccb4\uc131\uc5d0": 17, "\ucda9\uc2e4\ud558\ub3c4\ub85d": 17, "\ubcf4\uc7a5": 17, "\ucee8\ud14d\uc2a4\ud2b8\ub97c": 17, "\uc874\uc911\ud558\uba74\uc11c": 17, "\uc720\uc0ac\uc131\uc774": 17, "\ud604\uc2e4\uc801\uc778": [17, 49], "\uc785\uc99d": [17, 32], "\ubca0\uc774\uc2a4\ub77c\uc778\uacfc": 17, "\ube44\uad50\ud560": [17, 31, 44], "\ud3ec\ucc29\ud55c\ub2e4\ub294": 17, "\ubc18\uc601": [17, 19, 20, 32], "\uc81c\uacf5\ud55c": [17, 24], "\uc790\uc5f0\uc5b4": [17, 40], "llm": [17, 28, 31, 49], "\uc131\uacf5\uc5d0": 17, "\ud798\uc785\uc5b4": 17, "\uc791\ud488\ub4e4\uc774": 17, "\uc870\uc791\uacfc": 17, "\uc791\uc5c5\uc5d0": [17, 19, 22], "\ub300\uaddc\ubaa8": [17, 18, 21, 25, 33, 40, 41], "\ud504\ub86c\ud504\ud2b8\uc5d0\uc11c": 17, "suject": 17, "\uc81c\uacf5\ud558\uae30": 17, "recontextu": [17, 47], "sleep": [17, 56], "jump": [17, 36], "\uc870\uba85": [17, 18, 30, 33], "\ub4f1\uc73c\ub85c": [17, 22, 27], "\ucd2c\uc601\ud560": 17, "\ud544\uc694\uac00": [17, 34, 44, 47], "\uc0dd\uc131\uc758": [17, 39], "\ubc1c\uc804\uc744": [17, 22, 26, 33], "\uc0ac\uc6a9\uc790\ub294": [17, 29, 38, 40], "\ud76c\uadc0": 17, "\uaddc\uc81c\ub97c": 17, "\ube44\uc804": 17, "\uc0ac\uc804\uc744": 17, "\ub098\ud0c0\ub0b4\ub294": [17, 29, 32, 40, 44, 52], "word": [17, 20, 24], "\ucd5c\uc801\ud654\ud568\uc73c\ub85c\uc368": 17, "2208": [17, 40, 47], "01618": [17, 40], "\ubc29\ubc95\ub860\ub4e4\uc740": 17, "\uc81c\uacf5\ud558\uc9c0": [17, 22], "problem": [17, 44], "i_i": 17, "ldot": 17, "\uc774\ubbf8\uc9c0\ub4e4\uc758": [17, 32], "\ub9e5\ub77d": 17, "\uc758\ubbf8": [17, 18, 19, 22, 26, 33], "stand": [17, 50], "radianc": 17, "\ud544\ub4dc\ub97c": 17, "\uc778\ucf54\ub529\ud558\ub294": [17, 36], "mlp": [17, 18, 28, 31, 32, 36, 52, 56], "field": [17, 18, 20, 30, 44, 50, 52], "\ucea1\ucc98\uac00": 17, "\uc124\uc815\uc5d0": [17, 40], "\uc81c\ud55c\uc801\uc774\uace0": 17, 
"\uae30\uc220\uc744": [17, 22, 32, 33, 35, 49], "3d\ub85c": 17, "stablediffus": 17, "\ubc18\uc601\ud558\uc5ec": 17, "\uc77c\uce58\ud558\uc9c0\ub9cc": 17, "\uc774\ubbf8\uc9c0\ub0b4\uc5d0\uc11c": 17, "\uc138\ubd80\uc801\uc778": [17, 19, 22, 38, 44], "\ud574\uacb0": [17, 45], "\ud53c\uc0ac\uccb4\uc5d0": [17, 19], "5\uc7a5": [17, 40], "\uc8fc\uc5b4\uc9c0\ub294": [17, 38], "\ud30c\uc778\ud29c\ub2dd\ud558\uc5ec": [17, 19, 29, 39], "casual": 17, "captur": [17, 22, 31], "\ud30c\uc778\ud29c\ub2dd\uc744": [17, 19, 39], "_d": 17, "proce": 17, "\ubc29\uc9c0\ud558\uc5ec": 17, "\uac1c\uc120\ud558\uace0": 17, "drift": [17, 43, 47], "\ubcfc\ub968\uc758": 17, "\ub79c\ub364\ubdf0\uac00": 17, "\uc0c1\uc751\ud558\ub3c4\ub85d": 17, "phi": [17, 18, 20, 25, 28, 32, 41, 43, 45, 46, 51, 52, 55, 56], "\ud45c\ud604\ub41c": 17, "\ubc00\ub3c4\uc758": [17, 32], "\uadf8\ub798\ub514\uc5b8\ud2b8\ub85c\ubd80\ud130": 17, "\uacc4\uc0b0\ub41c": [17, 33, 56], "nomals\uc740": 17, "lambertian": [17, 32], "shade": [17, 18, 52], "\uc0ac\uc2e4\uc131\uc744": 17, "\uac1c\uc120\uc2dc\ud0a4\uae30": 17, "relight": 17, "\ud558\ub294\ub370": [17, 20, 31, 33, 36, 42, 52], "\uc0ac\uc6a9\ub428": [17, 27, 40], "camera": [17, 18, 36], "locat": [17, 18, 32, 36], "albedo": [17, 18, 52], "densiti": [17, 18, 27, 32, 36, 48, 52], "\uc8fc\uc5b4\uc9c0\uba74": [17, 19, 29], "\uc74c\uc601": [17, 18], "\ucc98\ub9ac\ub41c": [17, 18, 29], "\ubcfc\ub968": [17, 32], "\ub80c\ub354\ub9c1\ud55c": [17, 36], "\ubcf4\uc774\ub3c4\ub85d": 17, "\ub9e4\uac1c\ubcc0\uc218\ub97c": 17, "\ucd5c\uc801\ud654\ud558\uae30": 17, "nabla_": [17, 18], "_v": [17, 24, 27], "\ub80c\ub354\ub9c1\ub41c": [17, 18, 32, 33], "\ubc84\uc804\ub4e4\uc744": 17, "\uc5d0\ub108\uc9c0": 17, "push": 17, "views\ub97c": 17, "\uc120\ud0dd\ud558\uace0": 17, "\uc5ed\uc804\ud30c": 17, "render": [17, 50, 52], "\uacb0\uacfc\ub4e4\uc774": 17, "\uc774\ubbf8\uc9c0\ucc98\ub7fc": [17, 37], "\ud658\uacbd\uc744": 17, "\ud53c\uc0ac\uccb4": 17, "peson": 17, "\ub9de\ucda4\ud654\ub41c": 17, 
"BUT": 17, "\uacb0\ud569\uc740": 17, "\ubd88\ub9cc\uc871\uc2a4\ub7ec\uc6b4": 17, "\ucd08\ub798": 17, "\uc0dd\uc131\uc5d0\uc11c": 17, "\ub2e4\uc591\uc131\uc774": [17, 24, 39, 41, 51], "\uacbd\ud5a5\uc744": [17, 23, 37], "exemplar": 17, "views\uc5d0": 17, "\uc720\uc0ac\ud558\ub3c4\ub85d": [17, 56], "\uc800\ud558\ub428": 17, "\uc190\uc2e4\uc740": [17, 53], "\uc5bb\uae30\uc5d0": 17, "\ubd88\ucda9\ubd84": 17, "\ub300\uc0c1\uc5d0": [17, 47], "\uac01\ub3c4\uc5d0\uc11c": [17, 18, 32, 33, 47], "janu": 17, "\uc0c1\ubc18\ub418\uac70\ub098": 17, "\uc5f0\uad00\ub41c": 17, "\uce21\uba74\uc744": 17, "\ub2e4\ub8e8\uc5b4\uc57c": 17, "dreambooth\ub97c": 17, "\ud6c8\ub828\uc2dc\ud0a4\uace0": 17, "nerf\ub97c": [17, 18, 32, 36], "\uac00\uc6b4\ub370": [17, 29], "nerf\uc5d0\uc11c": [17, 18], "\uc2dc\uc810": [17, 32, 43], "\uc624\ub978\ucabd": [17, 19, 22, 29, 35, 39], "\ubd80\ubd84\uc801\uc778": 17, "\uc870\uc815\ud55c": 17, "\uc190\uc2e4\uacfc": [17, 22], "\uc7ac\uad6c\uc131": [17, 33, 40], "\ud574\uacb0\ud558\uace0": [17, 26], "\ub9de\ucda4": [17, 31, 40], "booth3d": 17, "dreambootht2i": 17, "\uccb4\ud06c\ud3ec\uc778\ud2b8\uac00": 17, "\ud30c\uc778\ud29c\ub2dd\ud55c": [17, 19], "view\uc5d0": 17, "\uacfc\uc801\ud569\ub418\uc9c0": 17, "\ud558\uc5d0": 17, "dreamfusion\uc740": [17, 18], "\uc0dd\uc131\uac00\ub2a5": 17, "\uacb0\uacfc\ubb3c\uc740": 17, "\uc720\uc0ac\ud558\uc9c0": [17, 22], "\ubd80\ubd84\uc801\uc73c\ub85c\u3141": 17, "\uc720\uc0ac": 17, "\ud558\uba74\uc11c": [17, 50], "\ucda9\uc2e4\ud55c": [17, 24], "\uc811\uadfc\ubc95\uc758": 17, "\ubd80\ubd84": [17, 19, 23, 32, 35, 45], "\uac16\ucd98": 17, "fulli": [17, 23, 25, 29, 32, 44], "\ub80c\ub354\ub9c1\ud558\uc5ec": 17, "\ub80c\ub354\ub9c1\uc5d0": [17, 32, 33], "\uace0\uc815\ub41c": [17, 18, 29, 32, 35, 36, 40, 43, 48, 51], "\uc804\ud658": 17, "\uc2e4\ud589\ud558\uc5ec": 17, "\uc9c0\uc815\ud568\uc73c\ub85c\uc368": 17, "\ubc94\uc704\uc758": 17, "\ucee4\ubc84\ud558\uba74\uc11c": 17, "\ubcc0\ud615\uc5d0": [17, 19], "collect": [17, 44], 
"insight": 17, "\uac00\uae4c\uc6b8": 17, "dreambooth\uac00": 17, "\uccb4\ud06c\ud560": 17, "img2img": [17, 53], "\ubcc0\ud658\uc758": 17, "\ub80c\ub354\ub9c1\uc758": [17, 32], "\uc720\uc9c0\ud558\uba74\uc11c\ub3c4": [17, 51], "\uc5f0\uad6c\uc758": [17, 24, 28, 33], "\uc751\uc6a9\uc73c\ub85c\ub9cc": 17, "sds\uc640": 17, "i_v": 17, "\uacb0\ud569\uc744": [17, 25, 26], "cup": 17, "\uc900\ube44": 17, "\uc77c\ubc18\ud654\uc640": 17, "\ubcf4\uc874": 17, "\uc6b0\uc218\ud558\uae30": 17, "idendtity\uac00": 17, "\ud5a5\uc0c1\ub41c": [17, 18, 23, 35, 40], "\uc190\uc2e4\ub9cc": 17, "\uc0ac\uc6a9\uc2dc": 17, "\ubcf4\uc720": 17, "satur": [17, 49], "\ub2e4\uc218": [17, 29, 46], "\uc0c9\uc0c1\uc758": 17, "\uacfc\ub3c4\ud55c": 17, "\ud3ec\ud654": 17, "\ube44\ud604\uc2e4\uc801\uc774\uac70\ub098": 17, "\uc65c\uace1\ub41c": 17, "\ud45c\ud604\uc774": [17, 26, 32], "\ub098\ud0c0\ub098\ub294": 17, "\uacb0\ud568": 17, "\uc0c9\uc0c1\uc744": [17, 18, 27, 32, 33], "\uacfc\ub3c4\ud558\uac8c": 17, "\uac15\uc870\ud558\ub294": 17, "\uc798\ubabb": [17, 19, 33], "\uc608\uce21\ud558\uc5ec": [17, 32, 33], "\ube44\ud604\uc2e4\uc801\uc778": 17, "\ub9e4\uac1c\ubcc0\uc218": 17, "p_v": 17, "\uc788\uc73c\ubbc0\ub85c": [17, 40], "\ud6c8\ub828\uc744": [17, 24], "\uaddc\uc81c": 17, "_p": [17, 19], "\ub80c\ub354\ub9c1\ud558\ub294": [17, 32], "views\uc5d0\uc11c": 17, "\ud5a5\uc0c1": [17, 18, 19, 31, 32, 37, 39, 45], "nerf360": 17, "\uc815\uaddc\ud654": [17, 32, 33], "t5": [17, 18, 24, 31, 42, 47], "xxl": [17, 18, 31, 42, 47], "4core": 17, "tpuv4": [17, 18, 52], "\ud504\ub86c\ud504\ud2b8\ub2f9": 17, "\uc644\ub8cc\ud558\ub294": 17, "3\uc2dc\uac04": 17, "\uc18c\uc694": [17, 32], "d_\u03b8": 17, "150\ubc88\uc758": 17, "\ubc18\ubcf5\ud6c8\ub828": 17, "800\ubc88": 17, "\ucd5c\uc801\uc758": [17, 25, 39, 51], "\uc6d0\uc810\uc5d0\uc11c": 17, "\ubc18\uacbd\uc73c\ub85c": 17, "\uade0\uc77c\ud558\uac8c": [17, 19, 32, 44], "20\uac1c\uc758": [17, 33, 36], "3\ub2e8\uacc4\uc5d0\uc11c": 17, "150\ubc88": 17, 
"\ubc18\ubcf5\ud558\uc5ec": [17, 54], "hyperparam": 17, "\uceec\ub809\uc158\uc744": 17, "\uac1c": [17, 39, 41, 42, 44], "\uc7a5\ub09c\uac10": 17, "\ubc30\ub0ad": 17, "\uc120\uae00\ub77c\uc2a4": 17, "\ub9cc\ud654": [17, 47], "30\uac1c\uc758": 17, "\uceec\ub809\uc158\uc73c\ub85c": 17, "\uc62c\ube7c\ubbf8": 17, "\uc7a5\uc2dd\ud488": 17, "\ud76c\uadc0\ud55c": 17, "\ubd84\uc11d\ud558\uae30": 17, "contextu": 17, "\ubb38\ub9e5\ud654": 17, "\uc2dc\uc5f0": 17, "rgb": [17, 18, 29, 32, 33, 36, 44, 45], "\uacf5\uac04\uc774": 17, "\uc73c\ub85c\uc368": 17, "\uc2e4\ud589": 17, "\ud655\uc0b0": [17, 22, 33], "dreamfusion\uc744": 17, "precis": [17, 18, 27, 33, 43], "rendering\ub41c": [17, 18, 36], "\uc7a5\uba74\ub4e4\uc774": [17, 18], "\uc77c\uce58\ud558\ub294\uc9c0": [17, 29], "\ube44\uc728\uc744": [17, 18, 33, 35], "\ub4a4\uc5d0": 17, "\uc5b8\uae09": [17, 36], "\uae30\uc900": [17, 23, 45, 46, 52], "\uc624\ub9ac": 17, "\uc801\uc808\ud788": 17, "\uc791\ub3d9\ud558\uc9c0\ub9cc": 17, "\uc678\ud615": 17, "360\ub3c4": [17, 32, 33], "asset\uc744": [17, 18, 36], "\uc0dd\uc131\ud558\uba70": [17, 22, 26, 39], "\uc678\uad00\uc758": 17, "\ubc18\uc601\ud568": 17, "1\ub2e8\uacc4\uc640": 17, "nerf\uc640": [17, 18, 33, 36], "\ubd80\ubd84\uc801\uc73c\ub85c\ub9cc": 17, "\uc720\uc0ac\ud558": 17, "dreambooth3d\uc758": 17, "\ud544\uc694\ud568\uc744": 17, "dreambooth3d\uc640": 17, "\uc138\uac00\uc9c0\uce21\uba74\uc5d0": 17, "\uc9c8\ubb38\uc5d0": [17, 39], "\ub2f5\ubcc0\uc73c\ub85c": 17, "\ucda9\uc2e4\ub3c4": [17, 19, 49], "\ubcf4\uc785\ub2c8\uae4c": 17, "\uc77c\uad00\uc131\uacfc": 17, "\ud0c0\ub2f9\uc131": 17, "\ud0c0\ub2f9\ud558\uace0": 17, "\uc788\uc2b5\ub2c8\uae4c": 17, "\ube44\ub514\uc624\uac00": [17, 29], "\uc81c\uacf5\ub41c": [17, 19], "\ubc18\uc601\ud569\ub2c8\uae4c": 17, "30\uac1c": 17, "\ud68c\uc804": 17, "11\uba85\uc758": 17, "\uc751\ub2f5": 17, "54\uac1c\uc758": 17, "\uace0\uc720\ud55c": [17, 19, 40], "21\uba85\uc758": 17, "\ub2e4\uc218\uacb0": 17, "\ud22c\ud45c\ub97c": 17, "\uc0b0\ucd9c": 
[17, 32], "dreambooth3d\ub294": 17, "\ucda9\uc2e4\ub3c4\uc5d0\uc11c": 17, "\ubaa8\ub378\ub4e4\ubcf4\ub2e4": [17, 37, 55], "\uc720\uc758\ubbf8\ud558\uac8c": [17, 44], "\uc120\ud638\ub428": 17, "\uc7ac\ubb38\ub9e5\ud654": 17, "\uc8fc\uc81c\uc758": [17, 22], "\uc7ac\ubb38\ub9e5\ud654\ud55c": 17, "\ubb38\ub9e5\uc744": [17, 22, 27], "\ucd9c\ub825\ub41c": 17, "\uc790\uc138\uc640": 17, "\ub85c\uceec": 17, "\ubcc0\ud615\uc740": 17, "\ud3ec\uc988\uc784\uc5d0\ub3c4": 17, "\uc0ac\uc2e4\uc801": 17, "\ud3b8\uc9d1": 17, "\uc7ac\uc9c8": 17, "accessor": 17, "\uc561\uc138\uc11c\ub9ac": 17, "\ud06c\ub9bc\uc0c9": 17, "\uc2e0\ubc1c\uc744": 17, "\uc0c9\uc0c1\uacfc": [17, 32], "\ud504\ub9b4": 17, "\ucd94\uac00\ub97c": 17, "\uc2a4\ud0c0\uc77c\ud654": 17, "\ube44\uc0ac\uc2e4\uc801": 17, "\ud53c\uc0c1\uccb4": 17, "\ud3c9\uba74": 17, "\uadf8\ub7f4\ub4ef\ud55c": [17, 40], "\ud615\ud0dc\ub85c": [17, 24, 26, 31, 32, 35, 36, 43, 46, 52, 55, 56], "\uc815\uba74\uc784\uc5d0\ub3c4": 17, "\ub54c\ub54c\ub85c": [17, 26, 33, 44, 51], "\ud3ec\ud654\ub418\uace0": 17, "\ub9e4\ub044\ub7fd\uac8c": 17, "\ucc98\ub9ac\ub418\ub294": 17, "\uac00\uc774\ub358\uc2a4\ub97c": 17, "\ud53d\uc140\uc774\ub77c\ub294": 17, "\uc81c\ud55c\ub418\uc5b4": 17, "\ud6a8\uc728\uc131": [17, 22, 31], "\ud5a5\uc0c1\uc740": 17, "\ud655\uc7a5\ud560": 17, "\ud45c\ud604\uc740": 17, "\uc5c6\uc73c\uba74": 17, "\ubd88\uc77c\uce58\ud55c": 17, "\uc815\uba74\uc73c\ub85c": 17, "\ubd88\uc77c\uce58": 17, "\uc120\uae00\ub77c\uc2a4\uc640": 17, "\uc587\uc740": 17, "\uc7ac\uad6c\uc131\ud558\ub294": [17, 33], "\ubc29\ubc95\uc778": [17, 19, 26, 27, 29, 30], "dreambooth3d\ub97c": 17, "\uc18c\uaddc\ubaa8": 17, "\uc14b\ud2b8\uac00": 17, "\ud3ec\uc988\uc640": [17, 19, 40], "\ucee8\ud14d\uc2a4\ud2b8": [17, 33, 40], "\uc790\uace0": 17, "\uc810\ud504\ud558\ub294": 17, "\uc900\uc218\ud558\ub294": 17, "\uac00\uc9c0\uba74\uc11c\ub3c4": 17, "\ud3c9\uac00\uc5d0\uc11c": [17, 33, 45], "\ubcf4\uc784\uc744": [17, 21, 26, 36], "2209": [18, 29], "14988": 18, 
"dreamfusion3d": 18, "\uaddc\ubaa8\uc758": [18, 26, 29, 35, 39, 40], "\uc704\ud574\uc11c\uc740": 18, "label\ub41c": 18, "\ucda9\uc871\uc2dc\ud0ac": 18, "parameter": [18, 43], "prior\ub85c": [18, 26, 29], "\uac01\ub3c4\uc640": 18, "\uc870\uba85\uc5d0\uc11c": 18, "\ud658\uacbd\uc5d0": 18, "\ub370\uc774\ud130\ub098": 18, "\uc5c6\uc774\ub3c4": [18, 25, 29, 36, 43, 49, 51], "\ub3d9\uc791\ud568": [18, 31], "\uc9c0\uc6d0\ud55c\ub2e4": 18, "\uac00\ub2a5\ud588\ub358": 18, "\uc801\uc6a9\ud558\ub824\ub294": 18, "\uc2dc\ub3c4\ub294": 18, "\uc131\uacf5\uc801\uc774\uc5c8\uc9c0\ub9cc": 18, "data\uac00": [18, 27, 36], "\ubd84\uc57c\uc5d0\uc11c\ub3c4": 18, "asset\uc774": 18, "\uc694\uad6c\ub418\uc9c0\ub9cc": 18, "\uc2dc\uac04\uacfc": [18, 26], "\uc694\ud558\ub294": [18, 43], "\uc791\uc5c5": [18, 50], "voxel": [18, 32, 33, 36, 52], "cloud\ub97c": [18, 36], "\ube44\ub86f\ud55c": [18, 43, 54, 55], "\ubc29\ubc95\uc774\ub098": [18, 51], "\ub9cc\ub4e4\ub824\ub294": 18, "\uc2dc\ub3c4": 18, "\ubc29\ubc95\ub4e4\uc740": [18, 19, 26, 30], "synthesis\uac00": 18, "\ud55c\ud3b8": 18, "rendering\uc778": 18, "\ud1b5\ud569\ud558\ub824\ub294": 18, "\uc2dc\ub3c4\uac00": [18, 30], "\uc788\uc5c8\ub294\ub370": [18, 39], "\uadf8\uc911": 18, "dreamfield": 18, "dreamfield\ub294": 18, "optimization\uae30\ubc18": 18, "\ubd80\uc871\ud55c": [18, 23, 47], "\ud604\uc2e4\uc131\uacfc": 18, "\uc815\ud655\uc131\uc5d0": 18, "dream": 18, "fields\uc5d0\uc11c": 18, "model\ub85c\ubd80\ud130": 18, "distill\ub41c": 18, "\uad6c\uc131\ub418\ub294\ub370": [18, 37], "\ubbf8\ubd84\uac00\ub2a5\ud55c": [18, 33, 36], "parameterization\uc744": 18, "sds\ub97c": 18, "\uacb0\ud569\ud568\uc73c\ub85c\uc368": 18, "\uc8fc\uc5b4\uc9c4\ub2e4\uba74": 18, "dramfusion\uc740": 18, "\uace0\ud488\uc9c8\uc774\uba70": 18, "\uc77c\uad00\uc131\uc788\ub294": 18, "object\uc640": [18, 26], "scene\ub4e4\uc744": 18, "perturb": 18, "datapoint": 18, "dist": [18, 53], "approx": [18, 43, 44, 46, 48, 51], "noise\ub294": [18, 23], "density\uc5d0": 18, 
"\uc5f0\uad00": 18, "elbo\ub85c": 18, "match": [18, 20, 30, 43], "objective\ub85c": 18, "\uac04\uc18c\ud654": 18, "\uad00\uc810": 18, "function\uc774": [18, 26, 34, 36], "\uadfc\uc0ac\ud558\ub294": [18, 43], "\uc608\uce21\ud574\uc57c\ud558\ub294": 18, "cfg": [18, 25, 30, 31, 51], "density\uac00": 18, "\uc601\uc5ed\uc744": [18, 26, 50], "\uc120\ud638\ud558\ub3c4\ub85d": 18, "\ud76c\uc0dd\ud558\uc5ec": 18, "sampling\uc5d0\ub294": 18, "\uad00\uc2ec\uc774": 18, "\ub79c\ub364\ud55c": [18, 19, 33, 36, 38, 39, 41, 53, 56], "rendering\ud560": 18, "\uc774\uc640": [18, 42, 52, 53], "dip": 18, "differenti": [18, 21, 32, 33, 34, 43, 51, 52, 54, 56], "\ubd84\ub958\ud568": 18, "\ubcc0\ud658\ud560": [18, 44], "dip\ub97c": 18, "\ud559\uc2b5\uc2dc\ud0ac": [18, 42, 56], "3d\uc5d0\uc11c\ub294": 18, "volume\uc758": 18, "parameter\ub85c": [18, 21, 23, 25, 26], "volumetr": [18, 32, 52], "\uc9c0\uc815\ud560": 18, "parameter\ub4e4\uc744": [18, 26, 36], "\uacb0\uacfc\uc778": 18, "\ud76c\ub9dd": 18, "deepdream\uacfc": 18, "func\uc774": 18, "\uc2e0\ub8b0\ub3c4": 18, "loss\uac00": [18, 36, 51], "\uc801\uace0": 18, "\uc7ac\uc0ac\uc6a9\ud588\uc73c\ub098": 18, "gradient\uc5d0": 18, "jacobian": 18, "term\uc740": 18, "\ube44\uc6a9\uc774": [18, 19, 33, 40], "noise\uc5d0": [18, 38], "\uc791\ub3d9\ub418\uc9c0": 18, "\uc0dd\ub7b5": [18, 36, 40], "gradient\ub85c": 18, "\ubc1c\uacac": [18, 30, 33], "\uc601\uc5ed\uc73c\ub85c": 18, "\uc774\ub3d9\ud558\uae30": 18, "4\uc5d0\uc11c": [18, 35], "loss\uc758": [18, 22, 23, 36], "gradient\uc784\uc744": 18, "backpropagation\uc774": 18, "\uc54c\uace0\ub9ac\uc998": [18, 42, 56], "64x64": [18, 21, 23, 24, 29, 31, 39, 43, 49, 50, 51, 52, 54, 55], "model\ub9cc": [18, 24], "\uc218\uc815\uc5c6\uc774": 18, "volumet": 18, "raytrac": 18, "nerf\ub85c\ubd80\ud130": 18, "rendering\ud558\uae30": 18, "rai": [18, 32, 36, 52], "cast": 18, "ray\ub97c": [18, 36], "\uc0d8\ud50c\ub41c": 18, "mlp\uc5d0": [18, 32, 33], "\ud1b5\uacfc\uc2dc\ucf1c": [18, 20, 53], "\uc2a4\uce7c\ub77c": [18, 
19], "tau": [18, 25, 45, 55], "mipnerf": 18, "alias": 18, "\ud2b9\ud654": [18, 22, 45], "radiance\ub97c": 18, "\ub0b4\ubcf4\ub0b4\ub294": 18, "point\ubcc4": 18, "rho": [18, 25, 43, 51], "\ud3ec\uc778\ud2b8\uc5d0": [18, 31, 33], "vector\ub294": [18, 20, 40], "coordin": [18, 28, 32, 36, 52], "\uad00\uc810\uc5d0": 18, "normalizing\uc744": [18, 46], "\uacc4\uc0b0\ub420": 18, "ambient": 18, "l_a": 18, "\ub0b4\uc6a9": [18, 24, 33], "white": 18, "textureless": 18, "\ucc98\ub9ac": [18, 52], "\ud1f4\ud654\ub41c": 18, "\uc194\ub8e8\uc158\uc744": 18, "\uc720\uc775": 18, "sphere": 18, "query\ub97c": [18, 40], "\ub0b4\uc5d0\uc11c\ub9cc": 18, "\uc218\ud589\ud558\uc5ec": 18, "\ubc00\ub3c4\uac00": 18, "\uadfc\ucc98\uc5d0": 18, "\ucc44\uc6cc\uc9c0\uc9c0": 18, "\uc54a\ub3c4\ub85d": [18, 19, 27], "\ud658\uacbd": [18, 40], "\ub9f5": 18, "\uacc4\uc0b0\ud558\uace0": [18, 19, 33], "\ub204\uc801\ub41c": [18, 32], "\ubc30\uacbd\uacfc": 18, "\uad11\uc120": [18, 32], "\uc790\uc5f0\uc2a4\ub7fd\uac8c": [18, 21, 30, 37, 51], "geometri": [18, 44, 52], "opacity\uc5d0": 18, "penalti": 18, "\uacf5\uac04\uc5d0": [18, 35], "\ubd88\ud544\uc694\ud55c": [18, 27], "\ucc44\uc6c0\uc744": 18, "orient": 18, "\ubc84\uc804\uc744": [18, 26, 33], "field\uc5d0\uc11c": [18, 36], "camera\ub85c\ubd80\ud130": 18, "\uba40\uc5b4\uc9c0\ub294": 18, "\ubc29\uc9c0\ub97c": 18, "dreamfusion\uc758": 18, "spheric": 18, "position\uc740": 18, "coordinate\uc5d0\uc11c": 18, "sample\ub428": 18, "elev": 18, "phi_": [18, 20, 25, 51], "cam": 18, "90": [18, 42, 49, 51], "azimuth": 18, "origin\uc73c\ub85c\ubd80\ud130": 18, "focal": 18, "length": [18, 32], "multipli": 18, "35": [18, 32], "\uc8fc\ubcc0": [18, 29, 51], "\ubd84\ud3ec\uc5d0\uc11c": [18, 34, 40], "pose\uc640": 18, "position\uc774": 18, "\ud574\uc0c1\ub3c4\uc758": [18, 19, 31, 33, 35], "model\ub97c": [18, 35, 40], "\uc635\uc158": 18, "\ud558\ub098\ub97c": [18, 24, 38, 49], "\uc870\uba85\uc774": 18, "\uc801\uc6a9\ub41c": [18, 19, 22, 38, 39, 44], 
"\uc0c1\ud0dc\uc5d0\uc11c\uc758": 18, "\ud14d\uc2a4\ucc98": [18, 19], "\uc54c\ubca0\ub3c4": 18, "\uc0c9\uc0c1\ub9cc\uc744": 18, "\uace0\ub3c4": 18, "\uac01\ub3c4": [18, 36], "60": [18, 36], "circ": [18, 32], "overhead": 18, "02": [18, 25, 31, 44], "\ub192\uac70\ub098": 18, "weight\uac00": [18, 39], "\uc90c": [18, 30, 37], "chip": [18, 31], "5h": 18, "chamfer": 18, "distance\uc640": 18, "psnr\uc740": 18, "\ubcf4\uc720\ub41c": 18, "\uc0ac\uc9c4\uacfc": [18, 44, 52, 53], "\ubcf4\uae30\uc758": 18, "gt\uac00": 18, "\ub300\uc548\uc801": 18, "precision\uc740": [18, 46], "\ucea1\uc158\uacfc": 18, "\uc77c\uce58\ud558\ub294": [18, 19, 21, 33], "\ubb38\uc7a5\uc744": [18, 29, 40], "\uc138\ud2b8": [18, 44], "\uc911\uc5d0\uc11c": [18, 38, 45], "\ucc3e\ub294\uc9c0": 18, "centric": 18, "subset\uc5d0\uc11c": 18, "153\uac1c": 18, "geo": 18, "render\uc5d0": 18, "viewaug": 18, "\uc2dc\uc57c\uac01": 18, "\uc2dc\uc57c\uac01\uc744": 18, "\uace0\ub824\ud558\ub294": 18, "viewdep": 18, "\uc758\uc874\uc801": 18, "\ubb34\ucc44\uc0c9": 18, "\ub9e4\ub044\ub7ec\uc6b4": 18, "\ud45c\uba74\uc744": [18, 36], "\ub9cc\ub4e6": [18, 19], "sds\uc758": 18, "\uc138\ubc00\ud55c": [18, 19, 38, 55], "\ub514\ud14c\uc77c": [18, 32], "\ubcf5\uc6d0\uc774": [18, 31], "\uadfc\ubcf8\uc801\uc73c\ub85c": 18, "2304": [19, 39, 41], "06025": 19, "grail": 19, "cs": 19, "washington": 19, "edu": 19, "jeonghwa": [19, 29, 33, 39, 57], "yoo": [19, 29, 33, 39, 57], "08": [19, 44], "\uc785\ucd9c\ub825": [19, 36], "\uc2dc\ud000\uc2a4": [19, 33, 41], "\uc0ac\ub78c\uc774": [19, 27, 44, 49, 50], "\uc6c0\uc9c1\uc774\ub294": [19, 29], "\ub4ef\ud55c": 19, "\uc2dc\ud000\uc2a4\uac00": 19, "\uc0ac\ub78c\uacfc": 19, "\uc637\uac10\uc758": 19, "\ud569\uc131\ud558\ub294": [19, 44], "dreampose\ub97c": 19, "\uc81c\uc548\ud558\uc600\ub2e4": [19, 23, 25, 51], "\uc2a4\ud14c\uc774\ube14": 19, "\ub514\ud4e8\uc804\uc744": [19, 33], "\ud0dc\uc2a4\ud06c\ub97c": 19, "\uc778\ucf54\ub354\uc640": 19, "\uc778\ucf54\ub354\ub97c": [19, 41], 
"\ub3c4\uc785\ud558\uc600\uace0": 19, "\ub3c4\uc785\ud558\uc5ec": [19, 33, 39], "\ub514\ud4e8\uc804\uc758": 19, "\ub123\uc5b4\uc92c\ub2e4": 19, "\ubc18\uc601\ud558\uae30": 19, "concat\ud558\uc5ec": 19, "\ub514\ub178\uc774\uc9d5": 19, "unet\uc5d0": [19, 24], "\uc8fc\uc5c8\ub2e4": [19, 35, 40], "\ub4c0\uc5bc": 19, "\ucda9\uc2e4\ub3c4\uc758": 19, "\uac15\ub3c4\ub97c": 19, "\uc870\uc815\ud55c\ub2e4": 19, "\ud328\uc158": 19, "\ud0dc\uc2a4\ud06c\uc5d0": 19, "dreampose\uac00": 19, "\uc628\ub77c\uc778\uc5d0": 19, "\ud37c\uc838": 19, "\uc804\ub2ec\ud560": 19, "\uc81c\ud55c\uc801\uc774\uba70": 19, "\uc785\uc5c8\uc744": 19, "\ub298\uc5b4\uc9c4": 19, "\ubaa8\uc591\uc774\ub098": 19, "\ud750\ub984": 19, "\ub258\uc559\uc2a4\ub97c": 19, "\ud3ec\ucc29\ud558\uc9c0": [19, 40], "\ub3d9\uc601\uc0c1\uc740": [19, 41], "\ubcf4\uc5ec\uc8fc\uae30\uc5d0": 19, "\uc18c\ube44\uc790\uc758": 19, "\uc758\uc0ac": [19, 40], "\uacb0\uc815\uc5d0": 19, "\uc81c\uacf5\ud558\uc9c0\ub9cc": [19, 40], "\ub3d9\uc601\uc0c1\uc774": 19, "\uc0c1\ud488\uc740": 19, "\ub4dc\ubb3c\ub2e4": 19, "\ud558\ub098": [19, 28, 45], "\ubaa8\ub378\ub4e4\uc758": [19, 46], "\ubcf4\uc5ec\uc8fc\uc5c8\uc9c0\ub9cc": 19, "\uc5bb\uc9c0": 19, "\ubabb\ud588\uc73c\uba70": 19, "\uc6c0\uc9c1\uc784\uc774\ub098": 19, "\ubaa8\uc591\uc73c\ub85c": 19, "jitter\uac00": 19, "\uc0ac\uc2e4\uc131": 19, "realism": [19, 44, 53], "\ube44\ub514\uc624\uc758": [19, 29], "\ubb3c\uccb4": [19, 32, 36], "\uc81c\uc5b4\ud560": [19, 29], "dreampose\uc758": 19, "\uc811\uadfc\ubc95": [19, 33], "\ubaa8\ub378\ub9c1\ud558\ub294": [19, 32], "\ud29c\ub2dd\ud558\uc600\ub2e4": 19, "\ucee8\ub514\uc154\ub2dd": [19, 29, 33], "\uc2e0\ud638\uc640": 19, "\ub2e8\uc21c\ud654": 19, "\uba54\ucee4\ub2c8\uc998\uc744": 19, "\uc7ac\uc124\uacc4\ud558\uc600\ub2e4": 19, "\uc2a4\ud14c\uc774\uc9c0": [19, 36], "vae\ub97c": [19, 45], "\ud3ec\uc988\ub97c": 19, "\uac04\ub2e8\ud558\uc9c0\ub9cc": 19, "fidelity\ub97c": [19, 35], "\ub192\uc5ec\uc8fc\ub294": 19, "\ud3ec\uc988\uc5d0": 19, 
"\uc77c\ubc18\ud654": [19, 24, 25, 45], "\uade0\ud615\uc744": [19, 20], "\ub9de\ucd94\ub294": 19, "\uc778\uc0c1\uc801\uc778": [19, 40, 44, 55], "\ubcf4\uc5ec\uc8fc\uace0": [19, 20, 21, 22, 29, 33, 46, 47, 49], "\ub4e4\uace0": [19, 33, 40], "\ub514\ud4e8\uc804\uacfc": 19, "\uc694\uad6c": [19, 30, 32, 34], "\uc0ac\ud56d\uacfc": 19, "\ub300\ud3ed": [19, 35, 51], "\uc904\uc77c": [19, 24, 25, 28, 29, 40, 41, 51], "\uccb4\ud06c\ud3ec\uc778\ud2b8\ub294": 19, "\ucd9c\uc2dc": 19, "\uc0ac\uc6a9\ub418\uc5c8\ub2e4": [19, 51], "\ub17c\ubb38\uc5d0\uc11c\ub3c4": [19, 36], "\ud65c\uc6a9\ud558\uace0": [19, 35], "\ud2b9\ud654\ub41c": [19, 35], "\ub3d9\uc601\uc0c1\uc744": [19, 29], "\ud0dc\uc2a4\ud06c": [19, 33], "\uae30\ubc18\uc774": [19, 32, 55], "\ub124\ud2b8\uc6cc\ud06c\ub85c": [19, 32, 56], "\uad6c\uc131\ub418\ub294": [19, 26], "\ub9ce\uc558\ub2e4": [19, 23], "\ub2e8\uacc4\ub9c8\ub2e4": 19, "\ubcc4\ub3c4\uc758": [19, 21, 33, 35], "\ubaa8\uc158\uc774\ub098": 19, "depth\ub4f1\uc758": 19, "\uc5c6\uac70\ub098": [19, 31, 38], "\ubd88\uc644\uc804\ud560": 19, "\ubaa8\uc158\uc774": [19, 29], "\ud06c\uace0": [19, 26, 53, 55], "\ubcf5\uc7a1\ud560": 19, "groud": 19, "truth\uc5d0": 19, "\uc608\uce21\uc740": [19, 22], "\ub3c4\ucd9c\ud558\uae30": 19, "\uc624\ub958\uac00": [19, 22], "\ubc1c\uc0dd\ud558\uae30": 19, "\uc27d\ub2e4": 19, "\uc5d4\ub4dc": 19, "\ud22c": 19, "\uc2f1\uae00": 19, "optic": 19, "warp": 19, "\ud734\uba3c": 19, "\ud588\uace0": [19, 30, 41], "flow\uc5d0": 19, "\uc758\uc874\ud574": 19, "\ubcc0\ud654": [19, 32], "\uac00\ub824\uc9c4": [19, 32, 33], "\uc601\uc5ed": [19, 26, 44], "\uc758\uc0c1": 19, "\ucd5c\uadfc\uc5d4": 19, "\uc5b4\ud150\uc158": 19, "\uc140\ud504": 19, "\ud06c\ub85c\uc2a4": 19, "\uc5b4\ud150\uc158\uc744": 19, "\ud504\ub808\uc784\uc5d0": [19, 29, 41], "\ub9de\ucd94\ub824\uace0": 19, "difffashion": 19, "\ub808\ud37c\ub7f0\uc2a4": 19, "\ud2b8\ub79c\uc2a4\ud37c\ud558\uc5ec": 19, "\uc758\ub958": 19, "\uc544\uc774\ud15c\uc744": 19, "\ud3b8\uc9d1\ud558\ub294": 19, 
"\ub123\uc5b4": [19, 24, 30, 32, 33, 36], "\uc801\uc6a9\ud55c\ub2e4": [19, 33, 35, 41], "\uae30\ub300\ud560\ub9cc": 19, "\ub098\uc624\uc9c0": 19, "\ubaa8\uc158\uc744": 19, "\uc2a4\ud06c\ub798\uce58\ubd80\ud130": 19, "\uac12\ube44\uc2fc": 19, "\ub9ac\uc18c\uc2a4": [19, 24, 26], "\ubc29\ub300\ud559": 19, "\uc624\ub79c": [19, 32], "turn": 19, "video\ub294": 19, "\ud30c\uc778\ud29c\ub2dd\ud55c\ub2e4": 19, "\ubc29\ubc95\ub4e4\uacfc": [19, 33], "\ub9c8\ucc2c\uac00\uc9c0\ub85c": [19, 26, 37, 39, 40, 51], "\uae5c\ube61\uac70\ub9bc": 19, "flicker": 19, "\uad6c\uc870\uc801\uc778": [19, 24, 53], "\ubd88\uc77c\uce58\uac00": [19, 49], "\ud574\uacb0\ud558\uc5ec": 19, "\uc12c\uc720\uc758": 19, "\uc6c0\uc9c1\uc784\uc758": 19, "\uc2f1\ud06c\ub97c": 19, "\uc0ac\uc6a9\ub418\uc5b4": 19, "\ucee8\ub514\uc154\ub2dd\uc740": 19, "\ub370\ub294": 19, "\ud6a8\uacfc\uc801\uc774\uc9c0\ub9cc": [19, 24], "\uc758\uc0c1\uc758": 19, "identity\ub098": 19, "\ud48d\ubd80\ud558\uace0": 19, "\uc0c1\uc138\ud55c": [19, 44], "\uc5f0\uad6c\uc5d0\uc11c": [19, 24, 28], "\ub2e4\ub8e8\uace0": 19, "\ubaa8\ub378\uc5d0\ub294": 19, "\uc784\ubca0\ub529\uc774": 19, "\ud53c\uc0ac\uccb4\ubcc4": 19, "\ub3d9\uc601\uc0c1\uc758": 19, "\ud3b8\uc9d1\ud558\uae30": [19, 40], "\ud1b5\ud569\ud558\uae30\ub3c4": 19, "pidm\uc740": 19, "\ud14d\uc2a4\ucc98\ub97c": 19, "\uc778\ucf54\ub529\ud558\uace0": [19, 31], "\uc5f0\uacb0\ud55c\ub2e4": 19, "dreampose\ub294": 19, "\uc678\ud615\ubfd0\ub9cc": 19, "\uc6c0\uc9c1\uc784\uae4c\uc9c0": 19, "pidm\uacfc": 19, "unet\uc758": [19, 22, 24, 35], "\ub808\uc774\uc5b4\uc5d0": [19, 22], "\ud1b5\ud569\ud558\uc9c0\ub9cc": 19, "\ub178\uc774\uc988\uc5d0": 19, "\uc5f0\uacb0\ub41c": 19, "concaten": [19, 24, 45, 50], "\ubd80\ub4dc\ub7fd\uace0": 19, "\uad6c\ud604\ud560": 19, "\ud488\uc9c8": [19, 29, 33, 39], "\ub2e4\uc591\uc131": [19, 24, 31, 36, 39], "\uc548\uc815\uc131": 19, "\ub2a5\uac00\ud558\ub294": [19, 32], "\uc815\uaddc": 19, "\ubd84\ud3ec\ub41c": 19, "\ub178\uc774\uc988\uc5d0\uc11c": [19, 33], 
"\ubcf5\uc6d0\ud558\ub294": [19, 55], "\uc791\ub3d9\ud558\ubbc0\ub85c": 19, "\ud76c\uc0dd\ud558\uba74\uc11c": 19, "\uc808\uc57d\ud55c\ub2e4": 19, "vae\uc640": [19, 24], "\uc624\ud1a0\uc778\ucf54\ub354": 19, "\ucef4\ud329\ud2b8\ud55c": 19, "\ud45c\ud604\uc5d0\uc11c": 19, "\uacb0\uc815\ub860\uc801": [19, 32], "\ud504\ub85c\uc138\uc2a4\uc5d0": 19, "\ud0c0\uc784": [19, 33], "\uc2a4\ud0ec\ud504": 19, "\ub514\ud4e8\uc988\ub418\uc5b4": 19, "\ub178\uc774\uc9c0": 19, "\ubcf5\uad6c\ud558\uae30": 19, "\ud0c0\uc784\uc2a4\ud0ec\ud504\uc5d0": 19, "feature\uc758": [19, 38], "\uc2dc\uac04\uc73c\ub85c": [19, 27], "\ucee8\ub514\uc154\ub2dd\ub41c": 19, "unet\uc774": 19, "cal": 19, "\uc138\uadf8\uba58\ud14c\uc774\uc158": 19, "\ub9c8\uc2a4\ud06c\ub4f1": 19, "\ub514\ud4e8\uc804\uc5d0\uc11c\ub294": 19, "\uc778\ucf54\ub354\ub85c\ubd80\ud130": 19, "\uc5bb\uc5b4\uc9d0": 19, "\ubcf5\uad6c\ud558\ub3c4\ub85d": 19, "\ubd84\ud3ec\ub85c": [19, 27], "\ubc00\uc5b4\ubd99\uc774\ub294": 19, "\uba54\ucee4\ub2c8\uc998\uc774\ub2e4": 19, "\uc785\ub825\uc744": [19, 24, 29, 32, 39], "\ub110": 19, "\ub300\uccb4\ud558\ub294": [19, 21], "\ub4dc\ub86d\uc544\uc6c3\uc744": 19, "\ub2ec\uc131\ub41c\ub2e4": [19, 33], "\uc778\ud37c\ub7f0\uc2a4\ud558\ub294": 19, "s\ub97c": 19, "\uc870\uac74\ubd80\ub85c": 19, "\uac00\uc774\ub4dc\ud558\ub294": 19, "emptyset": [19, 51], "null\ub85c": 19, "\uc92c\uc744": 19, "\uc608\uce21\uac12\uacfc": [19, 25, 51], "\uc608\uce21\uac12\uc744": [19, 43, 51], "\ubcf4\uac04\ud55c\ub2e4": 19, "\uc2dc\ud000\uc2a4\ub85c\ubd80\ud130": 19, "\uceec\ub809\uc158\uc5d0\uc11c": 19, "\uceec\ub809\uc158\uc5d0": 19, "\uc2e0\ud638": 19, "\ubc1b\uace0": [19, 22, 29, 49, 54], "\ub3d9\uc601\uc0c1\uc73c\ub85c": [19, 29], "\ucf58\ud150\uce20\ub97c": [19, 29, 32], "\ucd9c\ub825\ud558\uae30": 19, "\uc870\uc815\ud558\ub294": 19, "\uc791\uc5c5\uc774": [19, 43], "p_1": 19, "p_n": 19, "_n": 19, "p_i": 19, "\uc808\ucc28\ub97c": [19, 31, 32], "\ub178\uc774\uc988\ub85c": 19, "\uc2dc\uc791\ud558\uc5ec": [19, 33], 
"\uc2e0\ud638\ub85c": 19, "\ucffc\ub9ac\ud558\uc5ec": 19, "latent\uc758": 19, "\uc81c\uac70\ud55c\ub2e4": 19, "\ub514\ub178\uc774\uc988\ub41c": 19, "\ub9cc\ub4e0\ub2e4": [19, 33, 36, 42, 46], "\uc6d0\ub798\uc758": [19, 33, 35], "\uc218\uc815\ud558\uace0": [19, 39], "\uc2dc\uac04\uc801\uc778": [19, 41], "\uad6c\uc131\ud558\uc600\ub2e4": 19, "\uc5b4\ub311\ud130": 19, "\ud544\uc694\uc131": 19, "\ub123\uae30": 19, "\ub4e4\uc5b4\uc624\ub294": 19, "\uc2e0\ud638\ub97c": 19, "net\uc5d0": 19, "concat\ud55c\ub2e4": 19, "\uc2e0\ud638\uc5d0": [19, 33], "\uc870\uac74\ud654\uc5d0": 19, "\ub124\ud2b8\uc6cc\ud06c\uac00": [19, 22], "\uc815\ub82c\ub418\uc9c0": 19, "\ud0dc\uc2a4\ud06c\uc5d0\ub294": 19, "\ub9de\ucda4\ud615": 19, "\uc5b4\ub311\ud130\ub97c": [19, 28], "\uad6c\ud604\ud558\uc600\ub2e4": 19, "\uc5b4\ub311\ud130\ub294": 19, "\uc870\uac74\ud654\ub97c": 19, "\uacb0\ud569\ud55c\ub2e4": 19, "\uc6d0\ub798": [19, 33, 39, 44, 46, 53], "\ub9cc\ub4e4\uc5b4": [19, 25, 36, 49], "\uae30\uc6b8\uae30\ub97c": 19, "\uc774\uc720\ub85c": 19, "\uccb4\uacc4\ub294": 19, "\uc0c1\ud638": [19, 29], "\uc791\uc6a9\ud558\ub294": 19, "\uac00\uc911\uce58\ub97c": [19, 22, 25, 31, 35, 39, 53], "\ub514\ud4e8\uc804\uc774": 19, "clip\uc774": 19, "\uc2a4\ud398\uc774\uc2a4": 19, "share": [19, 30], "\uc778\ucf54\ub529\ud55c\ub2e4\ub294": 19, "\uac10\uc548\ud560": 19, "\ucee8\ub514\uc154\ub2dd\uc744": 19, "\uac04\ub2e8\ud788": 19, "\uc790\uc5f0\uc2a4\ub7ec\uc6cc": [19, 36], "\ubcf4\uc77c": [19, 39], "\uc784\ubca0\ub529\ub9cc\uc73c\ub85c\ub294": 19, "\ucea1\ucc98\ud558\uae30\uc5d0": 19, "vae\uc5d0\uc11c": 19, "\uc785\ub825\ud55c\ub2e4": [19, 35], "\ub3c4\uba54\uc778\uacfc": 19, "\uc7a5\uc810\uc744": [19, 33], "\uac00\uc9c0\uac8c": [19, 27, 38], "\uc544\ud0a4\ud14d\ucc98\ub294": 19, "latent\ub97c": [19, 26, 35], "\uc9c0\uc6d0\ud558\uc9c0": [19, 24, 53], "\uc54a\uae30": [19, 22, 26, 36, 39, 54], "\ub124\ud2b8\uc6cc\ud06c\uc758": [19, 32], "\ud63c\ud569\ud558\uace0": 19, "\ubaa8\ub4c8\uc5d0\uc11c": 19, 
"\uc608\uc0c1\ud558\ub294": 19, "\ubcc0\ud658\ud55c\ub2e4": [19, 27, 33, 41, 51], "\ud30c\uc778\ud29c\ub2dd\uc5d0\uc11c": 19, "\uc5b8\uae09\ud588": 19, "\ub4ef\uc774": 19, "\ucda9\uaca9\uc744": 19, "\uc644\ud654\ud558\uae30": [19, 36, 38], "\uac00\uc911\uce58\ub294": [19, 22], "\uc124\uc815\ub418\uc5b4": 19, "\uc784\ubca0\ub529\uc73c\ub85c\ub9cc": 19, "\uc2dc\uc791\ud55c\ub2e4": 19, "c_i": [19, 24, 32], "\ucee8\ub514\uc154\ub2dd\uacfc": 19, "c_p": 19, "\ube44\ub514\uc624\uc5d0\uc11c": [19, 29], "\ucd94\uc815\ub41c": 19, "\ud3ec\uc988\uc758": 19, "\ud504\ub808\uc784\uc5d0\uc11c\uc758": 19, "\uadf9\ub300\ud654\ud558\uae30": 19, "\ub2e4\uc12f": [19, 26], "pi": [19, 32, 43, 54], "\ud3ec\uc988\ub85c": 19, "\ubd80\ub4dc\ub7ec\uc6c0\uacfc": 19, "\uc99d\uac00\ud55c\ub2e4": 19, "\uad6c\uc870\uc801\uc73c\ub85c": 19, "\ucd08\uae30\ud654\ub41c": [19, 22, 29, 36], "10\uac1c\uc758": 19, "\ucc44\ub110\uc744": [19, 29, 33], "\ubc1b\uc544\ub4e4\uc774\ub3c4\ub85d": 19, "\ucc44\ub110\uc740": 19, "\uac00\uc911\uce58\uc5d0\uc11c": 19, "\uc218\uc815\ub418\uc9c0": 19, "\ub808\uc774\uc5b4": [19, 29, 33], "\uccb4\ud06c\ud3ec\uc778\ud2b8\ub85c": 19, "\ucd08\uae30\ud654\ub41c\ub2e4": 19, "\uc778\ucf54\ub354\ub294": [19, 41], "\uccb4\ud06c\ud3ec\uc778\ud2b8\uc5d0\uc11c": 19, "\ub85c\ub4dc\ub41c\ub2e4": 19, "\ucd08\uae30\uc5d0": [19, 43], "\uc2e0\ud638\uac00": 19, "\uae30\uc5ec\ud558\uc9c0": [19, 32], "\ud30c\uc778\ud29c\ub2dd\ub41c\ub2e4": 19, "phase": 19, "\ud29c\ub2dd\ud558\uc5ec": 19, "\ud569\uc131\ud55c\ub2e4": 19, "\ub514\ucf54\ub354\ub97c": [19, 29], "\uac1c\uc120\ud558\uc5ec": 19, "\ucd94\ub860\uc5d0": 19, "identity\ub97c": [19, 22], "\ubcf4\uc874\ud558\uace0": [19, 40], "\uc720\uc9c0\ud558\ub824\uba74": 19, "\uc0d8\ud50c\ubcc4": 19, "\ud30c\uc778\ud29c\ub2dd\uc774": [19, 39], "\ud544\uc218\uc801\uc774\uc5c8\ub2e4": 19, "\ud504\ub808\uc784\uacfc": [19, 29], "\ud6c8\ub828\ud558\uba74": 19, "\uace0\ucc29": 19, "stick": 19, "\uc544\ud2f0\ud329\ud2b8\uac00": [19, 35], 
"\ube44\ub514\uc624\uc5d0": [19, 29], "\ud06c\ub86d\uc744": 19, "\ud3ec\uc988\uc30d\uc744": 19, "\uc99d\uac15\ud55c\ub2e4": 19, "\uc120\uba85\ud558\uace0": 19, "\ubcf5\uad6c\ud558\ub294": 19, "\ud30c\uc778\ud29c\ub2dd\uc758": 19, "\uc911\uc694\uc131": 19, "\ucd94\ub860\uc2dc": [19, 29], "\uc77c\ub828\uc758": [19, 27, 40], "\ud3ec\uc988\uc5d0\uc11c": 19, "\ud504\ub808\uc784\ubcc4\ub85c": 19, "\uc774\uc911": [19, 51], "dual": [19, 30], "\uc2dc\uc5d0": [19, 30, 31, 52], "\uc218\uc815\ub41c\ub2e4": 19, "s_i": [19, 36, 52], "s_p": 19, "\uac00\uc774\ub358\uc2a4": [19, 33], "\uc6e8\uc774\ud2b8": 19, "\ucee8\ub514\uc154\ub2dd\uc774": 19, "\uacbd\uc6b0\uc640": [19, 26], "\uc6e8\uc774\ud2b8\ub97c": 19, "\uc870\uc815\ud574\uc11c": 19, "\ucda9\uc2e4\ub3c4\ub97c": [19, 22, 33], "\ubcf4\uc7a5\ud558\uace0": 19, "\uc815\ub82c\uc744": 19, "\ubcf4\uc7a5\ud55c\ub2e4": 19, "\uac00\uc774\ub4dc\ub97c": 19, "\uac15\ud654\ud558\ub294": 19, "\uc5d0\uc678\ub3c4": 19, "\ubc29\uc9c0\ud55c\ub2e4": 19, "\ubc30\uce58\uc0ac\uc774\uc988": [19, 35], "accumul": [19, 47], "1500": 19, "pndm": [19, 24], "\uc0d8\ud50c\ub7ec": 19, "100step": 19, "339\uac1c\uc758": 19, "\uc18d\ub3c4\ub294": [19, 39], "\ucd08\ub2f9": [19, 29], "30\ud504\ub808\uc784\uc774\uba70": 19, "\uae38\uc774\ub294": 19, "12\ucd08": 19, "\uc911\uc5d0\ub294": [19, 22, 30], "\ube44\ub514\uc624\ub85c\ubd80\ud130": [19, 30], "densepose\ub97c": 19, "\uc774\uc6a9\ud574\uc11c": [19, 26, 30, 39, 40], "\uacc4\uc0b0\ud558\uc600\ub2e4": 19, "\uacf5\uac1c\uc801\uc73c\ub85c": [19, 40], "mraa": 19, "articul": [19, 47], "thin": 19, "plate": 19, "spline": 19, "mothion": 19, "tpsmm": 19, "\uc218\uce58\uc801": 19, "\uc815\uc131\uc801\uc778": [19, 53], "\uc2a4\ud06c\ub9bd\ud2b8\uc640": 19, "\uad8c\uc7a5": [19, 32], "\uc5d0\ud3ed": 19, "\ud559\uc2b5\ud558\uc600\ub2e4": 19, "avd": 19, "\ubaa8\ub4dc\uc5d0\uc11c": 19, "\ud14c\uc2a4\ud2b8": [19, 29, 33, 44], "\uc2a4\ud06c\ub9bd\ud2b8\ub97c": 19, "pidm\uacfc\ub3c4": 19, "pidm\uc758": 19, "deepfashion": 19, 
"\uccb4\ud06c\ud3ec\uc778\ud2b8\ub97c": 19, "\uc2a4\ud15d\uc744": 19, "\uc2e4\ud589\ud558\uc600\ub2e4": 19, "\ub3d9\uc601\uc0c1\uc5d0": [19, 29], "\ud504\ub808\uc784\uc5d0\uc11c": [19, 41], "50\ud504\ub808\uc784": 19, "\uc774\uc0c1": 19, "\ub5a8\uc5b4\uc838": [19, 51], "50\uac1c\uc758": 19, "\ucd94\ucd9c\ud558\uc5ec": [19, 24, 31], "\ud14c\uc2a4\ud2b8\ud558\uc600\ub2e4": 19, "mraa\uc640": 19, "tpsmm\uc740": 19, "drive": [19, 53], "video\uc5d0\uc11c": 19, "feautre\uc5d0": 19, "\uc758\uc874\ud558\ub294": [19, 33], "uv": [19, 26], "\uc2dc\ud000\uc2a4\uc5d0\ub9cc": 19, "\uc758\uc874\ud55c\ub2e4\ub294": 19, "\uc720\uc758\ud558\ub77c": 19, "\ub124": [19, 33], "\ubaa8\ub450\uc5d0\uc11c": [19, 24, 32], "\ucde8\ud560": 19, "\uc637\uac10": 19, "\uc8fc\ub984": 19, "\ubbf8\uc138\ud55c": [19, 40], "\ud328\ud134\uc774": 19, "\uc190\uc2e4\ub418\ub294": [19, 40], "\ubcc0\uacbd\ud558\ub294": [19, 52], "mraa\ub294": 19, "\ud314": 19, "\ub2e4\ub9ac\uac00": 19, "\ubd84\ub9ac": [19, 37], "pidm\uacfc\uc758": 19, "\uc5bc\uad74\uc758": 19, "\uc5bc\uad74\uc744": 19, "\ud569\uc131\ud558\uc9c0\ub9cc": 19, "\uc77c\uce58\ud558\uc9c0": 19, "\uc637\ucc28\ub9bc\uc774": 19, "\ud504\ub808\uc784\ub9c8\ub2e4": 19, "\ub2ec\ub790\ub2e4": 19, "pidm\uc774": 19, "\ud569\uc131\uc5d0\uc11c\ub294": 19, "\ube44\uad50\ud55c\ub2e4": [19, 26, 29, 51], "NO": 19, "\ud30c\uc778\ud29c\ub2dd\ud558\uc9c0": 19, "\ubc84\uc804": 19, "\ud3ec\uc988\ub9cc": 19, "\uc5f0\uacb0\ud55c": 19, "identity\uc5d0": 19, "\uc5c6\uc5c8\ub2e4": [19, 40], "\uc778\ucf54\ub354\ub85c": 19, "\uad50\uccb4\ud55c": 19, "\ub514\ud14c\uc77c\uc740": 19, "\ucea1\ucc98\ud560": 19, "\uc678\ud615\uc5d0": 19, "\ud30c\uc778\ud29c\ub2dd\ud558\uba74": 19, "\ub514\ud14c\uc77c\uc758": 19, "\uc120\uba85\ub3c4\uac00": 19, "\ud5a5\uc0c1\ub418\uace0": [19, 33], "\uc624\ubc84\ud53c\ud305\uc774": 19, "\ubc1c\uc0dd\ud558\uc9c0": 19, "\uc785\ub825\ud558\uba74": 19, "\ud314\uacfc": 19, "\uba38\ub9ac\uce74\ub77d": 19, "\uc8fc\ubcc0\uc5d0\uc11c\uc758": 19, 
"\ud615\ud0dc\uac00": [19, 36], "\uae5c\ubc15\uc774\ub294": [19, 29], "\ub098\ud0c0\ub0ac\ub2e4": 19, "\uc7a5": [19, 35], "\ub123\uc5b4\uc11c": 19, "\ud30c\uc778\ud29c\ub2dd\ud560": 19, "\ucd94\uac00\ud558\uba74": [19, 20, 28, 39], "\ud5a5\uc0c1\ub41c\ub2e4": 19, "\uc0ac\ub840": 19, "\ub4dc\ubb38": 19, "\uacbd\uc6b0\uc9c0\ub9cc": 19, "\ud314\ub2e4\ub9ac\uac00": 19, "\uc637": 19, "\uc18d\uc73c\ub85c": 19, "\uc0ac\ub77c\uc9c0\uace0": 19, "hallucin": [19, 29], "feature\uac00": 19, "\ud3ec\uc988\uac00": 19, "\ub4a4\ub97c": 19, "\ud5a5\ud560": 19, "\uad00\ucc30\ub41c\ub2e4": 19, "\ud328\ud134\uc758": 19, "\uc637\uc5d0\uc11c": 19, "\ud328\ud134\uc5d0\uc11c": 19, "\uae5c\ubc15\uc784": 19, "\ub3d9\uc791\uc744": 19, "vae\uc5d0": 19, "\ub290\ub9ac\ub2e4": [19, 46], "\ud30c\uc778\ud29c\ub2dd\uc740": 19, "\ud504\ub808\uc784\ub2f9": [19, 29], "18\ucd08\uc758": 19, "\uc678\uc758": 19, "10\ubd84": 19, "\ub514\ucf54\ub354\uc758": 19, "20\ubd84\uc774": 19, "\uc18c\uc694\ub41c\ub2e4": 19, "\uc2a4\ud2f8": 19, "\uc12c\uc720": 19, "05511": 20, "task\uc758": [20, 28], "\uafb8\uc900\ud788": 20, "\ubc1c\uc804\uc911": 20, "\uae30\uc874\uc5d0\ub294": [20, 25, 41, 50], "stylegan\uacfc": 20, "\uc8fc\ub97c": [20, 37], "\uc774\ub918\uc9c0\ub9cc": 20, "\ubca0\uc774\uc2a4\ub85c": [20, 37], "\ucd94\uc138\uac00": 20, "\uae09\uaca9\ud558\uac8c": [20, 23], "\ubc14\ub00c\uc5b4\ubc84\ub9bc": 20, "\uc7a1\ub294\uac83\uc740": 20, "\ubb34\ub9ac\uc77c\uae4c": 20, "gigagan\uc740": 20, "\uc18d\ub3c4\uc810": 20, "\uc18d\ub3c4\uc801": 20, "512px\uc758": 20, "13\ucd08\ub9cc\uc5d0": 20, "megapixel": [20, 41], "1600\ub9cc": 20, "4k": [20, 33, 36], "66\ucd08\ub9cc\uc5d0": 20, "\ud65c\uc6a9\uc131": 20, "\uc0c1": [20, 26], "iteration\uc774": [20, 31], "\ub4e4\uc5b4\uac00\ub294\ub370": 20, "iteration\uc740": 20, "\uc548\uc815\uc131\uc744": 20, "\ud0a4\uc6cc\uc8fc\ub294": 20, "cost\uac00": [20, 24, 25, 28], "\ud55c\ub2e4\ub294": [20, 31, 44], "\ub2e8\uc810\uc774": [20, 21, 24, 34, 37, 43, 53], "pass\ub9cc": 20, 
"\ud544\uc694\ud558\ubbc0\ub85c": 20, "\uc54a\ub2e4\ub294": [20, 24], "object\uc758": [20, 26, 40], "class\uac00": 20, "\uba85\ud655\ud788": [20, 48], "\uc815\uc758\ub418\uc9c0\uc54a\uc740": 20, "develop\ud55c\ub2e4\uba74": 20, "\ub118\uc5b4\uc124": 20, "\uc788\uc744\uae4c": [20, 39], "img": [20, 35, 48], "66": [20, 36], "space\uc0c1\uc5d0\uc11c\uc758": 20, "stylegan2": 20, "stylegan2\ub85c": 20, "\uc120\uc815": 20, "distribution\uc5d0\uc11c\uc758": 20, "disentangle\ub41c": 20, "gigagan\uc5d0\uc11c\ub294": 20, "network\uc758": [20, 24, 36], "z\uc640": 20, "\ud53c\ub77c\ubbf8\ub4dc": 20, "block\ub4e4\ub85c": 20, "\uac12\uc73c\ub85c\ubd80\ud130": 20, "layer\ub9c8\ub2e4": 20, "\ub4e4\uc5b4\uac00\uc11c": [20, 38, 40], "scaling\ud568\uc73c\ub85c\uc368": 20, "demodul": 20, "select": [20, 30], "size\ub9cc": 20, "up\uc744": 20, "\uc548\ub418\ub294": [20, 44], "\uc624\ud508": 20, "\uad6c\uc870\uc0c1": 20, "\ub808\uc774\uc5b4\uc0c1\uc5d0": 20, "filter\uac00": 20, "\uc8fc\uc785\ubd80\ud130": 20, "\uc0dd\uc131\uae4c\uc9c0": 20, "\ucc38\uc5ec": [20, 57], "\ud45c\ud604\ub825\uc744": 20, "\ub5a8\uc5b4\ud2b8\ub9b4": 20, "\ub808\uc774\uc5b4\ub9c8\ub2e4": 20, "k_": 20, "set\uc744": 20, "w\uc758": 20, "\uac70\uce5c": [20, 21, 41, 45, 50], "kernel\uac12\uc5d0": 20, "summation\ud55c": 20, "filter\ub85c": 20, "\uacc4\uc0b0\uc5d0": 20, "softmax\ub97c": 20, "\uc598\ub294": 20, "differentiable\ud558\ubbc0\ub85c": 20, "kernel\uc744": 20, "\ub54c\ubcf4\ub2e4": [20, 33, 39, 43], "\uc808\uc57d\ub41c\ub2e4\ub294": 20, "interleav": 20, "filter\ub294": 20, "recept": 20, "\ub0b4\ubd80\uc758": [20, 26], "\ucea1\ucc98\uc5d0\ub294": 20, "\ud0c1\uc6d4\ud558\uc9c0\ub9cc": 20, "\uc678\ubd80\uc758": 20, "\ud55c\uacc4\uc810\uc744": [20, 43, 53], "\uadf9\ubcf5\ud558\uae30\uc704\ud574": 20, "g_": [20, 53, 56], "stylegan2\uc5d0": 20, "\uc774\uc0c1\uc774": 20, "\uc0dd\uae40": 20, "\uc6d0\uc778\uc740": 20, "product\uac00": 20, "lipschitz\ud568\uc218\uac00": 20, "\uc544\ub2c8\uae30": 20, "lipschitz": 20, 
"\ud568\uc218\ub780": 20, "\ube44": 20, "\uc774\uc0c1\uc73c\ub85c": 20, "\uc99d\uac00\uc2dc\ud0a4\uc9c0": 20, "\ub9cc\uc871\ud558\uc9c0": 20, "\ubabb\ud568\uc73c\ub85c\uc368": 20, "unstabl": [20, 48, 54], "\uc2e4\ud328\ud55c\ub2e4": 20, "\ub9cc\uc871\uc2dc\ud0a4\uae30": 20, "attention\uc758": [20, 24, 27], "product\ub97c": 20, "l2": [20, 36, 42, 43], "distance\ub85c": 20, "advanc": [20, 50], "stylegan2\uacfc": 20, "\ucd08\uae30\uac12\uc744": [20, 36, 52], "attentnion": 20, "\uc790\uc2e0\uc758": [20, 29], "\ud559\uc2b5\uc6a9": 20, "\uc8fc\uc785\uc6a9": 20, "gigagan": 20, "\uc720\uc5f0\uc131\uc744": [20, 40], "output\uc740": [20, 45], "size\uc640": 20, "\ub098\ub220": [20, 26, 41], "index\uc758": 20, "catch": 20, "word\ub97c": [20, 40], "global\ud558\uac8c": 20, "embedding\ud558\ub294": 20, "generator\uc758": 20, "discrimin": [20, 38, 41, 42, 44, 48, 53], "branch\uc758": 20, "conditioning\uc744": [20, 21, 35], "generating\uc744": 20, "c\ub85c\ubd80\ud130": 20, "stylegan\uc5d0\uc11c\ub294": 20, "res\uc758": 20, "loss\uc5d0": [20, 27], "\uc54a\uc558\uc9c0\ub9cc": [20, 42], "\ud45c\ud604\uc758": [20, 32], "extractor": 20, "rightarrow": [20, 32, 36, 45, 53, 54], "conv": [20, 41, 53], "level\uc5d0\uc11c\ub294": [20, 35], "level\uc5d0\uc11c\uc758": 20, "resolution\uc5d0\uc11c": [20, 39], "\uacc4\uc0b0\uc774": [20, 48, 51, 56], "\uc77c\uc5b4\ub098\uae30\ub54c\ubb38\uc5d0": 20, "ij": 20, "psi": [20, 25, 45, 51], "conv_": [20, 29], "\uc2ec\uc740": 20, "\ud569": 20, "\uc55e\ubd80\ubd84\uc740": 20, "\ub4b7\ubd80\ubd84\uc740": 20, "awar": [20, 41], "\uc55e\uc758": 20, "\ub9ac\uc5bc\ud55c\uc9c0": 20, "\uac00\uae4c\uc6b4\uc9c0\uc5d0": 20, "\ucd08\ubc18\uc5d0\ub294": [20, 23, 31], "\uc0c1\uad00\uc5c6\uc774": [20, 32], "\ud004\ub9ac\ud2f0\ub85c\ub9cc": 20, "\ud574\ubc84\ub9bc": 20, "\uac15\uc81c\ub85c": 20, "\ud558\uae30\uc704\ud574": 20, "fake": [20, 48], "pair\ub85c": 20, "\uc9c0\uc815": [20, 25], "constrast": 20, "\uba40\uac8c": 20, "\ubca1\ud130\uc640\ub294": 20, 
"condition\uc758": [20, 24, 34], "vector\uc640\ub294": 20, "\ud559\uc2b5\ub418\uc5b4\uc57c\ud55c\ub2e4": 20, "aid": 20, "2112": [20, 21, 27], "09130": 20, "stylegan\uc5d0\uc11c": 20, "discriminator\ub294": 20, "overfitting\ub418\ub294": 20, "\uc774\ubd80\ubd84\uc744": 20, "\ud574\uacb0\ud558\uae30\uc704\ud574": 20, "sota\uc758": [20, 51], "\ubf51\uc544\ub0b8": [20, 24, 30], "discriminator\uc5d0": 20, "fake\ub97c": 20, "\ubd84\ub958": [20, 33, 39, 44], "\uc801\uc6a9\uc774": [20, 22, 28, 38], "64x64\uc758": 20, "3\ubc88": 20, "6\ubc88": 20, "1024x1024\uc758": [20, 39], "gigagan\uc758": 20, "\uc801\uc6a9\ud560\ub54c\uc5d0\ub294": 20, "\uc0dd\uc131\uacfc\uc815\uc911\uc5d0": 20, "real\ud568\uc744": 20, "laion2d": 20, "en": 20, "coyo": [20, 24], "700m": [20, 24], "1024": [20, 25, 32, 36, 38, 41, 45, 48, 49, 52, 54], "adob": 20, "stock": 20, "machin": [20, 39, 48], "method\uac00": 20, "\ud6a8\uacfc\uac00": [20, 26, 38, 55], "\uc788\ub294\uac00": 20, "\ub2e8\uc21c": 20, "up\ubcf4\ub2e4": 20, "method\ub4e4\uc744": 20, "\uc218\uce58\ub97c": [20, 23, 34, 46], "text2imag": 20, "time\uc744": 20, "\uc5b4\ub290\uc815\ub3c4": [20, 23], "\uc774\ub8e8\uba70": 20, "\uacbd\uc7c1\ub825\uc744": 20, "diffusion\uacfc": [20, 25, 26, 35, 51], "diffutsion\uc758": 20, "\uc18d\ub3c4": [20, 22, 27, 28, 29], "\uac1c\uc120\uc744": [20, 43], "distilation\ud55c": 20, "\uc218\uce58\uc801\uc73c\ub85c\ub3c4": 20, "\uc6b0\uc704\uc5d0": 20, "time\ub3c4": 20, "\ube60\ub974\ub2e4": [20, 33], "upscal": [20, 35, 41], "md": [20, 31], "table4": 20, "src": [20, 37], "pic": 20, "img14": 20, "png": [20, 48], "alt": 20, "bg": 20, "primari": 20, "mb": 20, "700px": 20, "stylegan\uc5d0": 20, "\uc5f0\uad6c\ub41c": 20, "runcat": 20, "trick": [20, 37], "\uc0c1\uc73c\ub85c\ub294": 20, "imagen\uacfc": [20, 24], "\ube44\uad50\ud558\uba74": [20, 24, 35], "develop\uc774": 20, "\ud544\uc694\ud568": [20, 46], "failur": [20, 50], "toward": 21, "icml": [21, 43, 45], "10741": 21, "e\ubcf4\ub2e4": [21, 36], "\ud3c9\uac00\uac00": 
21, "\uc6b0\uc218\ud558\ub2e4\uace0": [21, 53], "powerful\ud55c": 21, "natur": [21, 43], "language\ub85c": 21, "realistic\ud55c": 21, "\ubc29\ubc95\ub4e4\uc774": [21, 30, 38], "\uc0dd\uaca8\ub098\uace0": 21, "\ub300\uc751\ud558\ub294": [21, 26, 44], "\uc0dd\uc131\ud558\uae30\uc5d0\ub294": 21, "\uc0dd\uc131\ubaa8\ub378\uc758": [21, 24, 39], "\ub5a0\uc624\ub974\uba70": 21, "sota\ub97c": [21, 29, 34, 37, 39, 45, 46], "\ucc0d\uc5c8\ub2e4\uace0": 21, "conditional\ud55c": 21, "\uc774\ub8e8\uc5b4\uc84c\ub294\ub370": 21, "beat": 21, "synthesis\ub77c\ub294": 21, "noise\ud55c": 21, "class\ub97c": 21, "sampling\uacfc\uc815\uc5d0\uc11c": 21, "label\uc5d0": 21, "control\uc2dc\ud0a4\ub294": 21, "classifier\uc5c6\uc774": 21, "\uc18c\uac1c\ub418\uc5c8\ub2e4": 21, "guidance\ub77c\ub294": 21, "\uc81c\uc2dc\ud558\uba70": 21, "guidance\uc640": 21, "\uacb0\uacfc\uc801\uc73c\ub85c\ub294": [21, 24], "guidance\uac00": [21, 24, 26, 36, 49], "\ubcf4\uc778\ub2e4\uace0": [21, 47, 53], "shot\uc73c\ub85c": [21, 26], "\uc0dd\uc131\ud558\ub294\ub370\uc5d0": [21, 36], "\ubcf4\uc600\uc73c\ub098": 21, "photorealistc\ud55c": 21, "\uc0dd\uc131\ud558\ub294\ub370\ub294": [21, 48], "\uacaa\uc744": 21, "generation\ubfd0\ub9cc": 21, "\ud3b8\uc9d1\ud560": 21, "impainting\uae30\ub2a5\ub3c4": 21, "\uac00\ub2a5\ud558\ub3c4\ub85d": [21, 22, 30, 56], "impaint": [21, 50], "\ubc29\ud5a5\uc131\uc744": 21, "\ub764\ub2e4\ub77c\uace0": 21, "\uc8fc\uc7a5\ud55c\ub2e4": 21, "proport": 21, "find": 21, "improv": [21, 26, 49], "constant\uac12\uc73c\ub85c": 21, "\uace0\uc815\uc2dc\ud0a8": [21, 53, 55], "learnabl": [21, 24, 38, 47, 55], "\uc124\uc815\ud558\uc5ec": [21, 26, 50], "step\ub9cc\uc73c\ub85c": 21, "sample\uc744": [21, 24, 36, 46], "dharwial": 21, "image\uc0dd\uc131\uc744": 21, "\ub17c\ubb38\uc5d0\uc11c\uc758": 21, "guidance\uc774\ub2e4": 21, "\uc720\uc9c0\ud558\ub418": 21, "\uc18d\ud558\ub294\uc9c0": 21, "classifier\uc758": [21, 24, 46, 49], "\uacfc\uc815\uc758": 21, "score\uc5d0\uac8c": 21, "guide\ub97c": [21, 34], 
"\uc18c\uac1c\ub418\uc5c8\ub294\ub370": 21, "classifiy\ub97c": 21, "\ud574\uc57c\ud558\ubbc0\ub85c": 21, "\uaddc\ubaa8\uac00": [21, 39], "heavy\ud574\uc9c0\ub294": 21, "\ubb38\uc81c\uc810\uc744": [21, 26, 34, 37, 45, 47], "\uac1c\uc120\uc810\uc744": [21, 32], "\uae30\ubc95\uc73c\ub85c": [21, 47, 54, 55], "\uc2dd\uc5d0\uc11c": 21, "\ubcc0\ud615\uc744": [21, 40], "model\ub9cc\uc73c\ub85c": 21, "\uc774\ub8e8\uc5b4\uc838": [21, 26, 44, 56], "\uc30d\uc73c\ub85c": [21, 29, 50], "\uc774\ub8e8\uc5b4\uc9c4": [21, 29], "learning\uc744": [21, 45], "\uc9c4\ud589\uc2dc\ud0a8": 21, "\uc758\ubbf8\ub97c": [21, 31, 37, 39], "pair\uc5d0": 21, "\ucee4\uc9c0\ub3c4\ub85d": 21, "\uc791\uc544\uc9c0\ub3c4\ub85d": 21, "guidance\uc5d0\uc11c\ub294": 21, "guidance\uc5d0\uc11c": 21, "classifier\ub300\uc2e0\uc5d0": 21, "clip\ubaa8\ub378\uc744": 21, "classifier\ub300\uc2e0": 21, "\uad6c\ud55c": [21, 26], "x\uc640": [21, 22], "billion": 21, "resolution\uc744": [21, 29, 46, 51], "\ub610\ub2e4\ub978": [21, 25, 34], "256x256\uc73c\ub85c": [21, 39], "\uc99d\uac00\uc2dc\ud0a4\ub294\ub370": 21, "\uc0ac\uc6a9\ud558\uc600\ub2e4\uace0": [21, 36, 39], "base\ub85c": 21, "\uc9c4\ud589\ud558\uc600\ub2e4": [21, 26, 27, 29, 36], "\uc218\ud589\ud574\uc57c\ud55c\ub2e4": 21, "condition\uc73c\ub85c": [21, 24, 31, 35, 36, 46], "\uc8fc\uae30": [21, 35], "k\uac1c\uc758": [21, 33], "token\uc73c\ub85c": [21, 40], "encoding\ud55c": 21, "input\uac12\uc73c\ub85c": 21, "\ub123\uc5b4\uc900\ub2e4": [21, 24], "output\uc758": 21, "token\uacfc": [21, 45], "token\uc744": [21, 24, 26, 36, 45], "\uc5f0\uc0b0\ud558\uace0\uc790": 21, "projection\ud558\uc5ec": [21, 24], "adain\uae30\ubc95\uc744": 21, "block\uc758": 21, "\ub3c4\ucd9c\ud55c\ub2e4": 21, "block\ub4a4\uc5d0": 21, "\ubd99\ub294": 21, "e\uc640": [21, 36], "\uc0ac\uc6a9\ud558\uc600\uace0": [21, 42, 43, 55], "architecture\ub85c\ub294": 21, "up\ub41c": 21, "2b": [21, 24], "paremeters\ub97c": 21, "transformer\ub97c": [21, 24], "upsampling\ud558\ub294": 21, "model\ub3c4": 21, 
"upsampler\uc640": 21, "\ube44\uc2b7\ud558\ub2e4\uace0": 21, "\uc9c4\ud589\ud588\uc744\ub54c\ub294": 21, "\uc900": [21, 38, 46], "condition\uc5d0": 21, "sequence\ub97c": [21, 36], "impainting\uc744": 21, "\uac70\uce58\uc9c0": 21, "\uc54a\uc558\ub2e4": [21, 24, 33, 36, 42], "sampling\uc744": [21, 23, 27, 36, 45], "\uc54c\ub824\uc9c4": 21, "\uc601\uc5ed\uc5d0": [21, 32], "\uc0ac\uc6a9\ud588\uae30\uc5d0": 21, "\ucc38\uc870\ud560": [21, 50], "tuning\uacfc\uc815\uc5d0\uc11c": 21, "example\uc758": 21, "\uc9c0\uc6b4\ub2e4\uc74c": 21, "\uc815\ubcf4\ub85c\uc11c": 21, "\ucc44\ub110\uacfc": [21, 29, 32], "\uc785\ub825\ub418\ub3c4\ub85d": 21, "\uc124\uacc4\ud558\uc600\ub2e4": 21, "guidance\uc5d0": 21, "\uc801\ud569\ud558\uac8c": 21, "\ud6c8\ub828\uc2dc\ud0a4\uae30": [21, 22], "\ube44\uad50\ud588\uc74c\uc744": 21, "\uc5b8\uae09\ud588\ub2e4": 21, "\uc0ac\uc6a9\ud558\uae30": [21, 24, 28, 33, 36, 51], "models\ub97c": 21, "\uc0ac\uc6a9\ud588\uc74c\uc744": 21, "\ubc1d\ud78c\ub2e4": 21, "\uc5b8\uae09\ud588\ub4ef\uc774": [21, 26], "\uc88b\uc558\ub2e4\uace0": 21, "precision\uacfc": [21, 27], "recal": [21, 23, 27, 43], "score\uc640": [21, 33, 39], "trade": [21, 25, 27, 31, 43, 46, 51], "off": [21, 26, 27, 31, 43, 46, 51], "\uad00\ucc30\ud558\uace0": 21, "\uc5b8\uae09\ud55c\ub2e4": 21, "\ucd5c\uc801\uc73c\ub85c": 21, "\uc218\ud589\ub418\uc5c8\uc73c\uba70": 21, "\ubc29\ubc95\uc784\uc744": 21, "\ud5a5\uc0c1\uc2dc\ud0ac": 21, "\uc911\uc810\uc744": [21, 33, 45], "caption\uacfc": [21, 24, 26], "\uc77c\uce58\uc2dc\ud0a4\ub294": [21, 54], "\ub6f0\uc5b4\ub098\uc9c0": 21, "\uc54a\uc744": [21, 33, 53], "\uac00\uc124\uc744": 21, "\ud3c9\uac00\uc790\ub97c": 21, "\uc9c4\ud589\ud558\uc600\uace0": 21, "\uc778\uac04\ub4e4\uc774": 21, "\uc810\uc218\uc640": [21, 27], "\uc758\uacac\uc744": 21, "guida": 21, "nce\uac00": 21, "\uc0dd\uc131\ud55c\ub2e4\uace0": [21, 44], "\ud310\ub2e8\ud588\ub2e4": 21, "table1\uc740": 21, "unguid": 21, "evaluation\uc744": [21, 29], "\ud56d\ubaa9\uc5d0": [21, 26], 
"\uc555\ub3c4\uc801\uc778": [21, 45], "table2\ub294": 21, "glide\uc640": 21, "model\ub4e4\uc744": 21, "\ud45c\uc774\ub2e4": 21, "\uad6c\ud558\uc600\ub2e4": 21, "coco\uc5d0": 21, "\uacbd\ud5d8\uc774": 21, "\ub5a0\uc624\ub974\uace0": 22, "\uc8fc\uc81c\uc785\ub2c8\ub2e4": 22, "\ub9e5\ub77d\uacfc": 22, "\uc9c4\ud589\ub418\uc5c8\uae30": 22, "\uc77d\uc5b4": 22, "\ubcf4\uc2dc\uae30\ub97c": 22, "\ucd94\ucc9c\ub4dc\ub9bd\ub2c8\ub2e4": 22, "contribution\uc740": [22, 26, 38], "3\uac00\uc9c0\ub85c": 22, "lighweight": 22, "dreambooth\uc758": 22, "\uc720\uc9c0\ud558\uba74\uc11c": [22, 29, 30, 35, 38], "\ud06c\uae30\ub97c": [22, 35, 46, 55], "\uc904\uc774\uace0": 22, "hyperdreambooth\ub97c": 22, "\uad6c\ud604\ud588\uc9c0\ub9cc": 22, "fidelity\uac00": [22, 37, 40, 46, 49], "\ub5a8\uc5b4\uc9c0\uac70\ub098": 22, "hypernetwork\ub97c": 22, "via": [22, 31, 57], "finetuning\uc5d0": 22, "svdiff": 22, "styledrop": 22, "dreamartist": 22, "\uc608\uc2dc\uac00": 22, "\ub290\ub9ac\ub2e4\ub294": [22, 54], "\ub2e8\uc810\uc744": [22, 24, 48, 51, 52, 54, 56], "\uc5f0\uad6c\ub4e4\uc744": [22, 24], "hyperdreambooth\ub294": 22, "\uc774\ub8e8\uc5c8\ub2e4\uace0": 22, "\uc774\uc804\uc5d0": [22, 24, 40, 46], "dreambooth\ub294": 22, "hyperdreambooth\uc758": 22, "\uc601\uac10\uc6d0": 22, "\ud558\ub098\ub85c": [22, 28, 33, 36, 40, 55, 56], "\ud65c\uc6a9\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 22, "\ub7ad\ud06c\uc758": 22, "\uadfc\uc0ac\ud654\ud558\uc5ec": 22, "\ud06c\uae30\uc640": [22, 23], "\ubc29\ubc95\uc785\ub2c8\ub2e4": [22, 38, 44], "personalization\uc774": 22, "\uc0b4\ud3b4": 22, "contribution\uc758": 22, "\uc0b4\ud3b4\ubcf4\ub3c4\ub85d": [22, 56], "\uae30\uc220": [22, 32, 33, 35, 39], "\ud558\ub098\uc778": [22, 39], "\uc904\uc5ec\uc11c": 22, "lidb\uc5d0": 22, "\uc124\uba85\ub4dc\ub9ac\uaca0\uc2b5\ub2c8\ub2e4": 22, "lidb\ub294": 22, "residuals\uc758": 22, "\uc138\ubd84\ud654\ud558\ub294": 22, "\uc544\uc774\ub514\uc5b4\uc785\ub2c8\ub2e4": 22, "orthogon": 22, "basis\ub97c": 22, "a\uc640": 22, 
"\ud589\ub82c\uc744": 22, "\ubd84\ud574\ud558\ub294": 22, "\uac83\uc73c\ub85c\ub3c4": 22, "\uc774\ud574\ud560": [22, 31], "\uad6c\uccb4\uc801\uc73c\ub85c": 22, "a_": [22, 26], "aux": [22, 37], "\ubd84\ud574\ub418\uba70": 22, "b_": [22, 23], "\ubd84\ud574\ud560": 22, "\ud589\ubcc4\ub85c": 22, "\uc9c1\uad50\ud558\ub294": 22, "\ubca1\ud130\ub85c": [22, 40], "\ucd08\uae30\ud654\ub418\uace0": [22, 29], "\ud559\uc2b5\ub418\ub294": 22, "\uac00\uc911\uce58\uc785\ub2c8\ub2e4": [22, 53], "\uc120\ud615": [22, 25, 33, 51], "residual\uc740": 22, "w_x": 22, "experiment": [22, 43, 48, 56], "\ub418\uc5c8\uc73c\uba70": [22, 29], "\uac1c\uc218\ub294": 22, "30k\uac1c": 22, "\uc0ac\uc774\uc988\ub294": 22, "120kb\ub85c": 22, "\uacbd\ub7c9\ud654": [22, 25], "\ubcc0\uc218\ub9cc\uc73c\ub85c": 22, "\ud3ec\uc778\ud2b8\uc785\ub2c8\ub2e4": 22, "\ub2e4\uc74c\uc740": 22, "\ub098\ud0c0\ub0b4\uba70": [22, 32, 33], "\uc544\uc774\ub514\uc5b4\ub294": 22, "x\ub97c": 22, "lidb\uc758": 22, "residual\uc778": 22, "h_": [22, 35], "\ub3cc\uc785\ud558\ub294": 22, "hypernetwork\ub294": 22, "\ud6c8\ub828\ub418\uba70": 22, "paramters\uc785\ub2c8\ub2e4": 22, "\uad00\ub828\ub41c": [22, 26, 27, 40], "\uc870\uc815\ub429\ub2c8\ub2e4": 22, "\ub098\ud0c0\ub0c5\ub2c8\ub2e4": [22, 44], "supervisori": 22, "\uc124\uc815\ub41c": 22, "\uac1c\uc778\ud654\uc5d0": 22, "\uc0c1\ub300\uc801\uc778": 22, "\ud56d\ubaa9\uc758": 22, "\uc9c0\uc6d0\ud558\uae30": 22, "\uc785\ub825\uc785\ub2c8\ub2e4": 22, "\uc9c0\uc2dc\uc0ac\ud56d": 22, "hyperdreambooth\uc5d0\uc11c\ub294": 22, "\ub4dc\ubb3c\uc9c0\ub9cc": 22, "\uc0bd\uc785\ud560": [22, 40], "hyperdreambooth\uc5d0\uc11c": 22, "\uad6c\uc131\ub418\uba70": [22, 35], "\ud558\ub098\uc785\ub2c8\ub2e4": 22, "\uac00\uc911\uce58\uc5d0": 22, "\ub354\ud558\uc5ec": [22, 36], "\uac1c\uc778\ud654\ub97c": 22, "\uc2e4\ud589\ud569\ub2c8\ub2e4": 22, "\ubc18\ubcf5\uc801": 22, "\uc218\ud589\ud569\ub2c8\ub2e4": 22, "hypernetwork\uac00": 22, "\ubc18\ubcf5\uc801\uc778": [22, 25, 41, 51], 
"\uac1c\uc120\ud558\ub824\uace0": 22, "\uc2dc\ub3c4\ud558\ub294": 22, "\ubc29\ud5a5\uc131\uc774": 22, "\uc62c\ubc14\ub974\uace0": 22, "\uc5bc\uad74\uacfc": [22, 35], "\ubbf8\uc138\ub9cc": 22, "\uc7a1\uc544\ub0b4\uc9c0": 22, "tuning\ud558\uace0": 22, "\ub54c\uc5d0": [22, 25], "encoding\uc740": 22, "\uc218\ud589\ub418\uba70": 22, "f\ub294": 22, "\uc2e4\ud589\ud558\uace0": 22, "\uc18d\uc131\uacfc": 22, "\ubc29\ud5a5\uc131\uc5d0": 22, "\uc62c\ubc14\ub974\uac8c": 22, "\ub418\uc9c0\ub9cc": [22, 51], "detail\uc740": 22, "dreambooth\ubcf4\ub2e4": 22, "\uc9c0\uc2dc\uc5b4": 22, "c\uc5d0": 22, "\uc870\uc815\ud569\ub2c8\ub2e4": [22, 53], "\uc810\uc740": [22, 24, 39, 40], "\uac1c\ub150\uc785\ub2c8\ub2e4": 22, "\uc644\ud654\ud558\uc5ec": 22, "rank\ub85c": [22, 25], "hypernetwork\uc758": 22, "\uc8fc\uccb4\uc758": 22, "\uace0\uc8fc\ud30c\uc218": 22, "\uadfc\uc0ac\ud654\ud560": 22, "\uc5c5\ub370\uc774\ud2b8\ubcf4\ub2e4": 22, "\ub2ec\uc131\ud560": [22, 27], "relaxed\uc758": 22, "\uac1c\ub150\uc740": 22, "\ubc29\uc2dd\ubcf4\ub2e4": 22, "\uc694\uc778\uc785\ub2c8\ub2e4": 22, "\uc5ec\uae30\uc11c\ub3c4": 22, "\uc9c0\uc6d0\ud558\uba70": [22, 33], "\uc5bc\uad74\uc5d0": 22, "\ud2b9\uc131\uacfc": 22, "\ucea1\ucc98\ud558\ub294": 22, "\uace0\ub824\ud560": 22, "40\ubc88\uc758": 22, "\ubc18\ubcf5\uc73c\ub85c": 22, "\uc644\ub8cc\ud560": 22, "\ube44\uad50\ud588\uc744": [22, 27, 34, 39], "25\ubc30": 22, "\uc18d\ub3c4\ub77c\ub294": 22, "\uad6c\ud604\ud588\uc2b5\ub2c8\ub2e4": 22, "5\uc758": [22, 26, 36], "\uc778\ucf54\ub354\ub3c4": 22, "\uac1c\uc778\ud654\ud558\uae30": 22, "\uc2dc\uac01\ud654\uc5d0": 22, "sfhq": 22, "synthet": [22, 32, 42, 43, 57], "headquart": 22, "000\uac1c\uc758": 22, "galleri": 22, "\uc544\ub798\ub85c": [22, 38, 55], "\uc778\uc2a4\ud0c0\uadf8\ub7a8": 22, "\uc140\uce74": 22, "bark": 22, "skin\uc758": 22, "\ub85d": 22, "\uc2a4\ud0c0": 22, "\uc804\ubb38\uc801\uc778": 22, "inversion\uc758": 22, "\ud45c\uc785\ub2c8\ub2e4": 22, "dino\uc640": 22, "\uc9c0\ud45c\ub97c": [22, 33, 39, 44], 
"\ud45c\ub294": [22, 27], "\ubd80\ubd84\uc785\ub2c8\ub2e4": [22, 38, 39], "hyperparameter\ub97c": 22, "\ube44\uad50\ud588\uc2b5\ub2c8\ub2e4": [22, 44, 55], "\ud559\uc2b5\ub960\uc744": 22, "\uc99d\uac00\uc2dc\ud0a4\uace0": 22, "\uac10\uc18c\uc2dc\ud0a4\uba74": 22, "agg": 22, "1\uc740": [22, 27, 46], "400\ubc88\uc758": 22, "\ubc18\ubcf5\uc744": 22, "\uc2dc\ud589\ud558\uace0": 22, "2\ub294": [22, 46], "1200\ubc88": 22, "\uc694\uc18c\ub85c": 22, "\ud558\uc774\ud37c\ub124\ud2b8\uc6cc\ud06c\ub97c": 22, "\ud558\uc774\ud37c\ub124\ud2b8\uc6cc\ud06c": 22, "\uc608\uce21\ub9cc": 22, "1\ubc88\ub9cc": 22, "\ube44\uad50\ud569\ub2c8\ub2e4": [22, 43, 52, 54, 56], "\uc9c0\ud45c\uc5d0\uc11c": 22, "\ub2ec\uc131\ud55c\ub2e4\ub294": 22, "\uba54\ud2b8\ub9ad": 22, "\uc2dc\ub098\ub9ac\uc624\uc5d0\uc11c": 22, "\uc57d\ud558\ub2e4\uace0": 22, "\uc774\ubbf8\uc9c0\uc5d0\ub9cc": 22, "\uc2a4\ud0c0\uc77c\uc5d0\uc11c": 22, "\uc0ac\ub78c\uc744": [22, 44], "\uc778\uc2dd\ud558\ub3c4\ub85d": 22, "\ub54c\ubb38\uc774\ub77c\uace0": [22, 43, 53], "\uc8fc\uc7a5\ud558\uba70": 22, "\ubcf4\uc644\ud558\uae30": [22, 53], "study\ub97c": 22, "\ube44\uad50\ud558\uace0": 22, "\uc0ac\uc6a9\uc790\ub4e4\uc758": 22, "\ubc1b\uc558\uc2b5\ub2c8\ub2e4": 22, "ups\uac00": 22, "\uc874\uc7ac\ud569\ub2c8\ub2e4": [22, 43, 44, 47, 48, 54], "direct": [22, 32, 36, 43], "\uc608\uce21\uc5d0\uc11c": 22, "\uc798\ubabb\ub41c": 22, "\uc2dc\ub9e8\ud2f1": 22, "\ub098\uc62c": 22, "\uc5d0\ub7ec\uc785\ub2c8\ub2e4": 22, "\ub208": [22, 44], "\uc0c9\uae54\uc774\ub098": 22, "\ud5e4\uc5b4": 22, "\ud0c0\uc785": 22, "\uc131\ubcc4": [22, 40], "\ub4f1\uc774": [22, 25, 39, 48, 55], "underfit": 22, "identity\ub294": 22, "\uc9c0\ucf1c\uc9c0\ub354\ub77c\ub3c4": 22, "\uc0d8\ud50c\uc774": [22, 33, 44], "\uc0dd\uc131\ub420": 22, "hypernetwork\uc640": 22, "\uc2a4\ud0c0\uc77c\uc5d0": 22, "\ubb38\uc81c\uc810\uc740": 22, "\ube5b": [22, 32], "ood\uc778": 22, "\uc0d8\ud50c\uc5d0\uc11c": 22, "\ub098\ud0c0\ub0a0": [22, 31], "hyperdreambooth\ub77c\ub294": 22, 
"\uac1c\uc778\ud654\ud558\ub294": 22, "hypernetwork\ub77c\ub294": 22, "\ud30c\ub77c\ubbf8\ud130\uc778": 22, "\uc774\uc5b4\uc11c": 22, "\uae30\ud0c0": [22, 29, 33], "\uc904\uc774\uba74\uc11c": [22, 25, 27, 54], "\ubb34\uacb0\uc131\uc744": 22, "\uc2a4\ud0c0\uc77c\uacfc": [22, 40, 44], "\uc758\ubbf8\uc801": [22, 40], "\uc218\uc815\uc774": [22, 40], "\uc785\uc99d\ud558\uc600\uc2b5\ub2c8\ub2e4": 22, "2102": [23, 45], "09672": 23, "likelihood\uc218\uce58\ub3c4": 23, "sampling\uc2dc": 23, "step\uc73c\ub85c": [23, 35, 46, 51], "\ub0bc": [23, 46], "scale\uacfc": [23, 38], "quailty\uc640": 23, "\uc218\uce58\uac04\uc758": 23, "quality\uc5d0": 23, "\ubaa8\ub378\uc5d0\ube44\ud574": 23, "\ub5a8\uc5b4\uc84c\ub2e4": 23, "diversity\uac00": [23, 46], "cifar": [23, 39, 43, 48, 54], "\ub3d9\uc791\ud588\uc9c0\ub9cc": 23, "dataset\uc5d0\uc11c\uc758": 23, "\ub3d9\uc791\uc740": 23, "imagenet\uac19\uc740": 23, "dataset\uc5d0\uc11c\ub3c4": 23, "process\uc5d0\uc11c\uc758": 23, "\ub0b4\ub294": [23, 28, 40], "\uc5f0\uad6c\ub4e4\uc5d0\uc11c": 23, "loglikelihood": 23, "\uc218\uce58\uc640": 23, "sample\uc758": 23, "quality\uac04\uc758": 23, "\uc5f0\uad00\uc131\uc744": 23, "distribution\uc5d0": 23, "\uc218\uce58\ud654\ud55c": 23, "\ub290\ub08c": 23, "\uc88b\uc544\uc9c0\uba74": 23, "quality\ub3c4": 23, "\uc99d\uac00\ud558\ub294": [23, 33], "ddpm\uc5d0\uc11c\ub3c4": 23, "\uac1c\uc120\ud55c\ub2e4\uba74": 23, "\uc99d\uac00\ud560": 23, "\uc54a\uc744\uae4c": 23, "angeloyeo": 23, "mle": [23, 48], "html": [23, 44], "\uc785\ud78c": [23, 37], "\ud615\ud0dc": [23, 41], "denoising\uc5d0": [23, 27], "noising\ud560": 23, "\uc544\ub798\uc640\uac19\uc774": 23, "\uc0ac\uc6a9\ud574\ub3c4": [23, 25, 51, 55], "\ubcf4\uc5ec\uc11c": 23, "\ubb38\uc7a5": 23, "\uc758\ubb38\uc810": 23, "\uc815": 23, "\ubc18\ub300\uc758": 23, "parameter\uc778\ub370": 23, "fix\ub97c": 23, "\ud558\ub294\uac8c": 23, "\ub9de\uc744\uae4c": 23, "step\uac04": 23, "step\uc774": [23, 46], "\ub450\uac1c\uc758": [23, 29, 32, 36], 
"\ub3d9\uc77c\ud574\uc9c4\ub2e4": 23, "2\ub97c": [23, 35, 39], "\uacb0\uc815\ub418\ub294\ub370": 23, "\uacb0\uc815\ub418\ub294": 23, "\ub450\ub294\uac83\uc740": 23, "\uc124\uacc4\uc758": 23, "\ud559\uc2b5\ud558\uae30\uc5d0\ub294": 23, "\ubc94\uc704\uac00": 23, "\uc791\uc544\uc11c": 23, "predict\ud558\ub3c4\ub85d": 23, "hybrid": [23, 46], "hyprid": 23, "\u03bbl_": 23, "vlb": 23, "\uc774\ubbf8\uc9c0\uc5d0\ub300\ud574": 23, "\ub3d9\uc791\ud558\uc9c0\ub9cc": 23, "32x32": [23, 46, 54], "\uc54a\ub294\uac83\uc744": 23, "scheduling\uc5d0\uc11c": 23, "mode\uc758": 23, "limitation\uc774": 23, "\uc9c0\uc801": 23, "\uac70\ub4ed\ub0a0\uc218\ub85d": 23, "\uc0c1\ub2e8": [23, 35], "noisy\ud574\uc9d0": 23, "skip\ud574\ub3c4": 23, "\uc131\ub2a5\uc5d0": [23, 39, 51], "\uc601\ud5a5\uc774": 23, "mode\ub97c": 23, "\uc758\ubbf8\uc788\ub294": [23, 29], "\ubbf8\uce58\uc9c0": 23, "equation\uc744": 23, "\uc0c8\ub85c": [23, 29, 50, 53], "\ub2e8\uacc4\uc5d0\uc11c\ub294": [23, 26, 33, 35, 36], "\uac15\ud558\uac8c": [23, 37], "\uc785\ud600\uc9c0\uc9c0\ub9cc": 23, "0\uacfc": 23, "\ubd80\uadfc\uc5d0\uc11c\ub294": 23, "\ub35c": [23, 46], "direct\ub85c": 23, "\ucd5c\uc801\ud654\ud558\ub3c4\ub85d": 23, "\uc124\uacc4\ud558\uba74": 23, "\uc774\ubbf8\uc9c0\uc640\uac19\uc774": 23, "unstable\ud574\uc11c": 23, "\ucd5c\uc801\ud654\uc5d0\ub294": 23, "\uc904\uc774\uae30\uc704\ud574": 23, "2\uc5d0\uc11c": [23, 33, 35], "\ub9d0\uae30\ub294": 23, "\ubcc0\ud654\uc5d0": 23, "\ud655\ub960\uc801\uc73c\ub85c": [23, 38], "\ucd08\ubc18\uc758": 23, "sampling\ud574\uc11c": 23, "\ud559\uc2b5\ud558\ub3c4\ub85d": 23, "\uc801\uc6a9\ud574\ubcf8": 23, "\uc801\uc6a9\ud558\uba74": [23, 54, 56], "\uc804\ubcf4\ub2e4": 23, "\ub2e4\uc18c": [23, 45, 52], "\ucde8\uc57d\ud588\ub358": 23, "64x64\uc640": 23, "cidar": 23, "\ubaa8\ub378\uc774\ub098": 23, "\ubaa8\ub378\uc911\uc5d0\uc11c\ub294": 23, "\ube44\ud574\uc11c\ub294": 23, "\uba74\uc774": [23, 26], "speed\ub97c": 23, "step\ub9cc": 23, "\uac00\ub3c4": 23, "fid\uac12\uc744": 23, 
"metric\uc73c\ub85c": 23, "biggan": [23, 46, 54], "\ud0c0\uac9f\uc5d0": 23, "\uc218\uce58\ub098": 23, "metric\uc5d0\uc11c": 23, "capacity\ub97c": 23, "nll": [23, 44], "\ud559\uc2b5\ub7c9": 23, "\ube44\ub840\ud568": 23, "2403": [25, 53], "12036": [25, 53], "05556": [], "luosiallen": 51, "donghyun": [25, 51, 57], "han": [25, 51, 57], "\uc801\uc6a9\ud558\uc600\ub2e4": 25, "\uadf9\ub300\ud654": 25, "pf": [25, 43], "solver\ub97c": [25, 51], "\ubc29\ubc95\ub860\ub4e4\ubcf4\ub2e4": 25, "\uc0ac\uc6a9\ud558\uae30\uc5d0\ub294": 25, "\ubb34\ub9ac\uac00": 25, "ldms\uc744": 25, "\uac00\uc18d\ud654": [25, 29, 51], "\uae30\ubc95\ub4e4\uc774": 25, "\uc81c\uc548\ub418\uc5b4": 25, "\uc654\ub294\ub370": 25, "2\uac00\uc9c0\ub85c": 25, "dpm": [24, 25, 51], "solver": [25, 43, 44, 53, 54], "ldm\uc744": [25, 35, 40, 51], "\ubc29\ubc95\ub860\uc740": 25, "step\uc744": [24, 25, 27, 31, 35, 40, 51], "\uc801\uc73c\ub85c": 25, "overhead\uac00": [25, 28], "computation\uc801\uc73c\ub85c": 25, "stage\uc758": [25, 51], "cm": [25, 51], "\ub300\uc548\uc774\ub2e4": 25, "backward": [25, 44, 47, 48], "\uc811\uadfc\ud558\uc5ec": 25, "\ud68d\uae30\uc801\uc73c\ub85c": 25, "lcms\uc740": [25, 51], "step\ub9cc\uc73c\ub85c\ub3c4": [25, 51], "\uc0dd\uc131\ud574\ub0bc": [25, 37], "\ub9ac\uc18c\uc2a4\uac00": [24, 25], "\ud544\uc694\ud558\uc9c0": [25, 26, 29, 34, 43, 51], "lcms\uc744": 25, "\ud544\uc694\ud558\uac70\ub098": 25, "\uc5f0\uad6c\ub294": [25, 36, 44], "\uc774\ub098": [24, 25, 28, 30, 51, 53, 54], "\ub4f1\uc5d0": [25, 39], "solver\uc774\uba70": [25, 51], "cms\uc740": 25, "\uc904\uc774\uba74\uc11c\ub3c4": 25, "\ubc29\ubc95\ub860\uc774\ub2e4": 25, "cms\uc758": 25, "ode\uc758": [25, 51], "\uada4\uc801\uc758": 25, "points\uac00": 25, "solution\uc5d0": [25, 51], "mapsto": [25, 43, 51], "\ucd94\uc815\ud558\ub294": [25, 48], "\ub9d0\ud574": [25, 43, 44, 54], "\ub358\uc9c0": 25, "\uc815\ud655\ud788\ub294": 25, "\ucd94\uc815\ud55c\ub2e4": 25, "timestep\uc5d0": 25, "\uad00\ud55cfunction\uc758": 25, 
"\uacb0\uacfc\uac12\uc740": 25, "\ub9cc\uc871\ud574\uc57c": [25, 43], "foral": [25, 26, 51], "\ub9e4\uc6b0\uc791\uc740": 25, "\uc591\uc218": [25, 43], "\uac12\uc774\ub2e4": [25, 51], "f_": [25, 32, 36, 43, 51, 55], "\uc774\uae30": [25, 30, 51], "\ub9cc\uc871\ud55c\ub2e4": [25, 51], "\uc218\uc2dd\uc740": [25, 38, 54], "\uc99d\uba85\ud558\uae30": 25, "\uc218\uc2dd\uc774\ub2e4": 25, "\uc2ec\uce35\uc2e0\uacbd\ub9dd\uc744": 25, "\ub098\ub258\ub294\ub370": 25, "\ubcf4\ud3b8\uc801\uc73c\ub85c": 25, "distillation\uc774": 25, "\uc9c0\uc218\ud3c9\uade0\uc774\ub3d9": [25, 51], "t_n": [25, 32, 43, 51], "ema\ub97c": [25, 51], "\uc9c0\ud45c\uc774\ub2e4": [25, 51], "\ucd94\uc815\ud55c": [25, 51], "leftarrow": [25, 26, 51], "numer": [25, 36, 43], "ode\ub97c": [25, 51], "ode\ub85c": 25, "\uc608\uce21\uac12\uc774": 25, "\uac19\ub3c4\ub85d": 25, "\ud575\uc2ec\uc774\ub2e4": 25, "cms\uc5d0": 25, "\ucd94\uac00\ud574\uc8fc\uace0": 25, "\uce58\ud658\ud55c\ub2e4": 25, "\uae30\ubc18\uc774\uae30": 25, "\ubcc0\uacbd\ud574\uc900\ub2e4": 25, "cd": [25, 43, 51], "psi_": [25, 51], "timestep\uc774\uc9c0\ub9cc": 25, "\ud558\uc704": [25, 40, 51], "\uac04\uaca9\uc774\ub2e4": 25, "t_i": [25, 32, 36, 43, 51], "lcms\ub294": [25, 51], "cms\uc640": 25, "distillation\ub3c4": 25, "\uc815\uc758\ub418\uc5b4\uc788\ub2e4": 25, "\uc218\ub834\uc774": [25, 56], "\ub2a6\uc5b4\uc9c0\uac8c": 25, "\uc81c\uc2dc\ud588\ub2e4": 25, "k\ub294": 25, "off\ub97c": 25, "20\uc73c\ub85c": 25, "distillation\ud560": 25, "ldms\ub97c": 25, "lcms\uc758": 25, "distillation\ub9cc": 25, "\ub0b4\uc6a9\uc740": [25, 39], "peft": [25, 28], "\uc774\ub780": [25, 49], "knowledg": [25, 42, 43, 53, 55], "prune": [25, 52], "quantiz": [25, 27, 31, 36, 45], "perf": 25, "rola\ub97c": 25, "phi_0": [25, 28], "\ub300\ud558\uc5ec": [25, 33, 45], "tuning\ud558\ub294": [24, 25], "\uac00\uc911\uce58\uac00": 25, "\uc5c5\ub370\uc774\ud2b8": 25, "underset": [25, 45], "y_t": [25, 28], "llm\uc774\ub098": 25, "task\ub85c": 25, "\uc790\uc6d0\uc801": 25, 
"weight\uc758": 25, "\ucc28\uc6d0\uc740": [25, 33], "\uae30\ub85d\ud558\ub294": 25, "rank\ub97c": 25, "r\ub85c": 25, "\ub0ae\ucd94\uc5b4": 25, "finetuning\ud55c\ub2e4": 25, "a\ub294": 25, "gauissian\uc73c\ub85c": 25, "b\ub294": 25, "zero\ub85c": 25, "d\uc758": [25, 33], "r\ub9cc\ud07c": 25, "matrix\ub9cc": 25, "\ud6a8\uc728\uc801\uc774\uace0": 25, "\ubc29\ubc95\ub4e4\ubcf4\ub2e4": [25, 42], "\ubcf4\uc5ec\uc8fc\uae30\ub3c4": 25, "\uc774\uacf3": 25, "\ucc38\uace0\ud558\uc600\uc2b5\ub2c8\ub2e4": 25, "llm\uc744": [25, 26], "target\uc73c\ub85c": [25, 40], "\ub9cc\ub4e4\uc5b4\uc84c\uae30": 25, "value\uc5d0": [25, 27], "diffusion\uc774\ub098": 25, "finetuning\uc2dc\uc5d0\ub3c4": 25, "\uac04\ub2e8\ud558\uac8c": [25, 39, 44, 51], "arithmetic\uc740": 25, "task\uc5d0\uc11c": [25, 39, 51], "vector\ub77c": 25, "\uc870\ud569\ud558\uc5ec": 25, "\uc815\uc758\ud560": [25, 52, 54], "\uc870\ud569\ud558\uace0": 25, "analogy\ub97c": 25, "distillation\uc5d0": 25, "\uc800\uc790\ub294": [25, 28, 40, 51], "distillation\uc740": [25, 51], "ldms\uc5d0": 25, "\uc77c\uc885\uc758": [25, 44], "tuning\uc73c\ub85c": [25, 45], "\uc5c5\ub370\uc774\ud2b8\ub294": 25, "ba": [25, 28], "\uace0\uc815\ub418\uba70": 25, "pass\ub294": 25, "w_0x": [25, 28], "bax": [25, 28], "\uc704\uc640\uac19\uc774": 25, "lcms\uc5d0": [25, 51], "\ud6a8\uc728\uc801\uc774\ub2e4": [24, 25], "\ud6c8\ub828\uacfc": 25, "\uc2e4\uc0ac\uc6a9\uc774": 25, "5\ub098": 25, "\uc0ac\uc6a9\ud588\uc9c0\ub9cc": [25, 46], "sdxl\uacfc": [25, 35], "ssd": [25, 50], "1b": [25, 33], "segmind": 25, "model\uc5d0\uc11c\ub3c4": 25, "lcd\uc744": 25, "\uc801\uc751\ud558\ub294": 25, "\uc774\uc678\uc5d0\ub3c4": [25, 43], "custion": 25, "datasets\uc5d0": 25, "\uc4f0\uc774\ub294\ub370": 25, "\uc774\uac19\uc740": [25, 51], "style\uc5d0": [25, 38], "lora\uac00": 25, "\ud569\uccd0\uc838": 25, "\ubc1c\uacac\uc774": 25, "arithmetic\uc5d0": 25, "\uad00\uc810\uc73c\ub85c": [25, 39], "\ud574\uc11d\ud560": [25, 39, 54], "\uc8fc\uc7a5\ud558\uc600\ub2e4": 25, 
"\uc774\ub77c": [25, 28, 54], "dataset\uc5d0\uc11c": [25, 37, 40, 45, 51], "lcms\ub97c": 25, "\uc870\ud569\ub41c\ub2e4": 25, "lambda_1": 25, "lambda_2": 25, "\uc774\ub8e8\uc5b4\uc9c0\uba70": 25, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\ub2e4": 25, "module\uc778": 25, "\uc608\uce21\ud558\uba70": 25, "lora\uc5d0": 25, "fast": [24, 25], "\uc81c\uacf5\ud568": [25, 29, 40], "enhanc": [26, 44], "2305": [26, 36], "13655": 26, "tonylianlong": 26, "groundeddiffus": 26, "\uc774\ub8e8\uc5c8\ub2e4": [26, 37], "numeracy\uc640": 26, "reasoning\uc744": 26, "\ubb38\uc81c\ub4e4\uc774": 26, "\ub808\uc774\uc544\uc6c3": 26, "\ud5a5\uc0c1\uc2dc\ud0a4\ub294": [26, 51], "\ub4f1\uc7a5\uacfc": 26, "\ubc1c\uc804\uc5d0": 26, "\ubc1c\uc804\ub418\uc5b4\uc654\ub2e4": 26, "sdxl": 26, "\ucde8\uc57d\ud55c": [26, 50], "figure\ub97c": 26, "negat": [26, 37], "numeraci": 26, "bind": 26, "relationships\uc5d0\uc11c": 26, "\ud504\ub86c\ud504\ud2b8\uac00": [26, 40], "\ub300\uaddc\ubaa8\uc758": 26, "\uc88b\uc9c0\ubabb\ud55c": 26, "\ub300\uaddc\ubaa8\ub85c": 26, "\ud655\ubcf4\ud558\ub294": 26, "\uc870\ucc28": 26, "\uc77c\uc774\ub2e4": [26, 40], "\ud53c\ud558\uba74\uc11c": 26, "figure\uc758": 26, "\uc811\uadfc\ud55c\ub2e4": 26, "stage1": 26, "\ud45c\ud604\ub418\uc5b4\uc57c": 26, "layout\uc744": 26, "object\ub4e4\uc744": 26, "attribute\uacfc": 26, "parsing\uc744": 26, "\uc62c\ubc14\ub978": 26, "box": [26, 30], "coordinate\uc744": 26, "\ubaa9\ud45c\uc778": 26, "llm\uc758": [24, 26], "generator\ub85c\uc368": 26, "stage2": 26, "stage1\uc73c\ub85c": 26, "box\uc5d0": 26, "caption\uc774": 26, "inference\uacfc\uc815\uc5d0\uc11c": 26, "generation\uc744": [26, 40], "\uac00\ub2a5\ucf00\ud55c\ub2e4": 26, "introduc": 26, "abil": [26, 30, 42, 48, 55], "steer": 26, "shelf": 26, "instanc": [26, 38, 44, 47], "lmd": 26, "enabl": [26, 38, 40], "instruct": 26, "allow": [26, 32, 49], "broader": 26, "assess": 26, "demonstr": 26, "superior": 26, "perform": [26, 28, 30, 41, 45, 49, 55], "recent": 26, "stage1\uacfc": 26, 
"\ub2e8\uacc4\ub85c\uc368": 26, "box\uc758": 26, "\ub2e8\uacc4\uc5d0\uc11c\uc758": 26, "prompt\uc758": [24, 26, 40], "\uc717\ub2e8\uc778": 26, "instruction\uc744": 26, "llm\uc5d0\uac8c": 26, "\ub9d0\ub4e4\ub85c": 26, "specification\uacfc": 26, "details\ub85c": 26, "\ud574\uc57c\ud560": 26, "\uc77c\ub4e4\uacfc": 26, "prompt\ub77c\uace0": 26, "examples\uc5d0": 26, "output\uac12\uc744": 26, "few": [26, 39, 45, 51], "example\uc744": 26, "llm\uc73c\ub85c": 26, "\uc774\ub04c\uc5b4\ub0b4\ub294": 26, "\ucde8\ud55c": 26, "5\uac19\uc740": 26, "shot\uc744": [26, 45], "gpt4": 26, "\uacbd\uc6b0\ub294": [26, 39, 44], "shot\ub9cc\uc73c\ub85c\ub3c4": 26, "\ub098\uc640\uc788\ub2e4": 26, "caption\uac12\uacfc": 26, "coordinate\uac12\uc73c\ub85c": 26, "llm\uc774": 26, "\ub808\uc774\uc544\uc6c3\uc744": 26, "\ucee8\ud2b8\ub864\ub7ec\ub97c": 26, "\ub3c4\uc785\ud55c\ub2e4": 26, "\ud0dd\ud588\ub358": 26, "work\ub4e4\uc758": 26, "region": [26, 30, 32], "inference\uacfc\uc815\uc5d0\uc11c\uc758": 26, "\uc801\uc6a9\ud558\uc9c0\ub9cc": 26, "\uc81c\uc5b4\ud558\ub294": 26, "\uc778\uc2a4\ud134\uc2a4\ub4e4\uac04\uc758": 26, "\uad6c\ubcc4\uc774": 26, "space\ub098": 26, "map\uc5d0\uc11c": 26, "\ubc1c\uc0dd\ud558\uba70": 26, "\uc778\uc2a4\ud134\uc2a4": 26, "control\uc744": [24, 26], "\ud798\ub4e4\uac8c": 26, "\uc694\uc778\uc774\ub2e4": 26, "\uacbd\uacc4": [26, 33], "\uc0c1\uc790\uc5d0": 26, "\ub9c8\uc2a4\ud0b9\ub41c": [26, 31, 41], "\uc548\ub0b4\ud568\uc73c\ub85c\uc368": 26, "\uac00\ub2a5\ud1a0\ub85d": 26, "\uc774\ub97c\ud1b5\ud574": 26, "\uc778\uc2a4\ud134\uc2a4\uc758": 26, "\ubc30\uce58\uc640": 26, "\ud5c8\uc6a9\ud55c\ub2e4": 26, "stage2\ub294": 26, "step1": 26, "2\ub85c": 26, "\ub098\ub220\uc9c4\ub2e4": 26, "step1\uc758": 26, "stage1\uc5d0\uc11c": 26, "box\ub4e4\uc5d0": 26, "box\ub0b4\uc758": 26, "object\ub4e4\uc5d0": 26, "\uacfc\uc815\uc774\ub2e4": [26, 34], "compose\ud558\uc5ec": 26, "grai": 26, "cat\uc5d0": 26, "box\uac00": 26, "stage1\uc744": 26, "\uad6c\ud574\uc84c\ub2e4\uba74": 26, 
"box\uc548\uc5d0": 26, "cat\uc774": 26, "\uadf8\ub824\uc9c0\ub3c4\ub85d": 26, "\uc720\ub3c4\ub97c": 26, "\uadf8\ub7ec\ub294": 26, "\uc800": 26, "object\uac00": 26, "gt": [26, 30, 36], "\uac8c": 26, "\uacfc\uc815\uc774\ub77c\uace0": 26, "\uc0dd\uac01\ud558\uba74": [26, 31], "lmd\uc758": 26, "step1\uc5d0\uc11c": 26, "\uc9c4\ud589\ub41c\ub2e4\uace0": 26, "\ubcf4\uc790\uba74": [26, 39], "\uac70\uce58\uba74\uc11c": 26, "box\uc548\uc5d0\uc11c": 26, "\ud558\ub294\uac83\uc774": 26, "\uc120\ud589\uc774": 26, "\ub418\uc5b4\uc57c": [26, 36], "\uc720\ub3c4\ud558\uae30": 26, "box\ub0b4\ubd80\uc5d0": 26, "q_u": 26, "tk_v": 26, "pixel\uac12\ub4e4\uacfc": 26, "prompt\ub0b4\uc5d0\uc11c\uc758": 26, "u\ub294": 26, "pixel\ub4e4\uc744": 26, "\uc758\ubbf8\ud558\uace0": [26, 36, 39, 52], "v\ub294": 26, "\uc608\uc2dc\ub97c": 26, "\uc815\ub9ac\ud558\uc790\uba74": [26, 51, 52, 56], "orang": 26, "grass": 26, "\uc600\ub2e4\uace0": 26, "indoor": [26, 50], "\uad6c\uc131\ud55c\ub2e4": [26, 33, 51], "box\ub97c": 26, "align\ud558\uae30": 26, "\ubcf4\uc774\ub4ef\uc774": [26, 43, 53], "pixel\uc758": 26, "token\uacfc\uc758": 26, "attention\uc740": 26, "\uc99d\uac00\ub418\uace0": 26, "\uac10\uc18c\ub418\uba74": 26, "token\uacfc\ub294": 26, "box\ub0b4\ubd80": 26, "pixel\ub4e4\uc774": 26, "\uc99d\uac00\ud558\uace0": [26, 43], "\uac10\uc18c\ud558\ub3c4\ub85d": 26, "\uc720\ub3c4\ub418\uba74": 26, "\uc720\ub3c4": [26, 37, 46], "energi": [26, 48], "a_i": 26, "topk": 26, "_u": 26, "omega": 26, "i\uc5d0": 26, "v\uac00": 26, "\uc815\ud574\uc838": 26, "pixel\uc774": 26, "v\uc640\uc758": 26, "attention\uac12\uc774": 26, "\uc678\ubd80": [26, 39], "function\uac12\uc774": 26, "\ucd5c\uc18c\uac00": [26, 36], "\ub428\uc744": 26, "\uc791\uc544\uc9c0\uba74": [26, 51], "pixel\ub4e4\uc758": 26, "token\uc5d0": 26, "box\uc640": 26, "object\uac04\uc758": 26, "align\uc774": 26, "\uc774\ub8e8\uc5b4\uc9c0\uac8c": 26, "nabla": [26, 43, 51], "v_i": [26, 36, 52], "\ub418\ub3c4\ub85d": [26, 27, 36, 37, 44], 
"optimize\uacfc\uc815\uc744": 26, "\uac70\uce5c\ub2e4": 26, "\ub2e8\uc21c\ud558\ub2e4": 26, "descent\ub97c": 26, "\uc0dd\uac01\ud574\ubcf4\uba74": [26, 27, 38], "optimize\ud558\uba74\uc11c": 26, "\ucd5c\uc18c\ub85c": 26, "\ubaa9\ud45c\uc774\uae30\uc5d0": 26, "function\uacfc": [26, 34], "function\uc758": [26, 36, 46, 51], "\uc2dd\ub4e4\uc740": 26, "\uac00\ub2a5\ud558\uae30\uc5d0": 26, "z_t\ub97c": 26, "optimize\ud558\uc5ec": 26, "\ucd5c\uc18c\ud654\uac00": 26, "\ucd5c\uc18c\ud654\ub294": 26, "timestep\ub9c8\ub2e4": 26, "5\ud68c": 26, "\ubc18\ubcf5\ub418\uba70": 26, "\ubc88": [26, 31, 33, 38, 42, 43], "\uc9c4\ud589\ub420": 26, "\ub54c\ub9c8\ub2e4": 26, "\uc120\ud615\uc801\uc73c\ub85c": [26, 31, 51], "\ud69f\uc218\uac00": 26, "1\ud68c\ub85c": 26, "\uc904\uc5b4\ub4e0\ub2e4": 26, "30step": 26, "\ud6c4\uc5d0\ub294": [26, 39], "\uc218\ud589\ud558\uc9c0": 26, "\uc9c4\ud589\ub418\uba74\uc11c": 26, "denoising\uc774": 26, "\ub05d\ub098\uace0": 26, "sam\uc744": 26, "segment\ub97c": 26, "\uc9c4\ud589\ud558\uac70\ub098": 26, "threshold\uac12\uc744": 26, "\uad6c\ud558\uac8c": [26, 48], "\uc774\ud6c4\uc5d0": [26, 30, 31], "map\uc5d0": 26, "mask\uc640": 26, "wise\uacf1\uc744": 26, "\ud574\uc8fc\uc5b4\uc11c": 26, "step2": 26, "step2\uc5d0\uc11c\ub294": 26, "\ucc98\uc74c\uc5d0": [26, 53], "work\uc5d0": 26, "\uc758\ud558\uba74": [26, 39], "denoising\uc758": 26, "\ub2e8\uacc4\uc5d0\uc11c\ubd80\ud130": 26, "detail\ud55c": [26, 27, 37], "compose\ub97c": 26, "compose\ud55c\ub2e4\uc74c": 26, "compose\ud558\uba74\uc11c": 26, "step1\uacfc": 26, "optimize\ub97c": 26, "\uc9c4\ud589\ud558\uba70": 26, "object\ub4e4\uc774": 26, "\uc704\uce58\uc5d0": [26, 32, 45], "\uc774\ud6c4\ubd80\ud130\ub294": 26, "compose\ub098": 26, "compose\uad00\ub828": 26, "step2\uc5d0\uc11c\uc758": 26, "comp": 26, "latentcompos": 26, "\uc9c4\ud589\ub41c": [26, 31], "decoder\ub97c": [26, 27, 36], "\ubcc0\ud658\ub418\uc5b4": 26, "\uc0dd\uc131\ub41c\ub2e4": 26, "qualitive\ud55c": 26, "quantitive\ud55c": 26, 
"\ub098\ub204\uc5b4\uc11c": 26, "introduction\uc5d0\uc11c": 26, "4\uac00\uc9c0\ub97c": 26, "\ud574\uacb0\ud558\uace0\uc790": [26, 30, 49], "work\uc758": 26, "\ubaa9\ud45c\uc774\uc5c8\uae30\uc5d0": 26, "relationships\ub97c": 26, "100\uac1c\uc529\uc758": 26, "\uc218\uc9d1\ud574\uc11c": 26, "\uc9c4\ud589\uc744": 26, "\ud558\uc600\ub2e4\uace0": [26, 49], "\uc874\uc7ac\ud558\uc9c4": 26, "owl": 26, "detector\ub97c": 26, "box\uac12\uc744": 26, "\uc0dd\uc131\ub418\uc5b4\uc788\ub294\uc9c0\ub97c": 26, "\ud655\uc778\ud558\uc600\ub2e4\uace0": 26, "study\uc5d0": 26, "\uacb0\uacfc\ub4e4\ub3c4": 26, "\ud3ec\ud568\ub418\uc5b4": [26, 31], "qualitit": 26, "results1": 26, "results2": 26, "lmd\uac00": 26, "\uc0dd\uc131\ud558\ub294\uc9c0\ub97c": 26, "\ubcf4\uc5ec\uc8fc\ub294": [26, 44, 45, 47, 53, 54, 55], "sdxl\ubaa8\ub378\uc740": 26, "numarci": 26, "matching\ub4f1\uc744": 26, "\ub9cc\uc871\uc2dc\ud0a4\uc9c0": 26, "lmd\ub294": 26, "\ubb38\uc81c\uc810\ub4e4\uc744": [24, 26], "result\ub294": 26, "\uc81c\uc2dc\ud558\ub294": [26, 53, 56], "model\uc778": [24, 26, 40], "gligen\uc758": 26, "\uac00\uc838\uc640\uc11c": [26, 30], "\uc138\uac00\uc9c0\ub97c": 26, "sd\uc5d0": [24, 26], "\uc6d4\ub4f1\ud55c": 26, "\uc55e\uc120": [24, 26, 30], "\uc131\ub2a5\uc784\uc744": 26, "sdv1": 26, "sdv2": 26, "1\uc744": [26, 33, 35], "\uc368\ubcf8": 26, "\ub458\uc758": 26, "\uc5c6\uc5c8\ub2e4\uace0": [26, 44], "\uad6c\ud558\uae30": 26, "threshold\ub97c": 26, "\uacb0\uacfc\ub3c4": [26, 47, 54, 55, 56], "\ubcf4\uc5ec\uc8fc\uc5c8\ub294\ub370": 26, "lmd\uc77c\ub54c\uc640": 26, "\uc77c\ub54c\uac00": 26, "lmd\uc5d0\uc11c\ub294": 26, "attention\uae30\ubc18\uc758": 26, "\uad00\ub828\ud558\uc5ec": 26, "\uc815\ud655\ud558\uc9c0": [26, 55], "sam\uc740": 26, "\ub9c8\uc2a4\ud06c\ub97c": 26, "\ub3c4\uc6c0\uc744": [26, 30, 36, 37], "\uc900\ub2e4": 26, "\uc81c\uac70\ud558\uba74": [26, 44], "lmd\uc5d0\uc11c": 26, "\ubc1c\uc0dd\ud558\uac8c": 26, "gligen": [26, 53], "\uac00\uc838\uc640": [26, 31, 35, 41], 
"\uc774\uc6a9\ud558\uae30": 26, "sam\uc774": 26, "\uc54a\uac8c\ub41c\ub2e4": 26, "\uc624\ud788\ub824": [26, 39], "\uc120\ud0dd\ud558\uc5ec": [26, 33, 45], "\ud63c\ub780\uc744": 26, "\uc77c\uc73c\ud0a4\uace0": 26, "\uc800\ud558\uc2dc\ud0a4\uac8c": 26, "stage2\uc5d0": 26, "\uac00\uc838\uc654\ub290\ub0d0\uc5d0": 26, "quantative\ud55c": 26, "lmd\uae30\ubc95\uc744": 26, "pretrain\ub41c": 26, "\uc5ec\uae30\uc5d0": [24, 26, 36], "\uae30\ubc95\uc5d0": 26, "stage2\uc758": 26, "gligen\uc744": 26, "\uac00\uc838\uc628": 26, "gligen\uc5d0": 26, "gpt3": [26, 45], "5\ub300\uc2e0": 26, "gpt4\ub97c": 26, "\uacbd\uc6b0\ub85c": [26, 56], "gpt4\ub85c": 26, "\ubc14\uafbc": 26, "numeracy\uc5d0\uc11c\ub294": 26, "\uc0b4\uc9dd": 26, "\uac10\uc18c\ud558\uc9c0\ub9cc": [26, 43], "version": 26, "gpt\uac19\uc740": 26, "turbo\ub97c": 26, "gpt4\uc758": 26, "\ud574\ub3c4": 26, "7shots\uc73c\ub85c": 26, "\uc644\ubcbd\ud558\uac8c": [26, 30], "\ud574\ubcf8": 26, "gpt4\uac00": 26, "\ubcf4\uc600\uc74c\uc744": 26, "\ud06c\uae30\ub85c": [26, 36, 55], "\uc801\uc6a9\ud574\ubcf8\uacb0\uacfc": 26, "\uc798\uc774\ud574\ud560": 26, "intermedi": [26, 42, 55], "representation\uc778": 26, "\ud2b9\uc9d5\uc774\ub77c\uace0": [26, 38], "stage\ub85c": [26, 37], "generation\uacfc": 26, "10752": 27, "compvi": 27, "namkyeong": [27, 57], "31": [27, 35, 40, 47], "\ud559\uc2b5\ud558\ub294\uc9c0": 27, "off\ub85c": [27, 46], "\ubd84\uc11d\ud560": 27, "analysi": 27, "2\ub2e8\uacc4\ub85c": 27, "autoencoder\ub97c": [27, 35], "\uc778\uac04\uc758": [27, 29], "\uc778\uc9c0\uc640": 27, "\ub4e4\uba74": 27, "\uace0\uc591\uc774": [27, 35, 44], "\uadc0": 27, "\uaf2c\ub9ac": 27, "\uc778\uc9c0\ud55c": 27, "\uc555\ucd95": [27, 31, 45], "\ub118\uc5b4\uac00\uba74": 27, "\ube44\ud2b8\ub97c": 27, "\uc368\ub3c4": 27, "\uc778\uc9c0\ud560": 27, "rate\uac00": 27, "\ub118\uc5b4\uac00\ub294": [27, 54], "\uc778\uc9c0": [27, 44], "\uc548\ub428": [27, 30], "\uc500": [27, 32], "1\ube44\ud2b8": 27, "\ud751": 27, "\ubc31": 27, "8\ube44\ud2b8": 27, 
"\uc0c9": 27, "\ud14d\uc2a4\uccd0": 27, "\uc0ac\uc774\uc988\uc758": [27, 29, 49], "\uc555\ucd95\ud558\uc5ec": [27, 41], "\uc904\uc784\uc73c\ub85c\uc368": 27, "\ud559\uc2b5\ud558\uc9c0": [27, 48], "\uc758\ubbf8\uc640": 27, "\ub9d0\ud568": 27, "\uc18d\uc758": 27, "\uac1c\uac00": 27, "\uc5b4\ub514\uc5d0": [27, 30, 32], "\uc704\uce58\ud558\ub294\uc9c0": 27, "\ubc30\uacbd\uc774": [27, 44], "\uc0c1\ud669\uc778\uc9c0": 27, "\ud559\uc2b5\ud568": [27, 30, 45], "\uc218\ud589\ud558\ub098": 27, "\ucd5c\uc801\ud654\ub098": 27, "\ucc3e\uae30": [27, 40], "\ubd88\uc548\uc815\uc131": 27, "\uc0dd\uc131\uc790": 27, "\ud310\ubcc4\uc790": 27, "\uc190\uc2e4\ud568\uc218": [27, 48, 56], "\uc720\ud615\uc758": [27, 33], "gan\ubcf4\ub2e4": [27, 45, 46], "quality\uac00": 27, "flow\ub294": 27, "\ubaa8\ub378\ub9c1\ud558\uae30": [27, 38], "\uac00\uc5ed\uc801\uc778": 27, "\ube44\uc120\ud615": 27, "\uc544\uc774\ub514\uc5b4": [27, 30], "\ud568\uc218\ub4e4\uc744": 27, "\uc7a0\uc7ac\ubcc0\uc218\ub85c": 27, "\ubc18\ub300\ub85c": [27, 40], "\uc7a0\uc7ac\ubcc0\uc218\ub97c": [27, 56], "\uc5ed\ubcc0\ud658": [27, 32], "\uc591\ubc29\ud5a5": 27, "\ubcc0\ud658\uae30\ub97c": 27, "\uc7a0\uc7ac\ubcc0\uc218\ub85c\ubd80\ud130": 27, "\ucd94\uc815\uc5d0\uc11c": 27, "\ub2ec\uc131\ud558\ub098": 27, "\ube44\uc6a9\uacfc": 27, "\uc81c\uc57d\ub41c\ub2e4": 27, "\uc778\uc9c0\ud558\uae30": 27, "\ud3ec\ud568\ud558\uc5ec": 27, "likelihood": [27, 45, 46, 48, 56], "\uc6a9\ub7c9\uacfc": 27, "\uc18c\ube44\ud55c\ub2e4": 27, "\ub4f1\uc5d0\uc11c": 27, "\ubcf4\uc774\ub098": [27, 36], "\ub2e8\uc704\uc5d0\uc11c\uc758": 27, "\ud3c9\uac00\uc640": 27, "speed\uc640": 27, "\uc720\ubc1c\ud55c\ub2e4": 27, "vq": [24, 27, 54], "\uc591\uc790\ud654": 27, "quantis": 27, "\uc5f0\uc18d\ud615": 27, "\ubca1\ud130\ub97c": [27, 32, 33, 42], "\uc815\ud55c": 27, "codebook": [27, 31], "\ubca1\ud130\uc640\uc758": 27, "\uacc4\uc0b0\ud558\uc5ec": [27, 33], "\uc720\uc6a9\ud558\uba74\uc11c": 27, "vae\uc758": 27, "\uc6d0\ub9ac\ub97c": [27, 32], 
"\uc591\uc790\ud654\ud558\uc5ec": 27, "\ubcc0\ud658\ud558\uba74\uc11c": 27, "\uc624\ub298": [27, 38], "\uc54c\uc544\ubcfc": [27, 38, 50, 55], "model\uc774\ub2e4": [27, 37], "\ub2e4\ub918\ub358": [27, 38], "\ucef4\ud4e8\ud130": [24, 27], "\uc18c\ubaa8\ub97c": 27, "\uc5bb\ub294\uac83\uc774": 27, "\ubaa9\ud45c\uc774\ub2e4": [27, 41], "\uc8fc\uc5b4\uc84c\uc744\ub54c": 27, "\ud1b5\ud574\uc11c": [27, 38, 42], "\ub514\ucf54\ub529\uc744": 27, "\ud14c\uc2a4\ud2b8\ub97c": 27, "\ucee4\uc9c0\uc9c0": 27, "divergence\uc640": 27, "\ud65c\uc6a9\ud558\uc600\ub2e4": [27, 36], "\uc774\ubbf8\uc9c0\uc678": 27, "\ud14d\uc2a4\ud2b8\ub098": 27, "semat": 27, "map\uacfc": 27, "\uc815\ubcf4\ub294": [27, 41, 52], "\uc804\ub2ec\uc744": 27, "\ud558\uc600\uace0": [27, 30, 39], "phi_i": 27, "_k": [24, 27], "\uc815\uc758\ub418\uace0": [27, 54, 56], "\uc911\uac04\uc758": 27, "matrix\uc774\ub2e4": 27, "\ud574\ub2f9\ud558\uba70": 27, "\ud45c\ud604\ub41c\ub2e4": [24, 27, 35], "\uc8fc\ubaa9\ud560\ub9cc\ud55c": [27, 51], "dm": [24, 27, 41, 46], "function\uc73c\ub85c": [27, 29, 36, 45, 46], "\uc9c4\ud589\uc2dc\ud0a4\ub294\ub370": 27, "\ubc14\uafb8\uba74\uc11c": 27, "\uc591\uc744": [27, 35], "\uc904\uc600\ub2e4\ub294": 27, "\uc810\uc774\ub2e4": [24, 27, 35, 46], "\uc5f0\uad00\uc9c0\uc73c\uba74": 27, "\uc0dd\uac01": [27, 40], "varepsilon": [27, 51], "frequenc": [27, 32, 35, 45, 53], "detail\uc774": 27, "\uace0\ucc28\uc6d0": [27, 32, 53], "\uacf5\uac04\uacfc": 27, "\uc801\ud569\ud568": 27, "bit\uc5d0": 27, "\ucc28\uc6d0\uc5d0\uc11c": [27, 29, 41], "\ucc28\uc6d0": [27, 32], "\uac1c\uc218\uc5d0": [27, 45, 47], "resourc": [27, 41], "1\ub300\ub85c": 27, "\ucc28\uc6d0\uc5d0": [27, 41], "\uadf8\ub798\ud504\ub294": [27, 39], "\uc218\ub834\ud558\ub294\ub370": 27, "\uc18c\uc694\ub418\ub294": 27, "fidelti": 27, "\ub370\uc774\ud130\uc640": [27, 33, 39, 47, 48, 56], "\uc815\uccb4\ub41c": 27, "perceptual\uacfc": 27, "bit": 27, "ratio": [27, 35, 54], "compression\uc740": 27, "\ubd80\ubd84\uc5d0\uc11c": [27, 47, 54], 
"\uc555\ucd95\uc774": 27, "\uc9c0\ub098\uce58\uac8c": [24, 27], "\ubc1c\uc0dd\ud558\uc5ec": 27, "\uc81c\ud55c\ub428": 27, "hq\uc640": 27, "imagenet\uc5d0\uc11c": 27, "sampler\ub97c": [24, 27], "\ube44\uad50\ud568": 27, "imagenet\uc774": 27, "dataset\uc784": 27, "point\uac00": 27, "5\uac1c\uc778\ub370": 27, "\uc624\ub978\ucabd\ubd80\ud130": 27, "\uc67c\ucabd\uc73c\ub85c": 27, "\uc758\ubbf8\ud568": [27, 39], "outperform\uc744": 27, "\ubc1c\ud718\ud568": 27, "\uc18d\ub3c4\ub3c4": 27, "\ub290\ub9bc": 27, "ffhq": [27, 37, 38], "bedrooms\ub85c": 27, "manifold\uc758": 27, "coverag": 27, "\ucd5c\uace0": 27, "lsgm": 27, "adm\ubcf4\ub2e4": 27, "\uc808\ubc18\uc758": 27, "parameter\uc640": 27, "\uc5bb\uc74c": [27, 31], "methods\uc744": 27, "\uac1c\uc120\ud574": 27, "cover": 27, "training\uc758": 27, "recall\uc744": 27, "45b": 27, "400m\uc73c\ub85c": 27, "bert": [27, 28], "layout\uc774": 27, "\uc5f0\uacb0\ud588\uc744": 27, "inpainting\uc5d0": 27, "landscap": 27, "version\uc744": 27, "384x384": 27, "openimages\ub85c": 27, "\ub4e4\uc5b4\uac10": [27, 30], "\ubb3c\uccb4\ub97c": [27, 36], "\ubcf5\uad6c\ub41c": 27, "effeci": 28, "\uace0\uc815\ud55c": 28, "\ucc44\ub85c": [28, 30], "fc": [28, 32], "\uc5f0\uc0b0\ub7c9\uc744": 28, "parameter\ub294": [28, 36, 39], "10000\ubc30": 28, "\uba54\ubaa8\ub9ac\ub294": 28, "3\ubc30\ub97c": 28, "latency\uac00": 28, "\ud29c\ub2dd\ud558\ub294": 28, "\ud30c\ub77c\ubbf8\ud130\ub9cc\uc744": 28, "\ud29c\ub2dd\ud568\uc73c\ub85c\uc368": 28, "\uc790\uc6d0\uc73c\ub85c\ub3c4": 28, "\ud558\ub294\uac83": 28, "upstream": 28, "\ud559\uc2b5\uc2dc\ud0a4\ub294\uac83": 28, "\uc694\uccad\uc758": 28, "\uc2dc\uc791\ubd80\ud130": 28, "\uc644\ub8cc\uae4c\uc9c0": 28, "\uac78\ub9ac\ub294": 28, "llm\uc740": 28, "\uc2dc\ud0b4": [28, 30, 39], "tuning\uc5d0\uc11c": 28, "\ud559\uc2b5\uc2dc\ud0a4\uba74": [28, 36, 42], "roberta": 28, "\ub2ec\uc774": 28, "\uac78\ub9bc": 28, "intrins": [28, 32], "\uae30\ubc18\ud558\uace0": 28, "\uc0ac\uc2e4\uc5d0": 28, "\uac16\uace0": 28, 
"\uac00\uc815\ud568": [28, 45], "\uace0\uc815\ud558\uace0": [28, 45], "matrices\ub97c": [28, 36], "\uc2dc\ud0a4\uae30\ub85c": 28, "decomposition\ub41c": 28, "\uc791\uc544": 28, "3\ubc30\uae4c\uc9c0": 28, "\ubc14\uafd4\uc8fc\uba74": 28, "switch": 28, "overhead\ub97c": 28, "\uae30\ubc95\ub4e4\uacfc": 28, "\uac00\ub2a5\ud558\ub2e4\ub294": [28, 51, 52], "\uc7a5\uc810\uc774": [24, 28, 48, 52, 55], "w_q": [24, 28, 55], "w_k": [24, 28, 55], "w_v": [24, 28, 55], "w_o": 28, "accumulated\ub41c": 28, "convention\uc744": 28, "optimizer\ub294": 28, "adam\uc744": 28, "\uc774\uc6a9": [28, 29, 32], "feedforward": [28, 29], "ffn": 28, "agnostic\ud558\uc9c0\ub9cc": 28, "\uc9d1\uc911\ud568": 28, "agnost": [28, 45], "\uad6c\uc560\ubc1b\uc9c0": 28, "\ud574\uc11d\uc774": 28, "parameterized\ub41c": 28, "x_i": [28, 56], "y_i": 28, "target\uc30d\uc73c\ub85c": 28, "maximize\ud558\uae30": 28, "\uc5c5\ub370\uc774\ud2b8\ub428": 28, "\ud559\uc2b5\ud574": [28, 40, 41], "\uc5c4\uccad\ub09c": [28, 32, 54], "\uc804\uccb4\uac00": 28, "\uadf8\ubcf4\ub2e4": 28, "\ucc3e\uc544\ub0b4\ub294": 28, "\ubc14\ub00c\uae30": 28, "effecient\ud574\uc9d0": 28, "01": [24, 28], "\uc791\uc544\uc9c8": 28, "\uae30\uc874\uc5d0\ub3c4": 28, "learning\uc5d0\uc11c": [28, 45], "effecient\ub97c": 28, "\uac00\uc9c0\uac00": 28, "hardwar": 28, "parellelism\uc774": 28, "\uc5c6\ub2e4\uba74": 28, "\ucd94\uac00\ud574\ub3c4": 28, "\uc99d\uac00\ud574": 28, "\uc5b4\ub824\uc6e0\uc74c": 28, "prefix": 28, "tuning\uc740": [28, 39, 40], "optimize\uac00": 28, "\uacf1\ud574\uc9c4": 28, "vector\ub07c\ub9ac": 28, "wise\ud558\uac8c": 28, "scaling\ub428": 28, "rate\ucc98\ub7fc": 28, "tuning\ud574\uc11c": 28, "r\uacfc": 28, "\uc0ac\uc6a9\ud55c\ub2e4\uace0": [28, 42, 52], "lora_a": 28, "new_zero": 28, "num_embed": 28, "lora_b": 28, "embedding_dim": 28, "lora_alpha": [28, 53], "requires_grad": [28, 48], "reset_paramet": 28, "hasattr": 28, "wai": [28, 53], "zeros_": 28, "normal_": [28, 56], "bool": 28, "merge_weight": 28, "sure": 28, "transpos": [28, 
29], "mark": 28, "after_a": 28, "padding_idx": 28, "max_norm": 28, "norm_typ": 28, "scale_grad_by_freq": 28, "spars": [28, 45, 55], "\ud558\ub77d\uc774": 28, "\uacbd\uc6b0\uc5d4": 28, "\ucd5c\uc18c\ud654\ud558\uae30": [28, 32, 44], "\uc801\uc6a9\ud558\uace0": 28, "\uace0\uc815\ud568": 28, "175b\ub97c": 28, "vram\uc740": 28, "2tb\uc5d0\uc11c": 28, "350gb": 28, "checkpoint": [28, 37, 53], "size\ub294": [28, 51], "350gb\uc5d0\uc11c": 28, "35mb\ub85c": 28, "\uc904\uc784": 28, "\ube68\ub77c\uc9d0": 28, "\uacbd\uc6b0\uc5d0\uc11c": 28, "accuraci": [28, 42], "transformer\uc5d0\uc11c": [28, 45], "matrix\uc5d0": 28, "matrices\uc5d0": 28, "\uc88b\uc558\uc74c": 28, "\ub274\ub7f4\ub124\ud2b8\uc6cc\ud06c\uc758": 28, "activation\uc744": 28, "\uc904\uc774\uae30\ub3c4\ud558\uace0": 28, "\ub298\ub9ac\uae30\ub3c4\ud558\ub294": 28, "\uc911\uac04\uc5d0": 28, "\uc0bd\uc785\ud558\ub294": 28, "lora\ubcf4\ub2e4": 28, "\uc54c\ub824\uc838\uc788\uc73c\uba70": 28, "3\ub97c": 28, "\ud588\uc744\ub54c": 28, "\ubcf4\ub2e4\ub3c4": [28, 49], "\uc8fc\uc7a5\ud558\uace0": 28, "\ud559\uc2b5\uc2dc\uac04\ub3c4": 28, "\uc9e7\uc544": 28, "30\ubd84\ub9cc\uc5d0": 28, "\ud29c\ub2dd\ud560": [28, 29], "loralib": 28, "\uc124\uce58": 28, "pip": 28, "instal": 28, "altern": [28, 48], "git": 28, "microsoft": 28, "befor": 28, "in_featur": 28, "out_featur": 28, "after": 28, "add": [28, 47, 53], "parameter\ub9cc": 28, "bigmodel": 28, "string": 28, "lora_": 28, "mark_only_lora_as_train": 28, "loop": [28, 55], "dataload": [28, 48], "\uc800\uc7a5\ud560": 28, "\ub54c\uc5d4": 28, "state_dict": 28, "\uc800\uc7a5\ud558\uac8c": 28, "save": 28, "checkpoint_path": 28, "lora_state_dict": 28, "\ubd88\ub7ec\uc62c": 28, "load_state_dict": 28, "strict": 28, "load": [28, 35, 47], "ckpt_pretrain": 28, "pt": [28, 45], "ckpt_lora": 28, "\ud29c\ub2dd": [28, 41], "gpu\ub85c": [28, 37], "\uac00\ub2a5\ud560\uae4c": [28, 39], "\uc18c\uac1c\ud569\ub2c8\ub2e4": [28, 39, 43, 47, 50, 52, 55, 56], "da": 28, "nhctrrve": 28, "14792": 29, "26": [29, 
32, 43, 56], "lucidrain": 29, "\ud558\uc600\uc74c": [29, 30], "\ubc29\ub300\ud558\ub2e4\ub294": 29, "\uc720\uc9c0\ud568": 29, "tensor\ub97c": 29, "\ubd84\ud574\ud558\uc5ec": 29, "\uadfc\uc0ac\ud654": 29, "\uc5b4\ud50c\ub9ac\ucf00\uc774\uc158\uc5d0": 29, "pipeline\uc744": 29, "\uc778\ud130\ub137\uc744": 29, "\ud655\ubcf4\ub420": 29, "\uc218\uc9d1\ud558\uae30\ub294": 29, "\uc874\uc7ac\ud558\ub294\ub370": 29, "\ub0ad\ube44\uc77c": 29, "\ube44\uc9c0\ub3c4": 29, "\ub808\uc774\ube14\uc774": [29, 33], "\uc9c0\uc815\ub418\uc9c0": 29, "\ud398\uc5b4\ub9c1\ub41c": 29, "\ube44\ub514\uc624\ub9cc\uc73c\ub85c": 29, "\uc138\uc0c1\uc758": 29, "\uac1c\uccb4\uac00": 29, "\uc6c0\uc9c1\uc774\uace0": 29, "\uc791\uc6a9\ud558\ub294\uc9c0": 29, "t2v\ub85c": 29, "\ud544\uc694\uc131\uc744": 29, "\uc6b0\ud68c\ud55c\ub2e4": 29, "\uace0\ud504\ub808\uc784\ub960": 29, "\uc2dc\uc2a4\ud15c\uacfc": [29, 33], "\ud3c9\uac00\ud55c\ub2e4": 29, "\uc81c\ub85c\uc0f7": 29, "300\uac1c\uc758": 29, "\uc218\uc9d1\ud558\uc5ec": 29, "\uacf5\uac1c\ud560": 29, "\uacc4\ud68d\uc774\ub2e4": 29, "\uc2e0\uacbd\ub9dd\uc758": 29, "\ucc28\uc6d0\uc73c\ub85c": 29, "\uc2dc\uacf5\uac04": 29, "\uc2e0\uacbd\ub9dd\uacfc": 29, "\uc218\uc2dd": [29, 36, 56], "sr_h": 29, "sr": [29, 49], "t_l": 29, "uparrow_": 29, "bpe": [29, 45], "c_x": [29, 53], "\uacf5\uc720\ud558\uc600\ub2e4": 29, "x_e": 29, "y_e": 29, "_l": 29, "sr_l": 29, "d\uc5d0\uc11c": 29, "768x768": [29, 51], "\ud53d\uc140\ub85c": 29, "\uc99d\uac00\uc2dc\ucf1c": 29, "\ubcc0\ud658\ub41c\ub2e4": 29, "fp": [29, 41], "2\ucc28\uc6d0": [29, 32], "\ub2e4\uc74c\uc758": [29, 33, 42], "\uc218\uc815\ud55c\ub2e4": 29, "layers\ub294": 29, "\uc815\ubcf4\ub9cc": 29, "\ucd94\uac00\ud574\uc8fc\uba74": 29, "\ub9cc\ub4e4\uac8c": 29, "16\uac1c\uc758": 29, "\uc0ac\uc774\ub97c": 29, "\ubcf4\uac04\ud558\uc5ec": 29, "\uc99d\uac00\uc2dc\ud0a8\ub2e4": 29, "\ub124\ud2b8\uc6cc\ud06c\uc5d0\ub294": 29, "\ud658\uac01": 29, "\ud3ec\ud568": [29, 31, 47], "\uc794\uc0c1\uc774": 29, "\uc0dd\uae30\uc9c0": [29, 
44], "\uc54a\uc73c\ub824\uba74": 29, "\ud658\uac01\uc774": 29, "\uc804\uccb4\uc5d0": [29, 32, 56], "\uc720\uc9c0\ud574\uc57c": 29, "\uc815\ubcf4\ub098": 29, "\uac00\uc0c1\uc73c\ub85c": 29, "\uc218\ud589\ud558\ub294": 29, "\ubaa8\ub4c8\uc778": 29, "\ubaa8\ub4c8\ub85c": 29, "\ub9cc\ub4e4\uae30\uc5d4": 29, "\uba54\ubaa8\ub9ac": [29, 41, 51], "\ubd80\uc871\uc73c\ub85c": 29, "\uc5b4\ub824\uc6e0\ub2e4": [29, 36], "\ucd08\uae30\ud654\ub97c": 29, "\uc804\ubc18\uc5d0": [29, 33], "\ud658\uac01\uc744": 29, "\ucee8\ubc8c\ub8e8\uc158": 29, "\ucee8\ubc8c\ub8e8\uc158\uc744": 29, "\uc313\ub294\ub2e4": 29, "\ucee8\ubc8c\ub8e8\uc158\uc758": 29, "load\ub97c": 29, "\ucee8\ubcfc\ub8e8\uc158": 29, "\ub808\uc774\uc5b4\uc640": 29, "\uc0ac\uc774\uc5d0": [29, 51], "\uacbd\uacc4\ub97c": 29, "information\uc744": [24, 29], "\ucc44": [29, 47, 52], "\ud150\uc11c": 29, "height": 29, "2_d": 29, "1_d": 29, "\ud568\uc218\ub85c": [29, 32, 43], "layer\uc5d0": [24, 29, 38], "\ud655\uc7a5\ud558\uc600\ub2e4": 29, "layer\ucc98\ub7fc": 29, "attenion": 29, "\uc313\uc544": 29, "\uadfc\uc0ac\ud654\ud558\ub294": [29, 56], "flatten": 29, "\ucd95\uc5d0": [29, 35], "flatten\ud558\ub294": 29, "\uc5f0\uc0b0": [29, 45], "hw": 29, "attn_": 29, "spatiotemporalattent": 29, "add_feed_forward": 29, "ff_mult": 29, "pos_bia": 29, "flash": 29, "causal_time_attn": 29, "assert": [29, 38, 42], "compat": [24, 29], "spatial_attn": 29, "spatial_rel_pos_bia": 29, "continuouspositionbia": 29, "num_dim": 29, "temporal_attn": 29, "causal": [29, 45], "temporal_rel_pos_bia": 29, "has_feed_forward": 29, "ff": [29, 53], "mult": 29, "enable_tim": 29, "is_video": 29, "ndim": 29, "bxf": 29, "hxw": 29, "space_rel_pos_bia": 29, "rel_pos_bia": 29, "bxhxw": 29, "time_rel_pos_bia": 29, "\ub780": [29, 41], "\ubd80\ub4dc\ub7fd\uac8c": 29, "\ub9cc\ub4e4\uace0": [29, 39, 40], "\uc5f0\uc7a5": 29, "\ubcf4\uac04\ud558\uace0": 29, "extrapolation\uc744": 29, "extrapol": 29, "\ubbf8\ub798\uc758": 29, "\uc608\uce21\ud558\uac70\ub098": 29, "spatialtempor": 
29, "\uc81c\ub85c": 29, "\ud328\ub529\ud558\uace0": 29, "\uc5c5\uc0d8\ud50c\ub9c1\uc744": 29, "interpolation\uc744": 29, "\ud29c\ub2dd\ud55c\ub2e4": 29, "\uc785\ub825\uc5d0": 29, "\ub9c8\uc2a4\ud0b9": 29, "\ub9c8\uc2a4\ud0b9\ub418\ub294": 29, "\ubc14\uc774\ub108\ub9ac": 29, "\ucc44\ub110": [29, 32, 35, 41], "skips\uacfc": 29, "rate\ub97c": [29, 40], "f\ub97c": 29, "5\ub85c": 29, "16\ud504\ub808\uc784": 29, "76\ud504\ub808\uc784": 29, "x5": 29, "\uc5c5\uc0d8\ud50c\ub9c1": [29, 33], "\ub05d": 29, "\ub9c8\uc2a4\ud0b9\ud558\uc5ec": 29, "\uc560\ub2c8\uba54\uc774\uc158\uc5d0\ub3c4": 29, "\uc694\uc18c\ub4e4\uc740": 29, "\ud29c\ub2dd\ud558\uc9c0": 29, "\ub9cc\uc73c\ub85c": [29, 43, 50, 51, 53], "decoder\ub294": 29, "\uc911\uc5d0": [29, 30, 33, 45], "\ub4e4\uc5b4\uc628": 29, "\ubc1b\ub294\ub2e4": [29, 33], "\ub05d\ub098\uba74": 29, "\ucd08\uae30\ud654\ud558\uc5ec": [29, 33], "16\ud504\ub808\uc784\uc774": 29, "\ubc94\uc704": [29, 49, 50, 54], "\uc2dc\uc791\ud558\uace0": [29, 54], "\uc774\ud6c4\uc5d0\ub294": [29, 36], "\uc804\ud658\ud55c\ub2e4": [29, 41], "\ub124\ud2b8\uc6cc\ud06c\ub294": 29, "\ub514\ucf54\ub354\ub85c\ubd80\ud130": 29, "\ud29c\ub2dd\ub41c\ub2e4": 29, "5b": [29, 37, 42, 51], "3b\uc758": 29, "nsfw": 29, "\uc720\ud574\ud55c": [29, 30], "5\ubcf4\ub2e4": 29, "\ud544\ud130\ub9c1\ud558\uc600\ub2e4": 29, "Not": 29, "safe": [29, 42], "For": [29, 32], "\uc120\uc815\uc801\uc774\uac70\ub098": 29, "\uc74c\ub780\ud558\uac70\ub098": 29, "\ud3ed\ub825\uc801\uc778": 29, "\ucf58\ud150\uce20": 29, "10m\uacfc": 29, "hd": 29, "vila": 29, "100m": 29, "10m\uc744": 29, "100m\uc744": 29, "\uc561\uc158": 29, "\uace0\uc548\ub418\uc5c8\uc73c\uba70": 29, "\ud658\uacbd\uc5d0\uc11c": 29, "\ube44\ub514\uc624\uc640": 29, "\ud074\ub798\uc2a4\uc5d0": [29, 42], "\ud15c\ud50c\ub9bf": 29, "\uc791\uc131\ud558\uace0": 29, "fretchet": 29, "\uce21\uc815\ud55c\ub2e4": 29, "train\uc14b\uacfc": 29, "\uc138\ud2b8\uc758": 29, "59": 29, "794": 29, "\ucea1\uc158\uc5d0": 29, "clipsim": 29, "amazon": 29, 
"turk": 29, "amt": [29, 44], "\uc218\uc9d1\ud558\uc600\ub2e4": 29, "annotator\ub4e4\uc5d0\uac8c": 29, "\uc2dc\uc2a4\ud15c\uc774": 29, "\uc2f6\uc740\uc9c0": 29, "\ubb3c\uc5b4\ubd24\ub2e4": 29, "\ubd88\uc644\uc804\ud558\uac70\ub098": 29, "\ucd94\uc0c1\uc801\uc774\uac70\ub098": 29, "\ubd88\ucf8c\uac10\uc744": 29, "\ud544\ud130\ub9c1": [29, 30], "\uce74\ud14c\uace0\ub9ac": [29, 30, 33], "\ub3d9\ubb3c": [29, 42], "\ud310\ud0c0\uc9c0": 29, "\uc790\uc5f0": [29, 40], "\ud48d\uacbd": [29, 35], "\uc74c\uc2dd": 29, "\uc74c\ub8cc": 29, "\uc2dd\ubcc4\ud558\uace0": 29, "\uc120\ud0dd\ud558\uc600\ub2e4": 29, "\ub370\uc5d0": 29, "\uc0ac\uc6a9\ub418\uc9c0": 29, "\uc720\uc9c0\ud588\ub2e4": 29, "imagen\uc758": [24, 29, 39], "drawbench": 29, "\ud504\ub86c\ud504\ud2b8\ub3c4": 29, "vedio": 29, "faithfulness\ub97c": 29, "\ud3c9\uac00\ud558\uc600\ub2e4": [29, 33], "\uc21c\uc11c\ub85c": 29, "\uc88b\uc740\uc9c0": 29, "annotator\uc5d0\uac8c": 29, "\ubb3c\uc5b4\ubcf8\ub2e4": 29, "vdeio": 29, "faith": [24, 29, 34], "\ubcf4\uac04": 29, "film\uc758": 29, "\uc0ac\uc2e4\uac10\uc744": 29, "\ube44\uad50\ud558\uae30": [29, 40], "\ud3c9\uac00\ub3c4": 29, "5\uba85\uc758": 29, "\uac01\uae30": 29, "annotator\uc758": 29, "\ub4dd\ud45c\ub97c": 29, "vtt\uc5d0": 29, "\ubcf4\uace0\ud558\ub294": 29, "godiva": 29, "nuwa": 29, "\uc911\uad6d\uc5b4\uc640": 29, "\uc601\uc5b4\ub97c": 29, "cogvideo": 29, "\ucd94\ub860\uc744": [29, 42], "\uc218\ud589\ud558\uc600\ub2e4": [29, 33], "\uc0f7": 29, "\uc6b0\uc218\ud558\ub2e4": 29, "finetunning\uc744": 29, "\uacb0\uacfc\uc5d0\uc11c\ub3c4": 29, "\ub2ec\uc131\ud558\uc600\ub2e4": 29, "drawbench\uc640": 29, "\ud14c\uc2a4\ud2b8\uc14b\uc5d0": 29, "cogvideo\uc640": 29, "vdm\uc758": 29, "\uc6f9": 29, "\ud398\uc774\uc9c0\uc5d0": 29, "\ud45c\uc2dc\ub41c": [29, 39], "28\uac1c\uc758": 29, "8\uac1c\uc758": [29, 32], "8\ubc88": 29, "\ud3c9\uac00\ud558\uc5ec": 29, "76x256x256": 29, "\ud3c9\uac00\uc790\uac00": 29, "\ub0ab\ub2e4\uace0": 29, "\ud22c\ud45c\ud55c": 29, "\ud37c\uc13c\ud2b8": 29, 
"\ube44\uc728": [29, 35], "video\uac00": 29, "film\uc744": 29, "drawbench\uc758": 29, "\uc800\ud504\ub808\uc784\ub960": 29, "4fps\uae4c\uc9c0": 29, "\uc5c5\uc0d8\ud50c\ub9c1\ud55c\ub2e4": 29, "\ud3c9\uac00\uc790\ub4e4\uc740": 29, "62": 29, "drawbench\uc5d0": 29, "54": [29, 36], "\ucee4\uc11c": [29, 37, 44], "\ubb3c\uccb4\uac00": [29, 32], "\uc6c0\uc9c1\uc774\ub294\uc9c0\uc5d0": 29, "\uc9c0\uc2dd\uc774": 29, "\uad00\ucc30": 29, "\ub9e8": 29, "vdm": 29, "\ubaa8\uc158\uc758": 29, "\ud48d\ubd80\ud55c": [24, 29, 31], "extrpol": 29, "\uac1c\uc778\ud654\ud558\uace0": 29, "film": 29, "\uc804\ud658\ud558\uae30\ub9cc": 29, "\uc758\ubbf8\ub860\uc801\uc73c\ub85c": 29, "\ub9cc\ub4ec": 29, "\uc138\uacc4\ub85c\ubd80\ud130": 29, "intelligence\ucc98\ub7fc": 29, "system\ub3c4": 29, "\ubaa8\ubc29\ud560": [29, 30], "\ucc3d\uc758\uc801\uc774\uace0": 29, "\uc720\uc6a9\ud560": 29, "\uc5f0\uad6c\uc790\ub4e4\uc740": 29, "\ub3d9\uc601\uc0c1\uc5d0\uc11c": 29, "\uc138\uacc4\uc758": 29, "dynamic\uc744": 29, "\uadf9\ubcf5\ud560": [29, 40], "2301": 31, "00704": 31, "mar": 31, "\uc8fc\uc5b4\uc9c0\uace0": 31, "grain": [24, 30, 31, 37], "\uc790\uc138": 31, "900m": 31, "cc3m": 31, "06": [30, 31, 50], "3b": 31, "outpaint": 31, "22": [31, 32, 45], "maskgit": 31, "googl": [31, 50, 54], "\uc608\uce21\ud558\uc9c0\ub9cc": 31, "\uc2e0\ub8b0\ub3c4\uac00": 31, "\ud1a0\ud070\ub9cc": 31, "\ub514\ucf54\ub529\ub428": 31, "\uc904\uc5ec": [31, 51], "\uc778\ucf54\ub529\ub418\uace0": 31, "\ub514\ucf54\ub529\ub418\uc5b4": 31, "\ubcf5\uc6d0\ub418\ub294": [31, 44], "16x16": [31, 46], "\ub300\ubd80\ubd84\uc774": 31, "\ud30c\ub77c\ubbf8\ud130\ub85c": [31, 35, 51], "unmak": 31, "t5xxl": 31, "\ubc14\uafb8\ub294\ub370": [31, 40], "noun": [31, 47], "action": 31, "verb": 31, "adject": 31, "preposit": 31, "\uac83\uc774\ub77c\uace0": [31, 39], "\uc120\ud589": 31, "4096": [31, 32, 33], "transformer\uc5d0": [24, 31], "\uc785\ub825\ub418\uac8c": 31, "\ub9e4\ud551\uc744": [31, 44], "\ub514\ucf54\ub529\uc774": 31, 
"\uc778\ucf54\ub529\ud560": 31, "tame": 31, "\ud1a0\ud070\uc774": [31, 33], "\ubb34\uc2dc\ud558\uba74\uc11c": 31, "\ud568\uc744": [31, 32], "entropi": 31, "\uc788\uac8c\ub428": 31, "unmask": [30, 31], "\ud1a0\ud070\uc740": [31, 40, 45], "\uad50\uccb4": 31, "hidden": 31, "\uc0ac\uc774\uc988\uc5d0": 31, "\ubcc0\uacbd\ud558\ub294\ub370": 31, "\uc624\ucc28\ub97c": [31, 32], "\uacc4\uc0b0\ud568": 31, "tokens\ub97c": 31, "\uc99d\uac00\ud558\uae30": 31, "\uad6c\uc131\ud588\uc744": 31, "\ud3ec\ucee4\uc2f1": 31, "\uc9c4\ud589\ub428": 31, "\uacc4\uce35\uc801\uc73c\ub85c": 31, "\uc124\uacc4\ud588\uc74c": 31, "\uc644\ub8cc\ub418\uba74": 31, "\ub298\ub9bc": 31, "4\uac1c": [31, 52], "\ud45c\uc9c0\ud310\uc774": 31, "\ub410\uc74c": 31, "ell_g": 31, "ell_c": 31, "ell_u": 31, "l_c": 31, "l_u": 31, "\uc99d\uac00\uc2dc\ud0a4\ub294": [30, 31, 39, 43], "\ud6c4\ubc18\uc5d0\ub294": 31, "\uc8fc\uac8c": [31, 39, 44], "\ub85c\ub3c4": [31, 56], "foward": 31, "\uc5f0\uc0b0\uc73c\ub85c": 31, "condition": 31, "independ": 31, "\uc218\ud589\ub428": 31, "\uc608\uce21\ub418\ub294": 31, "\uc120\ud0dd\ud574": 31, "\ud574\uc81c\ub418\ub294": 31, "rich": [31, 50], "\uc218\ubc31\ubc88\uc758": 31, "460m": 31, "1m": [24, 31], "week": 31, "tpu": 31, "v4": 31, "adafactor": [31, 39], "cardin": 31, "\ud68c\uc804\ub41c": 31, "\ud004\ub9ac\ud2f0": [31, 36, 41, 50], "prompt\uc640\uc758": 31, "\uce21\uc815\ud588\uc74c": 31, "\uac00\uc838\uc62c": 31, "\uc788\ub098\uc694": 31, "\uc544\ubb34\ub798\ub3c4": [30, 31], "\ubaa8\ub378\uc774\ub77c": 31, "\ubaa8\ub378\uc778\uac00\uc694": 31, "\uae30\uc900\uc810\uc774": 31, "\uc5b4\ub5bb\ub0d0\uc5d0": 31, "\uc815\ud558\uae30": 31, "vqgan\uc744": 31, "gan\uc774\ub77c\uace0": 31, "\uc0dd\uac01\ud560": 31, "\uacb0\uacfc\uc5d0": [31, 33, 39, 42], "\ub2ec\ub77c\uc9c8": [31, 38], "\uad00\uc810\uc5d0\uc11c": [31, 33, 51], "\uc544\ub2c8\ub2e4\ub77c\uace0": 31, "\ub9d0\ud560": [31, 40], "\uac16\ub098\uc694": 31, "vqgan\uc5d0\uc11c": 31, "codebook\uc758": 31, 
"\uad6c\uc131\ud558\ub294\ub370\uc694": 31, "token\uc774\ub77c\uace0": 31, "\uc0dd\uac01\ud558\uc2dc\uba74": 31, "\ub123\uc5c8\uc744\ub54c": 31, "\uc774\ub904\uc9c0\ub098\uc694": 31, "inference\uc5d0\uc11c\ub294": 31, "\uc5c6\ub294\ub370": 31, "token\ub300\uc2e0": 31, "\ub4e4\uc5b4\uac00\uac8c": [31, 38, 50, 54], "\ub418\ub098\uc694": 31, "\ub9c8\uc2a4\ud06c\ub41c": 31, "\uc218\ud589\ub429\ub2c8\ub2e4": 31, "\uc218\uc2dd\uc5d0": 31, "\ub4e4\uc5b4\uac00\ub098\uc694": 31, "value\ub85c": 31, "\uc785\ub825\ub418\uc5b4": 31, "\uc218\ud589\ub418\uac8c": 31, "\uadf8\ub807\uac8c": [31, 39, 40], "feature\uc640": [24, 31], "gt\uc758": 31, "\ub07c\ub9ac": 31, "2003": 32, "08934": 32, "bmild": 32, "\uad6c\uc131\ud558\uc5ec": 32, "\uc800\uc7a5": 32, "\uc6a9\ub7c9\uc774": [32, 55], "\uad6c\uc131\ud558\uc9c0": 32, "synthes": [32, 36, 51, 52], "\uc88c\ud45c\ub97c": [32, 36], "\ubc00\ub3c4": [32, 34], "\uac01\ub3c4\ub85c": 32, "\ucc0d\uc740": [32, 44], "\uc77c\ubd80\uc758": 32, "\uac01\ub3c4\uc758": 32, "\uc720\ucd94\ud558\ub294": 32, "\ud55c\uc815\ub41c": 32, "contin": 32, "\ubc14\ub77c\ubcf4\ub294": 32, "\ubc29\ud5a5": 32, "density\uc640": 32, "\ubc29\ud5a5\uc5d0": [32, 44], "\uad11\uc120\uc744": 32, "\uace0\uc804\uc801": 32, "\uc678\ud615\uc744": 32, "keyword": 32, "08934v2": 32, "\ucea1\ucc98\ub41c": 32, "\ud568\uc218\uc758": [32, 34], "\ubd84\uc57c\uc758": 32, "\ud574\uacb0\ud568": [32, 45], "\uc815\uc801": 32, "regress": [32, 33, 36], "coord": 32, "\uc0c1\uc758": [32, 44], "\uc9c0\uc810": [32, 36, 43], "\ubc29\ucd9c\ub41c": 32, "\ub204\uc801\uac12\uc744": 32, "\ube5b\uc774": 32, "\ud835\udc65": 32, "\ud835\udc66": 32, "\ud835\udc67": 32, "\ud1b5\uacfc\ud558\ub294": 32, "\uad11\uc120\uc5d0": 32, "\ub204\uc801\ub418\ub294\uc9c0\ub97c": 32, "\uc2dc\uc810\uc73c\ub85c\ubd80\ud130\uc758": 32, "\uc774\ub3d9\ud558\uc5ec": 32, "\ud3ec\uc778\ud2b8\ub4e4\uacfc": 32, "\uc2e0\uacbd\ub9dd\uc5d0": 32, "descent": [32, 43, 48], "\uc624\ucc28": 32, "\ucd5c\uc18c\ud654\ub97c": 32, "cotent": 32, 
"\ud560\ub2f9\ud558\uc5ec": [32, 33], "basic": 32, "implementation\uc758": 32, "\ub300\uc548": 32, "\ud45c\ud604\uc73c\ub85c": 32, "\uc218\ub834\ub418\uc9c0": 32, "mlp\uac00": 32, "\uc8fc\ud30c\uc218\uc758": 32, "\uad11\uc120\ub2f9": 32, "\uc694\uad6c\ub418\ub294": 32, "\ube44\ud6a8\uc728\uc801": [32, 45], "\uacc4\uce35\uc801": 32, "\uace0\uc8fc\ud30c\uc218\uc758": 32, "\uc801\uc808\ud558\uac8c": 32, "\uc0d8\ud50c\ub9c1\ud558\uae30": [32, 33], "\uac10\uc18c\uc2dc\ud0b4": 32, "\uc0c1\uc18d": 32, "\uc2e4\uc138\uacc4\uc758": 32, "\ud615\ud0dc\uc640": 32, "\ud22c\uc601\ub41c": 32, "\uc801\ud569": 32, "\uace0\ud574\uc0c1\ub3c4\uc5d0\uc11c": 32, "\ubaa8\ub378\ub9c1\ud560": 32, "\uc774\uc0b0\ud654\ub41c": 32, "\ubcf5\uc140": 32, "\uadf8\ub9ac\ub4dc\uc758": 32, "3\ucc28\uc6d0": 32, "\uccb4\uc801\uc758": 32, "\ub2e8\uc704": 32, "2\ucc28\uc6d0\uc758": 32, "\uacbd\uc6b0\uc5d0\uc120": 32, "pix": 32, "\uc815\ubcf4\uc640": [32, 33, 37], "\ud22c\uacfc\uc131": 32, "volumn": 32, "\uccb4\uc801": 32, "ct": [32, 43], "mri": 32, "technic": 32, "\uae30\ud558\ud559\uacfc": 32, "\uc18c\uc7ac\ub97c": 32, "5\ucc28\uc6d0": 32, "\ub9e4\uac1c\ubcc0\uc218\ud654": 32, "\uace0\uc804\uc801\uc778": 32, "capac": [32, 53], "\uc2dc\uac01\uc801\uc778": [32, 35, 40], "\ub0b4\uc6a9\uc774": 32, "\uacf5\uac04\uc73c\ub85c": 32, "\ud560\ub2f9": [32, 33], "\uc9d1\uc911\uc801\uc73c\ub85c": 32, "\ub9e4\ud551\ud558\uae30": 32, "\uc131\uacf5\uc801\uc73c\ub85c": [32, 51], "\uace0\uc8fc\ud30c\uc758": 32, "\ud45c\ud604\uac00\ub2a5": 32, "procedur": [32, 34, 54], "practic": 32, "cartesian": 32, "\ud835\udc51_\ud835\udc65": 32, "\ud835\udc51_\ud835\udc66": 32, "\ud835\udc51_\ud835\udc67": 32, "\uae38\uc774\uac00": [32, 36], "emit": 32, "\uc0c9\uc0c1\uc740": [32, 33], "relu": [32, 55, 56], "256\uac1c": 32, "256\ucc28\uc6d0": 32, "\ubca1\ud130\ub294": [32, 40], "\uad11\uc120\uc758": 32, "\ubc29\ud5a5\uacfc": 32, "128\uac1c": 32, "\uc804\ub2ec\ub428": 32, "\uc608\uce21\ud574\uc57c": 32, "\ubb3c\uccb4\uc758": [32, 33, 
36], "\ud45c\uba74\uc5d0\uc11c": 32, "\uad11\uc120\uc774": 32, "\ubc18\uc0ac\ub418\ub294": 32, "\ud45c\uba74\uc758": [32, 36], "\uac01\ub3c4\uc5d0": 32, "\uad11\ub7c9\uc774": 32, "\uc77c\uc815\ud558\ub2e4\ub294": 32, "\uc2dc\uc120": 32, "\ud45c\ud604\ud55c": [32, 36, 40], "\ubc18\uc0ac\uc131": 32, "specular": 32, "\ud45c\ud604\ud558\ub294\ub370": 32, "\ud3ec\uc778\ud2b8\uc5d0\uc11c": 32, "\ub80c\ub354\ub9c1\ud558\ub824\uba74": 32, "\uce74\uba54\ub77c\uc758": 32, "\ud53d\uc140\uc744": 32, "\ucd94\uc801\ub41c": 32, "\uc801\ubd84\uac12": [32, 36], "\ucd94\uc815\uc744": 32, "t_f": [32, 36], "\ud22c\uacfc\uc728": 32, "transmitt": 32, "quadratur": 32, "\uad6c\uc801\ubc95": 32, "\uc801\ubd84\uac12\uc744": 32, "\uc218\uce58\uc801\uc73c\ub85c": 32, "grid": [24, 30, 32, 33, 52], "\uadf8\ub9ac\ub4dc\ub97c": [32, 33], "\uad6c\uc801\ubc95\uc740": 32, "\uc774\uc0b0": 32, "\ucffc\ub9ac\ub418\uae30": 32, "stratifi": 32, "\ud45c\uc9d1": 32, "\uc811\uadfc\ubc95\uc744": [32, 36], "bin\uc73c\ub85c": 32, "\ubd84\ud560\ud55c": 32, "partit": 32, "bin": 32, "\ud45c\ubcf8\ub4e4\uc744": 32, "\uc0ac\uc6a9\ud558\ub354\ub77c\ub3c4": 32, "\uac00\ub2a5\ud558\ubbc0\ub85c": 32, "\uc704\uce58\ub4e4\uc5d0\uc11c": 32, "\ud3c9\uac00\ub418\ub3c4\ub85d": 32, "\ubf51\uc740": [32, 38], "\uc0d8\ud50c\ub4e4\ub85c": [32, 33], "\ub80c\ub354\ub9c1\uc5d0\uc11c": [32, 33], "\ub17c\uc758\ub41c": 32, "\uad6c\uc801\ubc95\uc73c\ub85c": 32, "\uc801\ubd84\uc744": 32, "sigma_i": 32, "delta_i": [32, 36], "sigma_j": 32, "delta_j": [32, 36], "adjac": 32, "dt": [32, 36, 51, 54], "\uc9d1\ud569\uc73c\ub85c\ubd80\ud130": 32, "\uac00\ub2a5\ud558\uba70": [32, 53], "alpha_i": 32, "\uc804\ud1b5\uc801\uc778": [32, 34, 36, 54], "\uc54c\ud30c": [32, 33], "\ud22c\uba85\ud55c": 32, "\uc720\ub9ac": 32, "\uadf8\ub9bc\uc790": 32, "\uacb9\uce60": 32, "\ucef4\ud3ec\uc9c0\ud305\uc744": 32, "remind": 32, "\uc694\uc18c\ub4e4\ub85c": [32, 54], "\ub2ec\uc131\ud558\uae30\uc5d0\ub294": 32, "assist": 32, "\ud615\ud0dc\uc5d0\uc11c": 32, 
"\uace0\uc8fc\ud30c": 32, "\ubcc0\ub3d9\uc744": 32, "\uc54a\uc558\uc74c": [32, 51], "On": [32, 51], "spectral": 32, "\uc2e0\uacbd\ub9dd\uc774": 32, "\uc800\uc8fc\ud30c": 32, "\ucabd\uc73c\ub85c": 32, "\ud3b8\ud5a5\ub418\uc5c8\uc74c\uc744": 32, "\uc2e0\uacbd\ub9dd\uc744": [32, 51], "\ud1b5\uacfc\ud558\uae30": 32, "\ub9f5\ud551": 32, "\ubcc0\ub3d9\uc774": 32, "\uc138\uac1c\uc758": 32, "\uc88c\ud45c\uac12\uacfc": 32, "\uc131\ubd84\uc5d0": 32, "\ubd84\ub9ac\ub418\uc5b4": 32, "\uc801\uc6a9\ub428": 32, "\uc5ec\uc720": 32, "\ube44\uc5b4\uc788\ub294": 32, "\ub9c9\ud600\uc788\ub294": 32, "\uc0d8\ud50c\ub9c1\ub428": 32, "\uc608\uc0c1": [32, 42], "\ud6a8\uacfc\uc5d0": 32, "\ube44\ub840\ud558\uc5ec": 32, "\uc99d\uac00\uc2dc\ud0b4": 32, "\uacf3\uc744": 32, "\ubf51\uc790": 32, "coars": [24, 32, 33, 36, 38], "n_c": 32, "\uc704\uce58\uc5d0\uc11c": 32, "\ub124\ud2b8\uc6cc\ud06c\uc5d0\uc11c\uc758": 32, "_c": 32, "\uceec\ub7ec": [32, 33], "\ub4e4\uc758": [32, 44, 45, 54], "\uac00\uc911\ud569": 32, "w_i": [32, 36], "piecewis": 32, "dfrac": 32, "w_j": 32, "\ubc00\ub3c4\ud568\uc218": 32, "2\ubc88\uc9f8": 32, "\uc0d8\ud50c\uc9d1\ud569\uc758": 32, "n_f": 32, "\ubc88\uc9f8\uc640": 32, "\uc9d1\ud569\uc758": 32, "\ud569\uc9d1\ud569\uc5d0\uc11c": 32, "_f": 32, "\uc808\ucc28\uc5d0\uc11c\ub294": 32, "\uad00\uce21": 32, "\ud3ec\ud568\ub420": 32, "\uc608\uc0c1\ub418\ub294": 32, "scene\uc774": 32, "extrins": 32, "\uacbd\uacc4\ub85c": 32, "\ub370\uc774\ud130\uc14b\uc774": [32, 33, 45, 50], "\uce74\uba54\ub77c\uac00": 32, "\uc5b4\ub514\ub97c": 32, "\ubc14\ub77c\ubcf4\uace0": 32, "\ub80c\uc988\uc640": 32, "\uc13c\uc11c": 32, "\uc758\ud574\uc11c": [32, 38], "\uacb0\uc815\ub418\uc5b4\uc9c0\ub294": 32, "\ud56d\ubaa9\uc73c\ub85c": 32, "\ud328\ub110\uc774": 32, "\ud655\ub300\ud558\uace0": 32, "\uae30\uc6b8\uc5b4\uc84c\ub294\uc9c0": 32, "shear": 32, "pramet": 32, "\uacf5\uac04\uc0c1\uc758": 32, "\ud3c9\uba74\uc5d0": 32, "\ud22c\uc0ac": [32, 33], "perspect": 32, "iteration\uc5d0\uc11c": 32, 
"\uc9d1\ud569\uc5d0\uc11c": 32, "batch\ub97c": 32, "\uc0d8\ud50c\uacfc": [32, 44], "\ub450\uc0d8\ud50c": 32, "\uc81c\uacf1": 32, "\ub80c\ub354\ub9c1\uc740": 32, "\uc0d8\ud50c\ub9c1\uc758": [32, 39], "adam": [30, 32, 37, 39, 44, 55], "lr": [30, 32], "beta_2": 32, "\ub2f9": [32, 44], "30\ub9cc": 32, "1\uac1c\ub85c": 32, "2\uc77c": 32, "degre": 32, "geometry\ub85c": 32, "\uc0c1\ubc18\uad6c\uc5d0": 32, "479": 32, "8\uac1c": 32, "pathtrac": 32, "\uad6c": 32, "\ud604\uc2e4": [32, 35], "\uc55e\ucabd\uc5d0\uc11c": 32, "llff": 32, "1008": 32, "756": 32, "nv": 32, "srn": 32, "fusion": 32, "\uc678\uc591": 32, "nonlambertian": 32, "\ubc18\uc0ac": 32, "ghost": 32, "ship": 32, "lego": 32, "blurri": 32, "\uae30\ud558\uc801": 32, "\ub80c\ub354\ub9c1\uc5d0\uc11c\uc758": 32, "\uacb9\uce68": 32, "\ubc88\uc9d0": 32, "\ubc1d\uae30\uc640": 32, "\uad11\ud0dd": 32, "\uac00\uc9d0": 32, "occlud": 32, "also": [32, 52], "correctli": 32, "pe": 32, "\uc758\uc874\uc131": [32, 53], "vd": 32, "\uc8fc\ud30c\uc218": 32, "\ucd08\uacfc\ud560": 32, "\ud5a5\uc0c1\uc5d0": 32, "so": 32, "li": 32, "within": 32, "dure": [32, 55], "192": [32, 45], "\ub354\ud574\uc9c4": [32, 36], "relu\ub85c": 32, "\uc5ee\uc778": 32, "\ud1b5\uacfc\ud558\uac8c": [32, 55], "deepsdf": 32, "\ub530\ub974\uace0": [32, 56], "5\ubc88\uc9f8": 32, "activation\uc5d0": 32, "output\uc73c\ub85c": 32, "08751": 33, "index": [33, 37, 40], "\ud569\ucce4\ub2e4": 33, "\uc874\uc7ac\ud558\uc5ec": 33, "\ubcf5\uc7a1\ud558\uace0": 33, "\uc18c\uaddc\ubaa8\uc758": 33, "\ud074\ub77c\uc6b0\ub4dc\ub97c": 33, "\ud504\ub85c\uc138\uc2a4\ub85c": 33, "\ud074\ub77c\uc6b0\ub4dc": 33, "\ud2b8\ub79c\uc2a4\ud3ec\uba38": 33, "2\ubc30": [33, 54], "\uc870\uac74\ud654\ub41c": 33, "\ud615\uc0c1\uc744": 33, "\uc694\uad6c\ud558\uace0": 33, "gpu\uc5d0\uc11c": [33, 51], "2\ubd84\ub9cc\uc5d0": 33, "\ud0d0\uc0c9\ud55c\ub2e4": 33, "\ubdf0\ub97c": 33, "vide": 33, "3d\ub85c\uc758": 33, "\ubc1c\uc804": 33, "\ud3ed\ubc1c\uc801\uc73c\ub85c": 33, "\uc99d\uac00\ud568\uc5d0": [33, 
39], "\ucd08\ub9cc\uc5d0": 33, "\uc790\uc5f0\uc5b4\uc5d0\uc11c": 33, "\uc218\uc815\ud560": 33, "\ube44\ub514\uc624\ub098": 33, "\uac1d\uccb4\uc640": 33, "\ub3c4\uba54\uc778\uc5d0\uc11c\uc758": 33, "\ud0d0\uc0c9\ud558\uace0": 33, "\ub17c\ubb38\ub3c4": 33, "\ub454\ub2e4": 33, "\ud569\uc131\uc758": 33, "\ud569\uc131\uc740": [33, 39], "\ud558\ub098\uc5d0": 33, "\uc18d\ud55c\ub2e4": [24, 33], "\uc774\ub8ec": [33, 34], "unlabeld": 33, "directli": 33, "\ud655\uc7a5\uc131\uc758": 33, "\ud45c\ud604\ubc95\ub4e4": 33, "\uac70\uccd0\uc57c": 33, "\uac78\ub9b4": 33, "\ub4dc\ub294": 33, "\ud504\ub85c\uc138\uc2a4\uac00": [33, 44], "prior\uac00": 33, "\uc788\uac70\ub098": 33, "\uac1c\uccb4\uc5d0": 33, "\ud574\ub2f9\ud558\uc9c0": 33, "minima\uc5d0": 33, "\ube60\uc9c8": 33, "\ud30c\uc774\ud504\ub77c\uc778": [33, 45], "\uac1c\uc694": 33, "\ub450\uce74\ud14c\uace0\ub9ac\uc758": 33, "\uc30d": 33, "\ub530\ub97c": 33, "\ud30c\uc778\ud29c\ub2dd\ub41c": 33, "\uc2a4\ud0dd\uc744": 33, "\ud074\ub77c\uc6b0\ub4dc\uc5d0\uc11c": 33, "\uba54\uc26c\ub97c": 33, "\ud68c\uadc0": 33, "\uc0d8\ud50c\ub9c1\ud55c\ub2e4": [33, 35, 40], "\ucd08": [33, 42], "\ub0b4\uc5d0": [24, 33], "\uc218\ud589\ub420": 33, "\ud504\ub86c\ud504\ud2b8\ubfd0\ub9cc": 33, "\ud504\ub86c\ud504\ud2b8\uc640\ub3c4": 33, "\uc0dd\uc131\ud55c\ub2e4\ub294": 33, "\uc758\ubbf8\uc5d0\uc11c": [33, 40], "e\ub77c\uace0": 33, "\uba85\uce6d\ud558\uc600\ub2e4": 33, "\ubcc0\ud615\ud55c\ub2e4": 33, "\uc124\uc815\uc744": 33, "\ud504\ub85c\uc138\uc2a4": 33, "\ud504\ub85c\uc138\uc2a4\ub294": 33, "t\ub9c8\ub2e4": 33, "\ud3ec\ud568\ud558\uc9c0": 33, "\uc54a\uac8c": 33, "\uc5ed": 33, "\ub79c\ub358": 33, "\ud504\ub85c\uc138\uc2a4\ub97c": 33, "\uc9c4\ud589\ud558\uc5ec": 33, "\uc7a1\uc74c\uc774": 33, "xt": 33, "p\u03b8": 33, "\uadfc\uc0ac\ud558\uc5ec": 33, "\ud3c9\uade0\ubfd0\ub9cc": 33, "\uc0d8\ud50c\ub9c1\uc740": 33, "\ubc29\uc815\uc2dd": 33, "\uc124\uba85\ub420": 33, "sde": [33, 43, 51, 54], "\ud574\uc11d\uae30\ub97c": 33, "2\ucc28": [33, 39], 
"\uac00\uc774\ub4dc": 33, "\ubd84\ub958\uae30": 33, "saliman": [33, 43, 51], "\uc0ad\uc81c\ud55c\ub2e4": 33, "\ub4dc\ub86d": 33, "\ud50c\ub85c\uc6b0": 33, "\uc5f0\uad6c\uc640": 33, "pvd": 33, "\uc810\uc5d0\uc11c": [24, 33], "\ucc28\ubcc4\uc810\uc744": 33, "\ub9de\ucd94\uc9c0\ub9cc": 33, "\uc7ac\uad6c\uc131\ud558\ub824\uace0": 33, "\ud558\uc9c0\ub294": [33, 36], "\ub9e4\uce6d": 33, "\ubaa9\ud45c\uc5d0": 33, "\ud0d0\uad6c\ud55c\ub2e4": 33, "\uac1d\uccb4\ub098": 33, "\uc808\ucc28\uac00": 33, "\uac78\ub9b0\ub2e4": [33, 40, 42], "\uc5f0\uad6c\ub4e4\ub3c4": [33, 43], "\ud504\ub86c\ud504\ud2b8\ub098": 33, "\uc881\uc740": 33, "\ud55c\uc815\ub418\uc9c0\ub9cc": 33, "\ud574\uacb0\ud55c\ub2e4": 33, "\uc788": 33, "\uc774\ub4e4": [33, 42], "\ubd88\ucda9\ubd84\ud55c": 33, "\ub2e4\ub8e8\uba74\uc11c\ub3c4": 33, "\uc720\ub9dd\ud55c": 33, "\ub098\ub208\ub2e4": [33, 36, 49], "\uc21c\uc5f4": 33, "\ubd88\ubcc0": 33, "permut": 33, "invari": 33, "\ud074\ub77c\uc6b0\ub4dc\uc640": 33, "\uc720\uc0ac\ud558\uc9c0\ub9cc": [33, 36, 43], "\uc218\ubc31\ub9cc": 33, "\uba54\ud0c0\ub370\uc774\ud130\ub85c": 33, "\ud6c8\ub828\uc2dc\ud0a8\ub2e4": 33, "\ud074\ub77c\uc6b0\ub4dc\ub85c": 33, "\ucc98\ub9ac\ud55c\ub2e4": 33, "\ud615\uc2dd\uacfc": [33, 43], "\ub2e4\uc591\ud588\uace0": 33, "\ubcf4\uc7a5\ud558\uae30": 33, "\ud544\uc694\ud588\ub2e4": [24, 33], "blender\ub97c": 33, "\ud615\uc2dd": [33, 40], "rgbad": 33, "blender": 33, "\ud615\uc2dd\uc744": 33, "\uc5d4\uc9c4\uc744": 33, "\ud504\ub85c\uadf8\ub7a8": 33, "\uae4a\uc774": 33, "\ucc44\ub110\uc774": 33, "\ud615\uc2dd\uc758": 33, "\uc0c1\uc790": 33, "\uc815\uaddc\ud654\ud558\uace0": 33, "\uad6c\uc131\ud55c": 33, "blender\uc758": 33, "\ub0b4\uc7a5\ub41c": 33, "\ub0b4\ubcf4\ub0c8\ub2e4": 33, "\uc0c9\uc0c1\uc774": 33, "\ud53d\uc140\uc5d0": 33, "\uac1d\uccb4\uc5d0": 33, "\ud074\ub77c\uc6b0\ub4dc\ub294": 33, "\ubd84\ud3ec\ub418\uc5b4": 33, "\uc810\uc758": 33, "\uad6c\uc131\ud568\uc73c\ub85c\uc368": 33, "\uba54\uc26c\uc5d0\uc11c": 33, "\ub0b4\ubd80\uc5d0": 33, 
"\uc0d8\ud50c\ub9c1\ud558\ub294": [33, 40], "\uc774\uc0c1\ud55c": [33, 49], "\ud30c\uc77c": 33, "\uc800\ud488\uc9c8": 33, "\uc81c\uac70\ud558\uae30": 33, "\ud734\ub9ac\uc2a4\ud2f1\uc744": 33, "\ud074\ub77c\uc6b0\ub4dc\uc758": 33, "svd\ub97c": 33, "\ud2b9\uc774\uac12\uc774": 33, "\uc784\uacc4\uac12": 33, "threshold": 33, "\uc774\uc0c1\uc778": 33, "\uacbd\uc6b0\uc5d0\ub9cc": 33, "\uc720\uc9c0\ud568\uc73c\ub85c\uc368": 33, "\ud3c9\ud3c9\ud55c": 33, "\ud2b9\uc131\uc5d0": 33, "\ud074\ub7ec\uc2a4\ud130\ub9c1": 33, "\ud074\ub7ec\uc2a4\ud130\ub294": 33, "\ub2e4\uc591\ud558\uac70\ub098": 33, "\ud574\uc11d": 33, "\ud074\ub7ec\uc2a4\ud130\ub97c": 33, "\ubc84\ud0b7\uc73c\ub85c": 33, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\uc11c": 33, "\ubc84\ud0b7\uc758": 33, "\ud63c\ud569\uc744": 33, "\uc124\uba85\ud560": 33, "\ub80c\ub354\ub7ec\uc640": 33, "\ud30c\ud2b8\uc5d0\uc11c\ub294": 33, "\ubd84\ud3ec\uc640": [33, 44], "\ub80c\ub354\ub97c": 33, "glide\ub97c": [33, 39], "\uc800\uc790\ub4e4\uc758": 33, "\ud559\uc2b5\uc14b\uc5d0": 33, "\uc791\uae30": 33, "\ub85c\ub9cc": [33, 39], "\ud69f\uc218\ub294": 33, "000\ubc88\uc758": 33, "\uc9c4\ud589\ud588\uc74c\uc744": 33, "\uc2dc\uac04\uc5d0\ub294": [33, 50, 55], "\ub80c\ub354\uc758": 33, "complet": 33, "\ud504\ub808\uc784\uc6cc\ud06c\ub97c": 33, "\ud3ec\ud568\uc2dc\ucf30\ub2e4": 33, "\ud150\uc11c\ub85c": 33, "\uc88c\ud45c\uc640": 33, "\ubc94\uc704\ub85c": 33, "\ub514\ub178\uc774\uc9d5\ud558\uc5ec": 33, "\ud150\uc11c\ub97c": 33, "\ud65c\uc6a9\ud558\ub358": 33, "\uc2a4\ud15d": [33, 39], "\ud3ec\uc778\ud2b8\ub97c": 33, "\ucc28\uc6d0\uc774": [33, 43, 56], "d\uc778": 33, "\ucee8\ud14d\uc2a4\ud2b8\ub85c": 33, "\ud0c0\uc784\uc2a4\ud15d": [33, 35], "\uc55e\uc5d0": [33, 37, 44, 46], "d\ucc28\uc6d0": 33, "\uc5bb\ub294\ub2e4": [33, 36], "\uc785\ub825\ud558\uace0": 33, "\uac00\uc838\uc628\ub2e4": 33, "256xd": 33, "lienarli": 33, "shape\uc758": 33, "\uc6b0\uc218\ud588\ub2e4": 33, "\ucee8\ud14d\uc2a4\ud2b8\ub294": 33, "257": 33, "shape\uac00": 33, "k\uc758": 
33, "k\uac1c\ub97c": 33, "\uac00\uc838\uc624\uace0": 33, "\ud504\ub85c\uc81d\uc158\ud558\uc5ec": 33, "k\uac1c\uc5d0": 33, "\u03b5\uc640": 33, "\ud1a0\ud070\uc758": 33, "\ud3ec\uc778\ud2b8\ub85c": 33, "\u03c3\uc744": 33, "\uc790\uccb4\ub294": [30, 33], "\ud074\ub77c\uc6b0\ub4dc\uc5d0": 33, "\ubd84\ubcc4": 33, "\ud558\ub2e4": [33, 42], "\ubaa8\ub378\uc5d0\uc11c\uc758": 33, "\uacc4\uce35": 33, "\ubc29\uc2dd\uc5d0\uc11c\ub294": [33, 43], "\uc800\ud574\uc0c1\ub3c4\uc758": 33, "\uc5c5\uc0d8\ud50c\ud55c\ub2e4": 33, "\uc0dd\uc131\uc5d0\uc11c\uc758": 33, "1k": [33, 39], "\uac19\uc744": 33, "\ub370\uc5d0\ub294": 33, "\ubc30": [33, 35, 45, 53], "\uc5c5\uc0d8\ud50c\ub7ec": 33, "\uc5c5\uc0d8\ud50c\ub7ec\ub294": 33, "3k": 33, "\ub808\uc774\uc5b4\uac00": 33, "\uc804\ub2ec\ud558\uc5ec": 33, "\uad6c\ubcc4\ud560": [33, 44], "\ub80c\ub354\ub9c1\ud558\uc9c0": 33, "\ud14d\uc2a4\ucc98\uac00": 33, "\uc785\ud600\uc9c4": 33, "\uba54\uc26c\ub85c": 33, "\ubcc0\ud658\ud558\uace0": [33, 44], "\ub80c\ub354\ub9c1\ud55c\ub2e4": 33, "\uade0\uc5f4": 33, "\uc774\uc0c1\uce58": 33, "sap\ubaa8\ub378\uc744": 33, "\uc0ac\uc6a9\ud574\ubd23\uc73c\ub098": 33, "\uc874\uc7ac\ud588\ub358": 33, "field\ub97c": 33, "\uc608\uce21\ud558\uace0": [33, 51], "merch": 33, "\ucd94\ucd9c\ud588\ub2e4": 33, "\uba54\uc26c\uc758": 33, "\ubc84\ud14d\uc2a4\uc5d0": 33, "\ud560\ub2f9\ud588\ub2e4": 33, "sdf": [33, 36, 52], "\ud074\ub77c\uc6b0\ub4dc\ub85c\ubd80\ud130": 33, "sdf\ub97c": [33, 36], "\uba54\uc26c": 33, "\uc124\uba85\uacfc": 33, "\uc77c\uce58\ud558\ub294\uc9c0\ub97c": 33, "\uc720\uc0ac\ub3c4\uac00": 33, "r\uac1c\uc758": 33, "pointnet": 33, "\uc5d0\uce21": 33, "40m": 33, "\uc815\ubcf4\ub3c4": 33, "vec": 33, "\ucea1\uc158\uc5d0\ub9cc": 33, "\uadf8\ub9ac\ub4dc": 33, "300m": [33, 55], "\uc870\uac74\ub9cc": 33, "\uc774\ubbf8\uc9c0\ub85c\uc758": 33, "precision\uc774": [33, 46], "\ub098\uc058\uac8c": 33, "\uc784\ubca0\ub529\ubcf4\ub2e4": 33, "\uacf5\uac04\uc801\uc778": [33, 41], "\ubcf4\ub294": [33, 47], "\uc774\uc810\uc774": 
33, "\uc2dc\uc0ac": 33, "\uc2a4\ucf00\uc77c\uc744": 33, "\uc99d\uac00\uc2dc\ud0a4\uba74": 33, "\uc218\ub834": 33, "\ud3ec\uc778\ud2b8\ud074\ub77c\uc6b0\ub4dc": 33, "\uc774\ud574\ud558\uc9c0": 33, "\ubabb\ud558\uac70\ub098": 33, "\ucd94\ub860\ud55c": 33, "\ud574\uc11d\ud558\ub294": 33, "\ud615\uc0c1\uc758": 33, "\uc77c\ubd80\ub97c": [33, 38], "\ucd94\ub860\ud558\ub294": 33, "e\ub97c": [33, 36], "art": [33, 47, 54], "dreamfus": 33, "\ubd88\uc77c\uce58\uc758": 33, "\ubbf8\ubb18\ud55c": 33, "\uc8fc\ubaa9\ud574\uc57c": 33, "dreamfusion\uacfc": 33, "\uba40\ud2f0\ubdf0": 33, "\ubaa8\ub4e0\ubdf0\ub97c": 33, "\ucd5c\uc801\ud654\ud558\uc9c0": 33, "\uac1d\uccb4\uac00": [33, 45], "\uc2dd\ubcc4\ub418\uc9c0": 33, "\ub0ae\uc544\uc9c8": 33, "\uc804\ucc98\ub9ac\ud574\uc57c": 33, "\ubb38\uc81c\ub2e4": 33, "\ub17c\ubb38\uc774": [33, 39], "\uc790\uccb4\uc5d0": 33, "\uc783\uc744": 33, "\ud14c\ud06c\ub2c9\ubcf4\ub2e4": 33, "\uc2e4\uc6a9\uc801\uc73c\ub85c": 33, "\ud504\ub85c\uadf8\ub7a8\uc744": 33, "\ub9cc\ub4e4\uac70\ub098": 33, "\uac1c\uccb4\ub97c": [33, 40], "\ucd5c\uc0c1\uc758": 33, "\ud5a5\ud6c4": 33, "\uc138\uacc4": 33, "\uc0dd\uc131\uae30\ub97c": 33, "\ud6c8\ub828\uc2dc\ucf1c": 33, "\uc774\ub8e8\uc5b4\uc9c4\ub2e4": 33, "\ud615\uc0c1\uc774\ub098": 33, "\uc9c8\uac10\uc758": 33, "\uba54\uc26c\ub098": 33, "\ud655\uc7a5\ud558\uba74": 33, "\ub192\uc774\ub294": 33, "\uacf5\uc720\ud560": [33, 36], "\uc608\uc0c1\ud55c\ub2e4": 33, "\uc57c\uae30\ub41c": 33, "\ud3b8\ud5a5\uc744": 33, "\ud3ec\ud568\ud560": [33, 37], "\ubb3c\ub9ac\uc801\uc73c\ub85c": 33, "\uc81c\uc791\ub420": 33, "\uc81c\ud488\uc774": 33, "\uc704\ud5d8\ud560": 33, "\uccad\uc0ac\uc9c4\uc744": 33, "\ud569\uc131\ub41c": [33, 39], "\uc2dc\uc2a4\ud15c\uc774\ub2e4": 33, "e\uac00": [33, 36], "\ubd84\uc57c\uc5d0\uc11c\uc758": [33, 34], "\uc2dc\uc791\uc810\uc73c\ub85c": [33, 43], "\uae30\uc5ec\ud560": 33, "\ud76c\ub9dd\ud55c\ub2e4": 33, "2108": 34, "01073": 34, "03": [34, 55], "\uc9c4\ud654": 34, "\uacc4\uc18d": [30, 34, 54], 
"\ub418\uc5b4\uc624\uace0\uc788\ub2e4": 34, "\uc774\ub04c\uc5b4\ub0b4\ub824\ub294": 34, "\ubd84\uc57c\ub3c4": 34, "\ud65c\ubc1c\ud788": [34, 37], "\uc9c4\ud589\ub418\uace0\uc788\ub2e4": 34, "\ubc29\uc2dd\uc73c\ub85c\uc758": 34, "editing\uc5d0\ub294": 34, "\uba87\uac00\uc9c0": 34, "sdedit\uc740": 34, "\ud574\uacb0\ud574\ub098\uc544\uac14\ub2e4\ub294": 34, "contribution\uc73c\ub85c": 34, "\uc81c\uc2dc\ud558\uc600\ub2e4": 34, "abstract\uc5d0\uc11c": 34, "\ub9d0\ud55c": 34, "editing\uc774\ub780": 34, "\uc720\uc800\uac00": [34, 40], "\uc81c\uc2dc\ud558\uba74": 34, "\ub450\uac00\uc9c0\uc758": 34, "\ud3c9\uac00\uc694\uc18c\uac00": 34, "\uc720\uc800\uc758": 34, "\ub530\ub974\ub294\uc9c0": 34, "real\ud55c\uc9c0": 34, "\uc5f0\uad6c\ubc29\uc2dd\uc740": 34, "\ub450\uac00\uc9c0\ub85c": 34, "\ub098\ub25c\ub2e4": 34, "edit\ub41c": 34, "condition\ub9c8\ub2e4": 34, "\uc7ac\ud559\uc2b5\uc744": 34, "inversion\ud55c": 34, "vactor\ub97c": 34, "\uc870\uc791\ud574": 34, "\uc815\uc758\ub418\uc5b4\uc57c\ud558\uace0": 34, "\uc7ac\ud559\uc2b5\uc774": 34, "\ud55c\uac1c\uc758": 34, "weight\ub85c": [24, 34], "\ub192\uc740\uacf3\uc73c\ub85c": 34, "\ud574\ub098\uac00\uba74": 34, "\uc5bb\uc5b4\ub0bc": 34, "\uc21c\uac04": 34, "\ubbf8\ubd84\uac12": 34, "\uc8fc\uc785\ud558\ub294\ub370": 34, "\uc8fc\uc785\ud55c\ub2e4": 34, "ddpm\uacfc\uc758": 34, "\uc815\uc758\ud558\ub294": [34, 43, 53], "equation\uc758": 34, "\uc815\ub3c4\uc774\ub2e4": 34, "1907": 34, "05600": 34, "level\uc744": 34, "\uc774\ubbf8\uc9c0\uc704\uc5d0": 34, "patch\ub97c": 34, "stroke\ub97c": 34, "coarse\ud55c": 34, "stroke\uc758": 34, "sde\uc758": 34, "noise\ud654\ub41c": 34, "\uc9c4\ud589\ud560": [34, 45], "\uc815\uc758\ud574\uc57c\ud558\ub294\ub370": 34, "realistic\ud558\uc9c0\ub9cc": 34, "\ud558\uc9c0\uc54a\uc740": 34, "faithful\ud558\uc9c0\ub9cc": 34, "artistic\ud55c": 34, "\uc5bb\uac8c\ub41c\ub2e4": 34, "sdedit\uc758": 34, "\uc885\ud569\uc801\uc778": 34, "survey\ub97c": 34, "stylegan": 34, "ada": 34, "sdedit\uc774": 34, 
"\uc790\uc5f0\uc2a4\ub7fd\uace0": [34, 37], "blend": [30, 34], "\uae30\ubc95\uacfc": 34, "\ube44\uad50\ud574\ub3c4": 34, "01952": 35, "stabil": 35, "sdxl\uc740": 35, "unet\uc744": 35, "sdxl\uc5d0\uc11c": 35, "encoder\ub85c": [24, 35], "\uc0ac\uc6a9\ub418\uba74\uc11c": 35, "\uc99d\uac00\ud588\ub2e4": 35, "\ub2e4\uc218\uc758": 35, "\ube44\uc728\uc5d0": 35, "sdxl\uc744": 35, "\uc124\uacc4\ud588\ub2e4": 35, "sdxl\uc758": 35, "\ud5a5\uc0c1\uc2dc\ud0a8": [24, 35], "\uae30\ub2a5\uc774\ub77c": 35, "\uac10\ub3c5": 35, "supervis": [30, 35, 44, 52], "\uac04\ub2e8\ud558\uba74\uc11c\ub3c4": 35, "\ud5a5\uc0c1\ud558\ub294": 35, "\ubcc4\uac1c\uc758": 35, "\ub192\uc778": 35, "sdxl\uc774": 35, "sd\ubcf4\ub2e4": 35, "\uc2dc\uac01\ud654\ud588\ub294\ub370": 35, "128x128": [35, 54], "sdedit\uc744": 35, "sd\uc640": [24, 35], "\ube14\ub85d\uc758": 35, "heterogen": 35, "\uc0ac\uc6a9\ud588\ub2e4\ub294": [24, 35, 46], "\ud14c\uc774\ube14": [35, 46], "\ucc38\uace0\ud558\uba74": [35, 46], "highest": 35, "level\uc5d0\uc11c": 35, "\ube14\ub7ed\uc744": 35, "unet\uc5d0\uc11c": 35, "lowest": 35, "8x": 35, "l\uacfc": 35, "bigg\ub97c": 35, "\uc0ac\uc6a9\ud588\uc73c\uba70": [35, 36, 54], "openclip\ub85c\ubd80\ud130": 35, "\ucd94\uac00\ud588\ub2e4": [24, 35, 36, 41], "\uc0ac\uc774\uc988\uac00": 35, "6b\ub85c": 35, "817m": 35, "\uc2dc\ud0a4\uac70\ub098": 35, "upscale\ud558\uc5ec": 35, "\uc815\ud574\uc9c0\ub294": 35, "\ubb38\uc81c\uc810\uc774": 35, "\uc800\ud558\uc2dc\ud0a4\uac70\ub098": 35, "\uc77c\ubc18\ud654\ub97c": 35, "\uc2dc\uac01\ud654\ud574\uc8fc\ub294": 35, "\uadf8\ub9bc\uc774\ub2e4": 35, "conditiong": 35, "\ubbf8\ub9cc\uc758": 35, "39": 35, "\ub2ec\ud55c\ub2e4": 35, "\ud574\uc0c1\ub3c4\uc5d0\uc11c": 35, "\ud06c\uae30\uc778": 35, "\uc81c\uacf5\ud574": 35, "\ucd94\uac00\ub41c\ub2e4": 35, "\uc815\ud560": 35, "\ud574\uc0c1\ub3c4\uc5d0": 35, "\uc758\uc874\uc801\uc778": 35, "\uc5f0\uad00\uc2dc\ud0a4\ub3c4\ub85d": 35, "imagenet\uc73c\ub85c": 35, "\uc9c4\ud589\ud574": [35, 41], "conditiong\uc5d0": 35, 
"\uc6b0\uc218\uc131\uc744": 35, "\uc785\uc99d\ud588\ub2e4": 35, "cin": 35, "\uc2dc\ucf30\uace0": 35, "70k": 35, "nocond": 35, "\ud45c": 35, "\ubcf4\ub2e4\uc2dc\ud53c": 35, "\uba38\ub9ac\uac00": [35, 38], "\uc798\ub824\uc9c4": 35, "cropping\uc73c\ub85c": 35, "\uc0dd\uc131\ub418\uc5c8\uae30": 35, "\uade0\ub4f1\ud558\uac8c": 35, "\ub192\uc774": [35, 41], "\ub108\ube44": [35, 41], "\ubaa8\uc11c\ub9ac\uc5d0\uc11c": 35, "\ud53d\uc140\uc758": 35, "\uc9c0\uc815\ud558\ub294": 35, "fourier": 35, "\ud30c\ub77c\ubbf8\ud130\ub85c\uc368": 35, "conditioning\uacfc": 35, "dm\uc5d0\uc11c\ub3c4": 35, "\uc0ac\uc6a9\ub420": [35, 36, 39, 40], "\uac15\uc870\ud55c\ub2e4": 35, "conditioning\uc740": 35, "\uacb0\ud569\ub420": 35, "1024x1024": [35, 38, 39, 50], "\uc138\uacc4\uc5d0\uc11c": 35, "\ubd80\uc790\uc5f0\uc2a4\ub7fd\ub2e4": 35, "\uc138\uacc4\uc5d0\uc11c\ub294": 35, "\ub9ce\uace0": [24, 35, 39], "\ube44\uc728\uc758": 35, "\uc9c0\ub2c8\uace0": [35, 48, 50], "\ud30c\uc778\ud29c\ub2dd\ud588\ub2e4": 35, "\ud53d\uc140\uc218\ub97c": 35, "64\uc758": 35, "\ubc30\uc218\ub97c": 35, "\uc9c0\ub2c8\ub3c4\ub85d": 35, "\ubc30\uce58\ub294": 35, "\ubc84\ud0b7": 35, "\ubc88\uac08\uc544": [35, 48], "\uac00\uba70": 35, "conditioning\uc73c\ub85c": 35, "\uc8fc\uc5c8\uc73c\uba70": 35, "\uc784\ubca0\ub529\ub418\ub294": 35, "tgt": [35, 37], "\ube44\uc728\ubc0f": 35, "pretraining\uc774": 35, "\ub9c8\uce5c": 35, "\ud559\uc2b5\ud588\uace0": [35, 41], "2\uc808\uc5d0\uc11c": 35, "\uacb0\ud569\ud588\ub2e4": 35, "sd\ub294": [24, 35], "\ud558\ub098\uc774\uace0": 35, "autoencoder\uc758": 35, "composition\uc740": 35, "ldm\uc73c\ub85c\ubd80\ud130": 35, "\ud45c\ud604\ub418\uc9c0\ub9cc": 35, "\ub514\ud14c\uc77c\ud55c": 35, "\ud5a5\uc0c1\ud558\uace0\uc790": 35, "\ud5a5\uc0c1\ud588\ub2e4": 35, "\ub05d\uc73c\ub85c": 35, "\uc544\ud0a4\ud14d\ucc98\uc5d0\uc11c": 35, "average\ub97c": 35, "\uba54\ud2b8\ub9ad\uc5d0": 35, "\uc815\ub9ac\ud574\uc8fc\ub294": 35, "\uc808\uc785\ub2c8\ub2e4": 35, "step\uc740": [35, 39, 51], 
"\uc14b\uc73c\ub85c": 35, "\ub098\uc640\uc788\ub294": 35, "\ubd84\ud3ec\uc5d0": [35, 40, 56], "600": 35, "\uc0ac\uc774\uc988\ub85c": 35, "2048\ub85c": 35, "\ud559\uc2b5\uc2dc\ucf30\uace0": 35, "\ub9c8\uce68\ub0b4": 35, "offset": [35, 36], "\uc218\uc900\uacfc": 35, "\uc601\uc5ed\uc758": 35, "\ube44\uc728\ub85c": 35, "\uacbd\ud5d8\uc801\uc73c\ub85c": 35, "6\ucc98\ub7fc": 35, "\ucc3e\uc558\ub2e4": 35, "\uadf8\ub9bc\uc774": [35, 39], "stage\ub97c": 35, "sdedit\uc5d0\uc11c": 35, "\ub530\ub790\uc73c\uba70": 35, "\uc2a4\ucf00\uc77c\uc5d0": 35, "inference\uc5d0\uc11c": 35, "diffuse\uc640": 35, "\ub123\uc5c8\ub2e4": [24, 35], "\uc2a4\ud15d\uc740": 35, "\uc120\ud0dd\uc774\uc9c0\ub9cc": 35, "\ub514\ud14c\uc77c\uc5d0\uc11c": 35, "02463": 36, "asset": 36, "\ub17c\ubb38\uc694\uc57d": 36, "2023\ub144": 36, "openai\uc758": 36, "heewoo": 36, "alex": 36, "\ubc1c\ud45c\ud55c": [36, 48], "\ub17c\ubb38\uc785\ub2c8\ub2e4": 36, "diffusers\ub97c": 36, "huggingfac": [36, 47, 55], "\uc0dd\uc131\ubc29\uc2dd": 36, "assets\uc744": 36, "\ucc28\ubcc4\uc810": 36, "mesh": [36, 52], "parameters\ub97c": 36, "\ud45c\ud604\ub9cc": 36, "\ub9ce\ub2e4\uace0": 36, "inr": 36, "inrs\ub294": 36, "coordinate\ub97c": 36, "info": 36, "\ub9f5\ud551\ud55c\ub2e4": 36, "\ud654\uc9c8\uc5d0": 36, "\uc54a\ub294\ub370": 36, "grid\ub098": 36, "arbitrari": 36, "points\ub97c": 36, "\ubbf8\ubd84\uc774": [36, 56], "inrs\uc740": 36, "applications\ub3c4": 36, "\ud0c0\uc785\uc758": 36, "inrs\uc744": 36, "\ub2e4\ub8ec\ub2e4": 36, "radiamc": 36, "along": 36, "dmtet": 36, "get3d": 36, "vertex": [36, 52], "\uc0bc\uac01\uba54\uc26c\ub97c": 36, "\uc624\ub514\uc624": 36, "application\uc5d0\uc11c": 36, "\ud3b8\ud55c": 36, "representation\uc73c\ub85c": [36, 40], "\ud588\ub2e4\ub294": [36, 39], "mildenhal": 36, "\uc2dc\uc57c": 36, "ge": 36, "\ud3b8\uc758\ub97c": 36, "\ud45c\ud604\ud588\ub2e4": 36, "\uc2dc\uc57c\uc5d0\uc11c": 36, "\ubc14\ub77c\ubcf8": 36, "\ub80c\ub354\ub9c1\ud558\uae30": 36, "ray\uc5d0": 36, "color\uac12\uc744": 36, 
"infty_0": 36, "ds": 36, "\ubcf4\ucda9\uc124\uba85": 36, "\uc801\ubd84\uc2dd\uc744": 36, "sum\uc73c\ub85c": 36, "\uac04\ub7b5\ud654": 36, "n_": [36, 37], "t_j": 36, "\ub098\ub204\ub294": 36, "coarse\uc640": 36, "\ub450\ub2e8\uacc4\ub85c": 36, "\uc138\ubd80\uc801\uc73c\ub85c": 36, "2\ubc88\uc758": 36, "ray\uc758": 36, "transmittance\ub97c": 36, "\uc815\uc758\ud558\uc600\ub2e4": 36, "alpha\uac12\uc774\ub098": 36, "opacity\uc758": 36, "\ucd1d\ud569\uc5d0": 36, "\ud574\ub2f9\ud55c\ub2e4": 36, "\ubcf4\ucda9": 36, "stf\ub294": 36, "distances\uc640": 36, "\ubaa8\ub450\ub97c": 36, "\uc139\uc158\uc5d0\uc11c\ub294": 36, "meshes\ub97c": 36, "\uad6c\uc131\ud558\uace0": 36, "rendering\uc744": 36, "cloud": 36, "polygon": 36, "mesh\uc758": 36, "survei": 36, "shape\uc744": 36, "scaler": 36, "\ubc29\ubc95\uc911": 36, "\ud558\ub098\ub2e4": 36, "mapping\ud55c\ub2e4": 36, "\ud45c\uba74\uae4c\uc9c0\uc758": 36, "0\ubcf4\ub2e4": 36, "\uc678\ubd80\uc784\uc744": 36, "\uc815\uc758\uc5d0": 36, "\uc77c\ub54c\ub294": 36, "\ud45c\uba74\uc5d0": 36, "orientation\uc744": 36, "sdfs\ub97c": 36, "voxel\uc744": 36, "tetrahedr": [36, 52], "dmtet\uc758": 36, "\ucd9c\ub825\uc740": 36, "grid\uc5d0\uc11c\uc758": 36, "displac": 36, "dmtet\uc5d0": 36, "\uc815\ubcf4\uae4c\uc9c0\ub3c4": 36, "color\ub97c": 36, "texture\ub97c": 36, "\ub9cc\ub4e4\uc5c8\ub2e4": [36, 51], "bump": 36, "displacement\uc758": 36, "tutori": [36, 44], "grabcad": 36, "bump\ub294": 36, "texture\uac00": 36, "\uc84c\uc9c0\ub9cc": 36, "\uad6c\uc758": 36, "\ubc14\ub010\uac83\uc740": 36, "\uc544\ub2d8\uc744": 36, "displacement\ub97c": 36, "\ud45c\uba74\uc774": 36, "\ubcc0\ud654\ub41c\uac83\uc744": 36, "ddpm\uc73c\ub85c": 36, "\uc11c\uc11c\ud788": 36, "\uc644\uc804\ud55c": [36, 42], "\ub418\uc5b4\uac00\ub294": 36, "noise\uc640": [36, 38], "\uad6c\ubd84\ubd88\uac00\ub2a5\ud55c": 36, "\uc0c1\uc815\ud55c\ub2e4": 36, "\uc9c4\ud589\ub418\uc9c0\ub9cc": 36, "\ud65c\uc6a9\uc2dc\uc5d0\ub294": 36, "\ub2e8\uc870\uac10\uc18c\ud558\ub294": 36, 
"\uc2a4\ucf00\uc904\uc744": 36, "alpha_0": 36, "\uc190\uc2e4\ud568\uc218\ub97c": [36, 48, 51, 55], "\uc704\ub294": 36, "\ud559\uc2b5\ud55c\ub2e4\ub294": 36, "\uc758\ubbf8\uc774\uace0": [36, 46], "\uc758\ubbf8\uc774\ub2e4": [36, 51], "denosing\uc2dc\uc5d0\ub294": 36, "latency\ub97c": 36, "heun": [36, 43, 51], "sampler\uc640": [36, 39], "scale\uc774\uace0": 36, "coher": 36, "\ucee4\uc9c0\uc9c0\ub9cc": 36, "\ub5a8\uc5b4\uc9c8": 36, "\ud544\uc694\ud558\ub2e4\ub294": 36, "\uc54c\uc544\ub0c8\ub2e4": 36, "section": [36, 44], "space\uc5d0\uc11c\ub3c4": 36, "\uc0d8\ud50c\ub4e4\uc744": 36, "space\uac04\uc758": 36, "\ub2f4\ub2f9\ud558\ub294": 36, "stage\ubc29\uc2dd\uc73c\ub85c": 36, "\ubd24\ub358": 36, "\uc608\uce21\ud558\uac8c": [24, 36], "ldm\uc5d0\uc11c\ub294": 36, "\ubcf5\uc7a1\ub3c4": 36, "penalty\ub098": 36, "\uc0ac\uc6a9\ud588\uc73c\ub098": 36, "objective\uc640": 36, "l_1": [36, 53], "l_2": 36, "regularization\uacfc": 36, "quantization\uc740": 36, "bottleneck\uc774": 36, "range\ub97c": 36, "fu": 36, "grid\ub97c": 36, "decoder\uc758": 36, "sanghi": 36, "occup": [36, 52], "liu": 36, "34": 36, "kosiorek": 36, "view\uc744": 36, "encoding\ub41c": 36, "chen": 36, "transformer\uae30\ubc18": 36, "view\uc5d0\uc11c": 36, "\uc0dd\uc131\ud558\ub294\uac83\uc744": 36, "43": 36, "table\uc744": 36, "bautista": 36, "code\ub97c": 36, "dupont": 36, "meta": 36, "erkoc": 36, "akin": 36, "fitting\uc744": 36, "\ud559\uc2b5\uc2dc\ud0a8\ub2e4": [36, 51], "mlp\uc758": 36, "\ube44\uad50\ud558\uc600\uc744\ub54c": 36, "post": 36, "\ubcc0\uacbd\ud558\uc5ec": [36, 46], "asset\ubcc4": 36, "cloud\uc758": 36, "\ub298\uc774\uace0": 36, "view\ub97c": 36, "\uad6c\uccb4\uc801\uc73c\ub85c\ub294": [24, 36], "16k": 36, "multiview": 36, "pointcloud\uc5d0": 36, "crack\uc774": 36, "\ubc1c\uc0dd\ud588\ub2e4\uace0": 36, "\ub80c\ub354\ub9c1\uc2dc": 36, "\uc870\uba85\uacfc": 36, "\ubb3c\uccb4\ud45c\uba74\uc758": 36, "\uac04\ub7b5\ud654\ud588\ub2e4": 36, "encoder\uc5d0\uc11c": [24, 36, 45], "function\uc5d0\uc11c": 
36, "asset\uc758": 36, "\uc758\ubbf8\uc0c1": 36, "\uc785\ub825\ubc1b\uc740": 36, "\uc735\ud569\ud558\uc5ec": 36, "\uc7a5\uc810\uc73c\ub85c": 36, "\ud588\uc73c\ubbc0\ub85c": 36, "\uc758\ub3c4\uac00": 36, "\ub290\uaef4\uc9c0\ub294": 36, "pseudocod": [36, 43, 54], "encoder\uc5d0": 36, "clouds\uc640": 36, "views\ub294": 36, "backbone\uc5d0": 36, "\ucc98\ub9ac\ub418\uc5b4": 36, "vectors\uac00": 36, "bottleneck\uacfc": 36, "\uc0ac\uc804\ud559\uc2b5\uc2dc": 36, "sdf\uc640": 36, "head\ub97c": 36, "2\uc640": 36, "head\ub4e4\uc744": 36, "net\uc774": 36, "4096\uac1c\uc758": 36, "\uc0d8\ud50c\ub9c1\ud558\uc600\uc73c\uba70": 36, "nerf\uc5d0\uc11c\ub294": 36, "transmittance\uc5d0": 36, "integr": [36, 54], "\uc5bb\uc740transmittance\ub85c": 36, "rendering\uacfc": 36, "rendering\uc2dc": 36, "t_c": 36, "\uc608\uce21\ud558\uc600\ub2e4": 36, "truth\ub85c\ub294": 36, "rendering\uacb0\uacfc\uc758": 36, "channel\uc744": 36, "\uc190\uc2e4\ud568\uc218\ub294": [36, 48, 53, 55], "\ubaa9\uc801\uc774\uc5c8\uc73c\ub098": 36, "mesh\ub3c4": 36, "\uc0dd\uc131\ud574\uc57c": 36, "\uc81c\uac70\uac00": 36, "\uc911\uc694\ud558\uc600\uc744": 36, "\uc0dd\uac01\ub41c\ub2e4": 36, "l_t": 36, "\ucd5c\uc885\uc801\uc73c\ub85c\ub294": 36, "\ud569\ud558\uc5ec": 36, "how": [36, 45], "mlps\uc5d0": 36, "heads\ub97c": 36, "mlps\ub294": 36, "triangl": 36, "mesh\ub97c": 36, "vertex\uc758": 36, "grid\ub85c": 36, "\uc62e\uaca8": 36, "cube\ub97c": 36, "\uc9c4\ud589\ud574\uc57c": 36, "color\ub294": 36, "\ud65c\uc6a9\ud558\uba74": [24, 36], "\uad6c\ucd95\uc2dc": [36, 50], "preprocessing\uc5d0": 36, "\uc2e4\ud5d8\uc2dc": 36, "\ud588\uc73c\uba70": 36, "e\uc758": 36, "target\uc744": 36, "\uad6c\ud588\ub2e4": 36, "target\ub85c\ub294": 36, "cloud\uc5d0\uc11c": 36, "\ud2b9\uc815\uc704\uce58": 36, "nearest": [36, 55], "neighbor": 36, "point\uc758": 36, "loss\uc640": 36, "heads\uac00": 36, "distillation\uc744": [36, 51], "\uac16\uac8c\ub41c": 36, "end\ub85c": 36, "tune\ud55c\ub2e4": 36, "rendering\uc5d0\ub294": 36, 
"\ubd88\uc548\uc815\ud588\uc73c\ubbc0\ub85c": 36, "\uc190\uc2e4\ud568\uc218\ub9cc": 36, "\uc801\uc808\ud568\uc744": 36, "rendering\uc5d0": 36, "n\uc740": 36, "s\ub294": 36, "\ud654\uc9c8": 36, "construct": 36, "rgba": 36, "\uacb0\uacfc\ubb3c\ub85c": 36, "alpha\ucc44\ub110\uc744": 36, "\ubc14\uafb8\uc5c8\ub2e4": 36, "sequences\uc758": 36, "times1024": 36, "1024\uc778": 36, "token\ucc98\ub7fc": 36, "token\uc740": [36, 45], "matrices\uc758": 36, "row\uc640": 36, "\uc77c\uce58\ud55c\ub2e4": 36, "length\uc640": 36, "width\uac00": 36, "\uace0\ucc28\uc6d0\uc758": 36, "\ucc44\ub110\uc758": 36, "\uc99d\uac00\ud558\uc600\uae30": 36, "context\ub85c": 36, "e\uc640\uc758": 36, "\ucc28\uc774\uc810\uc73c\ub85c\ub294": 36, "prediction\uc73c\ub85c": 36, "parameterize\ud558\uc9c0": 36, "\uc54a\uc558\ub2e4\ub294": 36, "\ub300\uc218\uc801\uc73c\ub85c\ub294": 36, "\uc758\ubbf8\uc774\ub098": 36, "processing\ubd80\ubd84\uc5d0\uc11c\ub294": 36, "\uacc4\uc0b0\uc2dc": 36, "20\uac1c\uac00": 36, "60\uac1c\uc758": 36, "rendering\ud588\ub2e4": 36, "20\uac1c\ub9cc": 36, "\uc0ac\uc6a9\ud588\uc744\ub54c": [36, 43, 54], "\uc601\uc5ed\ub54c\ubb38\uc5d0": 36, "crack": 36, "\uac00\uc544\ub2cc": 36, "16k\uc758": 36, "encoder\ud559\uc2b5\uc744": 36, "\uc18c\uc7ac\uc640": 36, "\ub77c\uc774\ud305\uc744": 36, "\ub77c\uc774\ud305": 36, "\uc870\uac74\ub0b4\uc5d0\uc11c": 36, "ambient\uc640": 36, "shading\ub9cc": 36, "\ubc18\uc0ac\uad11\uc774": 36, "\uace0\ub824\ub418\uc9c0": 36, "\ub9e4\ub048\ud55c": 36, "\ubb3c\uccb4\ub294": 36, "\ucd94\uc815\ub428": 36, "phong": 36, "\uae30\ubcf8\uc801\uc778": 36, "shading\ubc29\uc2dd\uc73c\ub85c": 36, "specular\ub97c": 36, "realistic_visualisation_of_endoscopic_surgery_in_a_virtual_training_environ": 36, "\ub300\ub7b5": 36, "100\ub9cc\uac1c\uc758": 36, "assets\uacfc": 36, "12\ub9cc\uac1c\uc758": 36, "\uc218\uc9d1\ud588\ub2e4": 36, "\uc131\ub2a5\ud3c9\uac00": 36, "distillation\uc5d0\uc11c": 36, "\uc88b\uc544\uc9c4\ub2e4": 36, "stf\uc758": 36, "\uc0c1\uc2b9\ud55c\ub2e4": 36, 
"e\ube44\uad50": 36, "\uc138\ubaa8": 36, "\ub9c8\ud06c\uac00": 36, "\uc6d0\ud615": 36, "e\uc774\ub2e4": 36, "\ub192\ub2e4": [36, 46], "\ub9ce\uc740\uc218\uc758": 36, "\uc0ac\uc6a9\ud558\uc5ec\ub3c4": 36, "\uc6b0\uc218\ud568": 36, "score\uc758": 36, "\uc0dd\uc131\uacb0\uacfc\uc758": 36, "\uac83\uc774\uace0": [36, 38], "precision\uc758": 36, "\uc0dd\uc131\uacb0\uacfc\uc640": 36, "\ube44\uc2b7\ud55c\uc9c0": 36, "\uc0dd\uc131\uc2dc\uc5d0\ub294": 36, "\ubca4\uce58": 36, "e\uc5d0\uc11c": 36, "\ub098\ubb34\uc0ac\uc774": 36, "\ube48\uacf5\uac04\uc744": 36, "\ubb34\uc2dc\ud574\ubc84\ub9b0\uac83\uc744": 36, "\uac15\uc544\uc9c0\uc640": [36, 45], "\ucef5": 36, "\ucf00\uc774\uc2a4\uc5d0\uc11c": 36, "\uc2e4\ud328\ud558\ub294": [30, 36, 44], "\ube44\uad50\uacb0\uacfc": 36, "latency\uc5d0\uc11c": 36, "\uc5ec\ub7ec\uac00\uc9c0": [36, 49], "\uac00\uc9c4\ubb3c\uccb4\ub97c": 36, "\uc81c\ud55c\uc801\uc774\uae30": 36, "\uc218\uc9d1\ud558\uba74": 36, "\ub098\uc544\uc9c8": 36, "texture\uc758": 36, "\ubb34\uc2dc\ud558\ub294": 36, "\uac1c\uc120\ub420\uc218": 36, "\uae30\uc220\ub4e4\uc744": 36, "\uc735\ud569\ud558\ub294\ub370\uc5d0": 36, "e\ub85c": 36, "\uc218\ub834\ub3c4": 36, "\uac00\ub2a5\ud560": 36, "function\uacf5\uac04\uc5d0\uc11c": 36, "\uc804\uac1c\ud558\uc5ec": 36, "\uc0dd\uc131\ubaa8\ub378\ub4e4\uacfc": 36, "\ud765\ubbf8\ub85c\uc6b4": [36, 44, 47], "\uc788\uc74c\ud655\uc778\ud588\ub2e4": 36, "represention\uc744": 36, "\uc0dd\uc131\ud568\uc5d0": 36, "\uc778\ubb3c\ub4e4\uc5d0": 36, "chatgpt\ub85c": 36, "valuabl": 36, "write": 36, "feedback\uc744": 36, "\ubc1b\uc558\ub2e4\uace0": 36, "\ubd80\ubd84\uc788\uc5c8\ub2e4": 36, "One": [37, 40, 43], "03231": 37, "sty": 37, "lize": 37, "ne": 37, "\ud55c\uc7a5\uc758": 37, "\uc785\ud788\uace0\uc790\ud558\ub294": 37, "\uc9c4\ud589\uc911\uc774\ub2e4": 37, "\uc774\uc804\uae4c\uc9c0\uc758": 37, "\ud55c\uc7a5\uc529\uc744": 37, "\uc2dd\uc774": 37, "\ubc29\uc2dd\uc5d0\ub294": 37, "face\ub97c": 37, "\uc758\uc874\ub3c4\uac00": 37, "\uc785\ud788\uae30": 
37, "\ud798\ub4e4\ub2e4": [37, 51], "space\uc548\uc5d0\uc11c": 37, "entangl": [37, 38, 47], "\ub418\uc5b4\uc788\ub2e4": 37, "styo\ub294": 37, "\ud3ec\uc6a9\ud558\ub294": 37, "base\ubaa8\ub378\ub85c": 37, "\ucc44\uc6a9\ud55c\ub2e4": 37, "disentangl": 37, "learner": 37, "idl": 37, "fcc": 37, "idl\ub85c\ubd80\ud130": 37, "\uc6d0\ud558\ub294\ub300\ub85c": 37, "\uc7ac\uc870\ud569": 37, "\uc720\uc9c0\ud558\uae30\uc704\ud574": 37, "\uc7ac\uc0ac\uc6a9\ud558\ub294": 37, "gan\uc774": [37, 40, 48], "\ubd84\uc57c\ub97c": 37, "\uc7a5\uc545\ud558\ub358": 37, "\ub4f1\uc7a5\uc73c\ub85c": [37, 39], "\uc8fc\ubaa9\uc744": [37, 49], "\uc2dc\uc791\ud588\ub2e4": 37, "\uac00\ub2a5\ud574\uc84c\uc9c0\ub9cc": 37, "\ubd80\ubd84\uae4c\uc9c0": 37, "control\ud558\uae30\uc5d0\ub294": 37, "fine\ud55c": 37, "\uc815\ubcf4\uae4c\uc9c0": 37, "\ubcf4\uc774\uba74\uc11c": 37, "stylegan\uc744": 37, "\uc758\uc874\uc131\uc774": 37, "\ucee4": 37, "artist": [37, 41], "\uc785\ud788\ub294\ub370": 37, "\uac1c\uc120\ud55c": 37, "transfer\ub97c": 37, "disentagl": 37, "\ubd84\ub9ac\ud558\ub294": 37, "\ubc18\ub300": 37, "a\uc758": [37, 38], "conext": 37, "\ubc30\uc81c\ud568\uacfc": 37, "\ud3ec\ud568\ud558\uae30\uc704\ud574": 37, "\ubd80\uc815\uc758": 37, "except": 37, "auxiliari": [37, 49], "\uad6c\uc131\ud574": [37, 41], "\uc784\uc758\ub85c": 37, "prompt\uac04": 37, "disentanglement\ub97c": 37, "\uc774\ubbf8\uc9c0\uc5d0\ub294": 37, "\uc774\ubbf8\uc9c0\ub9cc\uc758": 37, "style\uacfc": [37, 38], "\uad6c\ubcc4\ud558\ub294\ub370": 37, "idl\uc758": 37, "\ud559\uc2b5\ub9cc\uc73c\ub85c": 37, "transfer\uac00": 37, "\uac1c\uc120\ud558\uae30\uc704\ud574": 37, "\ub3c4\uc785\ud558\uc600\ub2e4": 37, "idl\ub85c": 37, "\uc870\ud569": 37, "recombin": 37, "\uc720\uc9c0\ud558\ub3c4\ub85d": 37, "\uc8fc\uc785\ud558\uae30\uc704\ud574": 37, "promt": 37, "m\uc758": 37, "layout\uc5d0": 37, "\ubbf8\uce5c\ub2e4": 37, "\uc8fc\uc785\ud569\uc73c\ub85c\uc368": 37, "replace\ud558\uc9c0\uc54a\uace0": 37, "index\ub9cc": 37, "replac": [37, 53], 
"time\uc5d0\uc11c": 37, "n\ubc88": 37, "\uc0ac\uc6a9\ud568\uc73c\ub85c\uc11c": 37, "\uc2e4\ud5d8\uc0c1": 37, "\uc774\ud558\uc758": [37, 47], "\ucd94\ucc9c": 37, "ak47": 37, "m4a1": 37, "400": 37, "ldm\uacfc": 37, "styo\uac00": 37, "\uc720\uc9c0\ud568\uacfc": 37, "\uc0dd\uc131\ud574\ub0b8\ub2e4": [37, 40], "study\ub3c4": 37, "\ubaa8\ub378\ub4e4\uc5d0": [37, 39], "templat": 37, "\ub123\uace0": 37, "\ud559\uc2b5\ud560\uacbd\uc6b0": 37, "overfitting\uc774": 37, "\uc2ec\ud558\uace0": 37, "\ubd84\ub9ac\uc5d0": 37, "set\uc758": 37, "trick\ub3c4": 37, "\uc801\uc6a9\ud558\ub294\uac83\uc774": 37, "\uc0dd\uc131\ud574\ub0c8\ub2e4": 37, "inference\ud560": 37, "fcc\ub97c": 37, "\ub192\uc544\uc838": 37, "significant\ud55c": 37, "\uc0dd\uc131\ub418\ub294\uac83\uc744": 37, "photorealistic\uc5d0\uc11c": 37, "artistic\ud558\uac8c": 37, "\ubc14\ub00c\uace0": 37, "idl\uacfc": 37, "10\ubd84\uc774": 37, "\uac78\ub9ac\ubbc0\ub85c": 37, "efficiency\uac00": 37, "\ubabb\ud558\ub2e4\ub294": 37, "2019": 38, "1812": 38, "04948": 38, "huangzh13": 38, "stylegan\uc785\ub2c8\ub2e4": 38, "gan\uacfc": 38, "\ubcc0\uacbd\ud568\uc73c\ub85c\uc368": 38, "\uc62c\ub9ac\uace0": 38, "loss\ub098": 38, "\uac1c\uc120\uc5d0": 38, "\ubcf4\ub3c4\ub85d": 38, "\ud558\uc8e0": 38, "\uc81c\uc548\ud558\uc5ec": 38, "\ub192\uc774\uba74\uc11c": 38, "\uac00\ub2a5\ud574\uc84c\uc2b5\ub2c8\ub2e4": 38, "contribution\uc744": [38, 46], "abstract\uc5d0\ub294": 38, "\ubb38\uc7a5\uc774": 38, "lead": [38, 52], "automat": [38, 50], "unsupervis": [38, 44], "freckl": 38, "\uc77c\uc744": 38, "\ubcf4\uc2dc\uba74": [38, 53], "attribute\uc758": 38, "separation\uc774": 38, "\uc598\uae30\ud558\uace0": 38, "stylegan\uc758": 38, "\ubaa9\uc801\uc744": 38, "\uc790\uc2e0\uc774": 38, "\ub9cc\ub4e4\uace0\uc790": 38, "\uc88b\ub354\ub77c\ub3c4": 38, "\uc0ac\uc6a9\uc790\uc758": 38, "\uc758\ub3c4\uc640": 38, "\uc0c1\uad00\uc5c6\ub294": 38, "\ub0b4\ubc49\uc5b4\uc900\ub2e4\uba74": 38, "\uc2e4\uc6a9\uc131\uc774": 38, "\uc88b\ub2e4\uace0": [38, 39, 43, 54, 
55], "\uadfc\ub798\uc5d0": 38, "\uc778\uae30\ub97c": 38, "\uc5bb\uc5c8\ub358": 38, "\uc774\uc720\ub3c4": 38, "\ub204\uad6c\ub098": 38, "\uc810\ub3c4": 38, "\ud55c\ubaab\ud588\ub2e4\uace0": 38, "stylegan\uc740": 38, "\ubaa8\ub378\uc774\ub77c\ub294": 38, "\uc758\ubbf8\uc788\ub2e4\uace0": 38, "network\ub294": 38, "4x4\uc5d0\uc11c": 38, "1024x1024\uae4c\uc9c0": 38, "\ub192\uc5ec\uc90d\ub2c8\ub2e4": 38, "gan\ud558\uace0": 38, "\ud2b9\uc774\ud55c": 38, "z\ub97c": 38, "\uac70\uccd0\uc11c": [38, 53], "\uad6c\uc870\uc785\ub2c8\ub2e4": 38, "z\ub294": 38, "distribution\uc5d0\uc11c": [38, 46], "\uc0d8\ud50c\ub9c1\uc73c\ub85c": 38, "\uc5bb\uc2b5\ub2c8\ub2e4": 38, "distribution\uc73c\ub85c": 38, "\ubcf4\ub0b4\ub294": 38, "\ubc30\uc6b0\uac8c": 38, "\ubd84\ud3ec\ub294": 38, "\uc0dd\uae30\uac8c": 38, "\uc8fc\uc5b4\uc838\uc11c": 38, "\ud53c\ubd80\uac00": 38, "\ud76c\uba74\uc11c": 38, "\uc0d8\ud50c\ub4e4\uc774": 38, "\ud574\ubd05\uc2dc\ub2e4": 38, "\ud53c\ubd80\uc0c9\uacfc": 38, "\uba38\ub9ac": 38, "\uae38\uc774\ub77c\ub294": 38, "\uc5bd\ud788\uac8c": 38, "\ubc14\uafc0": 38, "\ud558\ub098\ub3c4": [38, 40], "\uc77c\uc5b4\ub098\uac8c": 38, "gaussian\uc5d0\uc11c": 38, "w\ub97c": 38, "normalization\uc740": 38, "\ucc44\ub110\ub9c8\ub2e4": 38, "\ucde8\ud574\uc8fc\ub294": 38, "normalization\uc5d0": 38, "scale\uc744": [38, 46], "\uacf1\ud574\uc8fc\uace0": 38, "\ub354\ud574\uc8fc\ub294": 38, "transformation\uc73c\ub85c": 38, "w\ub294": 38, "\ubcf4\ub0b4\uc9c0\uac8c": 38, "adain\uc758": 38, "adain\uc740": 38, "\ube14\ub85d\ub9c8\ub2e4": 38, "\uac1c\uc529": 38, "style\uc740": 38, "\uc5f4\uc5ec\ub35f": 38, "adain\uc744": 38, "generator\uc5d0": [38, 40], "localization\uc774\ub77c\ub294": 38, "\ud2b9\uc9d5\uacfc\ub3c4": 38, "localization\uc774\ub780": 38, "\ubc14\uafc8\uc73c\ub85c\uc368": 38, "\ud2b9\uc9d5\ub4e4\uc744": 38, "\uc758\ubbf8\uc785\ub2c8\ub2e4": 38, "map\ub4e4\uc740": 38, "normalization\ub418\uace0": 38, "statistics\ub97c": 38, "convolution\uc5d0": 38, "\uc801\uc6a9\ub418\uace0": 38, 
"convolution\uc5d0\uc11c": 38, "normalization\uc774": 38, "\uc218\ud589\ub418\uae30": 38, "style\uc774": [24, 38], "\ubd84\ub9ac\ub418\uac8c": 38, "\ud559\uc2b5\ub420": [38, 39], "stylemod": 38, "latent_s": [38, 42], "use_wscal": 38, "lin": 38, "equalizedlinear": 38, "gain": 38, "n_channel": 38, "layerepilogu": 38, "thing": 38, "dlatent_s": 38, "use_nois": 38, "use_pixel_norm": 38, "use_instance_norm": 38, "use_styl": 38, "activation_lay": 38, "noiselay": 38, "pixel_norm": 38, "pixelnormlay": 38, "instance_norm": 38, "instancenorm2d": 38, "top_epi": 38, "ordereddict": 38, "style_mod": 38, "dlatents_in_slic": 38, "b\uc758": 38, "style\ub85c": 38, "\ubcc0\uacbd\ud574\uc11c": 38, "\uc774\ubbf8\uc9c0\ub4e4\uc785\ub2c8\ub2e4": [38, 54], "18\uacf3\uc5d0\uc11c": 38, "\uc0ac\uc6a9\ub418\ub294\ub370": 38, "4\uacf3": 38, "\uadf8\ub2e4\uc74c": 38, "10\uacf3": 38, "\uc815\uc758\ud558\uc600\uc2b5\ub2c8\ub2e4": [38, 43], "\uc717": [38, 44], "\ubd80\ubd84\uc5d0\uc11c\ub294": 38, "\ud3ec\uc988\ub098": 38, "\uc2a4\ud0c0\uc77c\uac19\uc774": 38, "\uac08\uc218\ub85d": 38, "\ud2c0\uc744": 38, "\ubd80\ubd84\ub4e4\uc744": 38, "b\uc5d0\uc11c": [38, 49], "\uac00\uc838\uc654\uc74c\uc744": 38, "\uc548\uc5d0\ub294": 38, "\ubc14\ub014": 38, "\uc8fc\uadfc\uae68": 38, "\uba38\ub9bf\uacb0": 38, "\ud53c\ubd80": 38, "\ub354\ud574\uc9d1\ub2c8\ub2e4": 38, "\uc548\uc5d0\uc11c\ub3c4": 38, "\ub514\ud14c\uc77c\ub4e4\uc740": 38, "deviation\uc744": 38, "\uad6c\ud574\ubd24\uc744": 38, "\uc5bc\uad74\ud615\uacfc": 38, "attribute\ub294": 38, "\ubcc0\ud558\uc9c0\uc54a\uc9c0\ub9cc": 38, "\uba38\ub9ac\uce74\ub77d\uacfc": 38, "\uc0dd\uae40\uc744": 38, "\uc8fc\uc9c0": 38, "\uc5d0\ub9cc": [38, 55], "\uba38\ub9ac\uce74\ub77d\uac19\uc740": 38, "\ub514\ud14c\uc77c\uc774": [38, 53], "\uc0b4\uc544\uc788\uc9c0": 38, "\ub4e4\uc5b4\uac04": 38, "\uba38\ub9ac\uce74\ub77d\uc758": 38, "\ub07c\uce5c\ub2e4\ub294": 38, "localization\uc774": 38, "\ub418\uac8c\ud558\uae30": 38, "mixing\uc774\ub77c\ub294": 38, "\uc55e": 38, "\ucabd": 
38, "layer\uc5d0\ub294": 38, "generator\uac00": 38, "\uc778\uc811\ud55c": [38, 43], "style\ub07c\ub9ac": 38, "correlated\ub418\uc5b4\uc788\ub2e4\uace0": 38, "\ub9c9\uc544\uc11c": 38, "localization\uc744": 38, "\ub418\uac8c": 38, "\ubaa9\uc801\uc785\ub2c8\ub2e4": [38, 56], "\uc800\uc790\ub4e4\uc774": [38, 39, 49], "\uc788\uc5c8\ub294\uc9c0": 38, "\ud655\uc778\ud574\ubd05\uc2dc\ub2e4": 38, "\ud45c\uc640": 38, "\ubc29\ubc95\ub4e4\uc744": [38, 54], "fid\uac00": [38, 39, 46], "08466": 39, "\uc774\ubc88\uc5d0": 39, "\ub9ac\ubdf0\ud560": 39, "\uad6c\uae00": [39, 49], "\ub9ac\uc11c\uce58": 39, "\uadf8\ub8f9\uc5d0\uc11c": 39, "tmlr": 39, "transact": 39, "2023\uc5d0": 39, "\uc81c\ucd9c\ud55c": 39, "\ub17c\ubb38\uc778": 39, "\uc18d\ub3c4\ub85c": 39, "\ubc1c\uc804\ud558\uace0": 39, "\uc788\ub294\ub370\uc694": [39, 55], "\uc218\uc900\uc774": 39, "\uc5bc\ub9cc\ud07c": 39, "\uc654\ub294\uc9c0": 39, "\ub370\uc774\ud130\uc778": 39, "\uc815\ub3c4\uac00": 39, "\ub418\uc5c8\ub294\uc9c0": 39, "augment\ub41c": 39, "\uc815\ub3c4\uae4c\uc9c0": 39, "\uc654\ub294\uc9c0\uc5d0": 39, "\uc2e4\ud5d8\uacfc": 39, "\ub2f5\uc744": 39, "\uc81c\uc2dc\ud569\ub2c8\ub2e4": [39, 44, 50, 52, 53, 54], "\uae00\uc758": 39, "\ubaa9\ucc28\ub294": 39, "\ub0b4\uc6a9\uacfc": [39, 51], "\uad6c\uc131\ud558\uc600\uc2b5\ub2c8\ub2e4": 39, "augmentation\uc73c\ub85c": 39, "imagenet\uc5d0": 39, "tuning\ub41c": [24, 39], "\uc0ac\uc6a9\ud558\uc600\uc744": 39, "\uae30\uc220\uc801\uc73c\ub85c": 39, "\uc5c4\uccad": 39, "\uc5c6\ub294\ub370\uc694": 39, "\uc0ac\uc6a9\ud558\ub358": 39, "\ubc29\ubc95\ub4e4\uacfc\ub294": 39, "imagen\uc744": 39, "\uc0c8\ub86d\uc2b5\ub2c8\ub2e4": 39, "\ubc1c\uc804\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 39, "\uc9c8\ubb38\uc774": 39, "\ub2f9\uc5f0\ud558\uace0": 39, "\ucc3e\uace0\uc790": 39, "\uc774\uc57c\uae30": 39, "imagen\uc774": [39, 49], "ca": [39, 54], "\uae38\uc218\ub85d": 39, "\ud5a5\uc0c1\ub418\uc5c8\ub2e4": 39, "\ub370\uc774\ud130\ub85c\ub9cc": 39, "\uc9c4\uc9dc": [39, 44], "\uc815\ud655\ub3c4\uc640": 
39, "\uc801\ub2e4\ub294": 39, "\ub354\ud574\uc11c": 39, "\ud559\uc2b5\ud588\uc744": 39, "\ubaa8\ub378\ub4e4\uc5d0\uc11c": 39, "\ud5a5\uc0c1\uc774": 39, "\ud558\ub824\uace0": 39, "\ud588\ub358": 39, "\ubc29\ubc95\ub4e4\uc5d0": 39, "\uc9e7\uac8c": 39, "\ud590\ub824\uace0": 39, "\ucd5c\uadfc\uc5d0\ub294": [24, 39, 53], "\ubcf4\uac15\ud558\ub294\ub370": 39, "\uc0ac\uc6a9\ub418\uae30": 39, "\uc2dc\uc791\ud588\uc2b5\ub2c8\ub2e4": 39, "Is": 39, "readi": 39, "glide\ub85c": 39, "shot\uacfc": 39, "\uc2dc\ucf30\uc73c\uba70": 39, "\uc138\ud2b8\uac00": [39, 44], "100\uc758": 39, "\uc2dc\ucf30\ub2e4\uace0": 39, "\ud3ec\ud568\ud574\uc11c": 39, "\ub17c\ubb38\ub4e4\uc740": 39, "\ud558\uc5ec\ub3c4": 39, "\uc2dc\ud0a4\uc9c0": 39, "\ubabb\ud588\uc2b5\ub2c8\ub2e4": 39, "\uc54a\uc558\uc2b5\ub2c8\ub2e4": [39, 43], "\ub17c\ubb38\ub4e4\uacfc\ub294": 39, "\ub3d9\uc791\ud558\uace0": 39, "\uc6cc\ub099": 39, "\uc4f0\uc5ec\uc11c": 39, "\uc124\uba85\uc740": [39, 44], "\uc0dd\ub7b5\ud558\uace0": 39, "cas\uc5d0": 39, "\uc368\uc838": 39, "\ub0b4\uc6a9\uc73c\ub85c": 39, "\uc18c\uac1c\ud558\uaca0\uc2b5\ub2c8\ub2e4": 39, "cas\ub294": 39, "\ub9cc\ub4e4\uc5b4\ub0b8": 39, "\uc9c0\ud45c\uc785\ub2c8\ub2e4": 39, "\ub9cc\ub4e4\uc5b4\ub0c5\ub2c8\ub2e4": 39, "\ub370\uc774\ud130\ub9cc\uc744": 39, "50\uc744": 39, "\uc2dc\ud0a4\uace0": [30, 39], "cas\uac00": 39, "imagenet\uacfc": 39, "\ube44\uc2b7\ud558\ub2e4\uba74": 39, "\uac00\uc815\uc744": [39, 46, 54, 56], "\uc774\ud574\ud558\uba74": [30, 39], "\uc800\uc790\uc5d0": 39, "\uadf8\ub3d9\uc548": 39, "\uc54a\uc558\ub2e4\uace0": 39, "\uc0d8\ud50c\ub85c\ub9cc": 39, "\ub5a8\uc5b4\uc84c\uace0": 39, "\ub2f9\uc5f0\ud574\ubcf4\uc785\ub2c8\ub2e4": 39, "\ub5a8\uc5b4\uc84c\ub2e4\uace0": 39, "\uc544\ub9c8\ub3c4": 39, "\ud558\uc600\ub294\uc9c0\uc5d0": 39, "\ubaa8\ub378\ub85c\ub294": [39, 48], "\uc0ac\uc6a9\ud558\uc600\uc2b5\ub2c8\ub2e4": [39, 43, 53], "\ud074\ub798\uc2a4\uc640": 39, "\uc9c0\uc5d0": 39, "\uace0\ubbfc\uc774": 39, "\ud544\uc694\ud588\ub2e4\uace0": 39, 
"\ud558\uc600\ub294\ub370": [30, 39], "imagen\uc5d0\uc11c": 39, "\uc800\ud558": [24, 39], "\ub418\uba74\uc11c": 39, "\ud604\uc0c1\uc77c": 39, "\ub450\ub2e8\uc5b4": 39, "\uc774\ub984\uc73c\ub85c": 39, "\uc774\ubbf8\uc9c0\uace0": 39, "\uc624\ub978\ucabd\uc774": 39, "\uc801\uc6a9\ub418\uc9c0": [39, 56], "imagen\uc785\ub2c8\ub2e4": 39, "\uc544\ub798\uc5d0\uc11c": [39, 44], "\ud074\ub798\uc2a4\uc778": 39, "schipperke\ub97c": 39, "\uc2a4\ud0a4\ud37c\ud0a4\ub77c\ub294": 39, "\ud488\uc885\uc744": 39, "\uc758\ubbf8\ud558\ub294\ub370": 39, "\uaf43\uacfc": 39, "\uc804\ud600": [39, 44], "\uc5c9\ub6b1\ud55c": 39, "\ud588\ub294\uc9c0\ub97c": 39, "\uc6d0\uc73c\ub85c": 39, "imagen\uc5d0\uc11c\ub3c4": 39, "\ubd80\ubd84\uc774\ub77c": 39, "\uc54a\uc558\uace0": 39, "\ucd9c\ub825\uc73c\ub85c": 39, "\uace0\ud574\uc0c1\ub3c4\uc758": [39, 41, 51], "\uc801\uc5b4\uc11c": 39, "210k": 39, "\ud559\uc2b5\ud558\uc600\uace0": [39, 52], "optimizer\uc758": 39, "\uc0ac\uc6a9\ud558\uc600\ub358": [39, 52], "optimizer\ub97c": 39, "490k": 39, "\uc120\ud0dd\uc758": 39, "10k\uac1c\uc758": 39, "\uc0d8\ud50c\ub4e4\uc5d0": 39, "\uacc4\uc0b0\ud588\uc744": 39, "\uc120\ud0dd\ud588\ub2e4\uace0": 39, "\uc815\ud588\ub294\uc9c0\ub97c": 39, "coeffici": [39, 43, 53], "\ubc1b\ub294\ub2e4\uace0": 39, "\uc124\uba85\ud558\uba74": 39, "\ud655\ub960\uc801\uc778": 39, "\ubcf4\uc774\uac8c": 39, "\ub9cc\ub4e4\uba70": 39, "\ucc38\uace0\ud574\uc8fc\uc138\uc694": 39, "\ubd84\ub958\uae30\ub098": 39, "\ubc18\uc601\ud560\uc9c0\ub97c": 39, "\uc758\ubbf8\ud560": 39, "\ud2b9\uc131\uc774\ub098": 39, "\uacc4\uc218\ub97c": 39, "\uc870\uc808\ud568\uc73c\ub85c\uc368": 39, "\ub85c\uadf8": 39, "\uacc4\uc218\ub294": 39, "\uc0ac\uc6a9\ub418\uba70": 39, "\uc124\uc815\ubc95\uc5d0": 39, "\uc124\uba85\ud558\uaca0\uc2b5\ub2c8\ub2e4": [39, 46], "\ud2b9\uc9d5\uacfc": 39, "\ub2e4\uc591\uc131\uc758": 39, "1\ucc28": 39, "sweep\uc73c\ub85c": 39, "\uc0d8\ud50c\ub7ec\ub97c": 39, "50k\uc5d0": 39, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\ub97c": 39, 
"\ucc3e\uc2b5\ub2c8\ub2e4": 39, "sweep\uc758": 39, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\uc758": 39, "\ubc94\uc704\ub294": 39, "75": [30, 39], "sweep": 39, "fid\ub294": 39, "variance\ub294": 39, "1000\uc774\uc5c8\uc744": 39, "\ub54c\ub77c\uace0": 39, "sweep\uc774": 39, "\ub05d\ub09c": 39, "weight\uc5d0": 39, "sweep\uc744": 39, "\ub54c\uc5d0\ub294": [39, 45], "2m": 39, "guidacn": 39, "cas\ub97c": 39, "\uce21\uc815\ud588\ub2e4\uace0": 39, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\uc5d0": 39, "sweep\uc5d0": 39, "\uacb0\uacfc\uace0": 39, "\uac00\uc6b4\ub370\uc640": 39, "\ub098\ud0c0\ub0b8": 39, "\uc120\ud0dd\ud558\ub294": [39, 45], "range\ub294": 39, "denos": 39, "129": 39, "\ubcc0\uacbd\ud588\uc744": 39, "cas\uc758": 39, "\uadf8\ub798\ud504\ub97c": [39, 51], "\uadf8\ub798\ud504\uc785\ub2c8\ub2e4": 39, "logvar": [39, 56], "coeff\uac00": 39, "3\uc77c": 39, "\ubcf4\uc600\uc73c\uba70": [30, 39], "\ubd84\uc11d\ud574\ubcf4\uc790\uba74": 39, "\ub192\uc544\uc9c0\uc9c0\ub9cc": 39, "score\uc5d0\ub294": 39, "\ubd80\uc815\uc801\uc778": 39, "\uc8fc\uba70": [39, 49], "augmentation\uc774": 39, "0\uc77c": 39, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130": 39, "\uc124\uc815\ud55c": 39, "\uac19\ub2e4\uace0": 39, "\ud504\ub85c\ud1a0\ucf5c\uc744": 39, "\ub530\ub790\ub294\uc9c0\uc5d0": 39, "balance\ub97c": 39, "\uc720\uc9c0\ud558\uba70": 39, "\ud569\uc131\ud588\uc73c\uba70": 39, "\uaddc\ubaa8\ub294": 39, "1\ubc30\uc778": 39, "10\ubc30\uc778": 39, "12m": [39, 51], "\ubc94\uc704\ub97c": 39, "\ud569\uc131\ud588\ub2e4\uace0": 39, "\uc9c0\ud45c\uc778": 39, "is\uc758": 39, "\ubd05\ub2c8\ub2e4": 39, "\ud45c\uc5d0\uc11c": 39, "\ud29c\ub2dd\ub41c": 39, "\ubca0\uc774\uc2a4\ubaa8\ub378\ub4e4": 39, "resolution\uacfc": 39, "\ud574\ub2f9\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 39, "\ud655\uc778\ud558\ub294": 39, "5\uc5d0\uc11c": [39, 40], "\uc131\ub2a5\uc774\uace0": 39, "\ube68\uac04\uc0c9": 39, "\uc131\ub2a5\uc785\ub2c8\ub2e4": 39, "\ubca0\uc774\uc2a4\ub77c\uc778": 39, "cdm": 39, 
"\uadf8\ub9bc\uc774\uba70": 39, "\uac00\uc6b4\ub370\ub294": 39, "\uc624\ub978\ucabd\uc740": 39, "\ubd80\ubd84\ubcf4\ub2e4": 39, "\uc704\ucabd\uc5d0": 39, "\uc704\uce58\ud558\uba74": 39, "\ubca0\uc774\uc2a4\ub77c\uc778\ubcf4\ub2e4": 39, "\ubcf4\uc778\ub2e4\ub294": 39, "2\uc5d0\uc11c\ub3c4": 39, "\uc8fc\ubaa9\ud560": 39, "\ub9cc\ud55c": [39, 42], "resnet50\uc774": 39, "\ub2e4\uc6b4\uc0d8\ud50c\ub9c1": 39, "\ud568\uc5d0\ub3c4": 39, "resolution\ubcf4\ub2e4": 39, "resolution\uc758": 39, "\uc6d4\ub4f1\ud788": [39, 50, 53], "\uc2dc\ucf30\uc744": 39, "cas\uc640": 39, "cas\uc5d0\uc11c\ub294": 39, "resnet50": 39, "\ud655\uc778\ud588\uc9c0\ub9cc": [39, 55], "\uc774\uc678\uc5d0": 39, "\ubaa8\ub378\ub85c\ub3c4": 39, "\ubcf8\ub2e4\ub294": 39, "\ucc28\uc774\uc810\uc774": [39, 44], "\uc0b4\ud3b4\ubcf8": 39, "\ub0ae\uc558\uc9c0\ub9cc": 39, "\ub370\uc774\ud130\ub9cc": 39, "onvnet\uae30\ubc18": 39, "\uc591\uc0c1\uc744": 39, "\ubcf4\uc600\uc2b5\ub2c8\ub2e4": 39, "\uaddc\ubaa8\uc5d0": 39, "50\uc758": 39, "\ubd84\uc11d\ud55c": 39, "8m": 39, "\ub54c\uae4c\uc9c0\ub294": 39, "\uc88b\uc558\uc73c\ub098": 39, "\ub418\uc5c8\uc744": 39, "sclae": 39, "\ub2ec\uc131\ud588\uc2b5\ub2c8\ub2e4": 39, "76": 39, "239": 39, "69": 39, "resnet\uacfc": 39, "accuracy\ub97c": 39, "\uc2dc\ucf30\uc2b5\ub2c8\ub2e4": 39, "\uc0dd\uac01\ud574\ubcfc\ub9cc\ud55c": 39, "\uac70\ub9ac\ub4e4\uc774": 39, "\ud558\ub098\ub294": [39, 52], "256x256\ubcf4\ub2e4": 39, "\ub2e4\uc6b4\uc0d8\ud50c\ub9c1\uc744": 39, "\ud558\ub354\ub77c\ub3c4": 39, "resolution\uc774": [39, 51], "\ub2f4\ub294\ub2e4\ub294": 39, "\uac83\uc77c": 39, "\uc815\ud655\ub3c4\uac00": 39, "\uc99d\uac00\ud588\uc9c0\ub9cc": 39, "\ub370\uc774\ud130\uc5d0\uc11c\ub294": 39, "\uadf8\ub807\uc9c0": [39, 42], "\uc54a\uc558\ub358": [30, 39], "\uace0\ud574\uc0c1\ub3c4\uc5d0": 39, "\uc815\uad50\ud55c": 39, "\ud544\uc694\ud560": [24, 39, 44, 51], "\uc2dc\uc0ac\ud558\uace0": 39, "\ub9ac\ubdf0\ub97c": 39, "\ub9c8\uce58\uaca0\uc2b5\ub2c8\ub2e4": 39, "\ub290\ub080": 39, 
"\uc0b0\uc5c5\uc5d0\uc11c\ub294": 39, "shortage\ub098": 39, "imbal": 39, "\ubc1c\uc0dd\ud558\ub294\ub370": 39, "\ud574\uacb0\ubc95": 39, "\ud558\ub098\uac00": [39, 46], "\uac19\ub2e4\ub294": 39, "\ub4e4\uc5c8\uc2b5\ub2c8\ub2e4": [24, 39], "\uc0b0\uc5c5\uc5d0\uc11c\ub9cc": 39, "\ud14d\uc2a4\ud2b8\uac00": 39, "\ud569\uc131\ud558\uace0\uc790": 39, "\ud574\uc57c\ud558\ub294": 39, "\uaf64\ub098": 39, "\ubd88\ud3b8\ud560": 39, "\uac19\uc544\uc11c": 39, "\uac16\ub294\uc9c0": 39, "\uc788\uc5c8\uc73c\uba74": 39, "\uc88b\uc558\uc744": [30, 39], "\uac1c\uc778\uc801\uc778": 39, "\uc720\ucd94\ud574\ubcfc": 39, "\uc21c": 39, "\uc788\uc9c0\ub9cc\uc694": 39, "worth": [24, 40], "devocean": 40, "techboarddetail": 40, "id": [30, 40], "164320": 40, "boardtyp": 40, "writer": 40, "searchdata": 40, "sam56903": 40, "subindex": 40, "idlist": 40, "pnwriterid": 40, "kwang": 40, "su": 40, "mun": [40, 44, 57], "5\uc7a5\uc73c\ub85c": 40, "\ucf58\uc149\ud2b8": 40, "\ubf51\uc544\ub0b4\ub294": 40, "\uc790\uc5f0\uc5b4\ub97c": 40, "creation\uc5d0": 40, "\uc804\ub840\uc5c6\ub294": 40, "\uc790\uc720\ub3c4\ub97c": 40, "contept\ub97c": 40, "\uadf8\uac83\uc758": 40, "\ubc14\uafb8\uac70\ub098": 40, "\uc5ed\ud560\uc774": 40, "\uc8fc\uc5b4\uc9c0\uac70\ub098": 40, "\ucc38\uc2e0\ud55c": 40, "\uc7a5\uba74\uc774": 40, "\uadf8\ub824\uc9c0\ub294\uac74": 40, "\ubd88\ubd84\uba85\ud558\ub2e4": 40, "\uc774\uac83\uc744": 40, "\uadf8\ub824\uc918": 40, "\uc774\uac83": 40, "\uac83\uc774\ub0d0\ub294": 40, "\ubb3c\uc74c\uc5d0\ub294": 40, "5\uac1c\ub9cc\uc73c\ub85c": 40, "\uc0ac\ubb3c\uc774\ub098": 40, "\ubb38\uc7a5\uc5d0": [40, 44], "\ub179\uc544\ub4e4\uc5b4\uac00": 40, "\uc774\ub04c\uc5b4": 40, "\ub3c5\uc790\uc801\uc774\uba74\uc11c": 40, "\ucf58\uc149\ud2b8\ub97c": 40, "capture\ud558\uae30": 40, "\ucda9\ubd84\ud558\ub2e4\ub294": 40, "\uc54c\uac8c": 40, "\ub3c4\uc785\ud558\ub294": 40, "\uc77c\uc740": 40, "\ud655\uc7a5\ub41c": 40, "retraining\ud558\ub294": 40, "\uc5c4\uccad\ub098\uac8c": 40, "\uc608\uc81c\uc5d0": 40, 
"\uce58\uba85\uc801\uc778": [40, 43, 53, 54], "\ub9dd\uac01\uc744": 40, "\ucd08\ub798\ud55c\ub2e4": 40, "figure\uc5d0\uc11c": 40, "\uc9c0\ub098\uba74\uc11c": 40, "508": 40, "701": 40, "set\uc73c\ub85c": [40, 45], "\ubcc0\ud658\ub418\uace0": 40, "\uc790\uccb4": 40, "\ub2e4\uc6b4\uc2a4\ud2b8\ub9bc": 40, "\uc81c\uacf5\ub428": 40, "concept\ub97c": 40, "word\uc778": 40, "\ub2e8\uc5b4\uc640": 40, "\ucc98\ub9ac\ub418\uba70": 40, "\uad6c\uc131\ud558\ub294\ub370": 40, "query\ub294": 40, "\uc758\ub3c4\ud55c\ubc14\uc640": 40, "\uadf8\ub9bc\uc774\ub77c\uace0": 40, "\uc0dd\uc131\ubaa8\ub378": 40, "ldm\uc774": 40, "\uc4f0\uc784": 40, "untouched\ub418\uc5b4": 40, "\ub4e4\uc5b4\uac00\uc9c0": 40, "\uc54a\ub294\ub4ef\ud568": 40, "\uc774\ud574\ub3c4\ub098": 40, "generalization\uc744": 40, "\uc720\uc0ac\ub2e8\uc5b4": 40, "inversion\uc2dc\ucf1c": 40, "\ud504\ub808\uc784\ud654": 40, "\uc8fc\uc5b4\uc9c4\ub2e4": 40, "\uc124\uc815\ud574": [40, 42], "concept\uc778": 40, "found": 40, "palavra": 40, "\ubcf5\uad6c": [40, 44], "segmentation\uc744": 40, "palavra\ub294": 40, "\ucc38\uc870\ud558\ub294": 40, "\uc2dd\ubcc4\ud568": 40, "\uac80\uc0c9\uc744": 40, "\uc124\uba85\ud558\uac70\ub098": 40, "\uc7a5\uba74\uc5d0\uc11c": 40, "\ubd84\ud560\ud558\uae30": 40, "\ubcf4\ub4ef\uc774": 40, "goal": 40, "specifi": 40, "\uc758\uc5ed": 40, "\uc758\ub3c4\ud55c": 40, "\ub9de\ucd98": 40, "embedding\uc73c\ub85c": [24, 40], "\uac00\uc774\ub4dc\ud574\uc11c": 40, "\uc131\uacfc\ubb3c\uc744": 40, "\uc778\ucf54\ub529\ud558\ub294\ub370": 40, "representation\uc5d0": 40, "\ud6c4\ubcf4\uad70\uc744": 40, "\ucc3e\ub294\ub2e4": 40, "understanding\uc744": 40, "\uc0dd\uc131\uc790\uac00": 40, "\uadf8\ub9b0\ub2e4": 40, "inversion\uc5d0\uc11c": 40, "\ucd9c\ucc98": [24, 40, 44], "hyoseok": 40, "entri": 40, "vector\ub85c\ubd80\ud130": 40, "\uc774\uc758": 40, "\uc5ed\uacfc\uc815\uc73c\ub85c\uc368": 40, "inverting\uc2dc\ucf1c": 40, "\uc54c\uc544\uac00\ub294": 40, "\uc0dd\uc131\ubaa8\ub378\ub85c\uc11c": 40, 
"\ub9d0\ud588\ub4ef\uc774": [40, 44], "\uac74\ub4e4\uc9c0": 40, "\ubb38\uc790\uc5f4\uc758": 40, "\ub2e8\uc5b4\ub294": 40, "\ud1b5\uacfc\ud558\uba70": 40, "dictionary\uc5d0\uc11c": 40, "\ubcc0\ud658\ud568": 40, "\ubca1\ud130\uc5d0": 40, "\uc5f0\uacb0\ub428": 40, "index\uc5d0": 40, "encoder\uc778": 40, "c_\u03b8\uc758": 40, "\uc77c\ubd80\ub85c": 40, "\uc0bc\uc558\uc74c": 40, "\ub098\ud0c0\ub0b4\uae30": 40, "\uc790\ub9ac\ud45c\uc2dc\uc790": 40, "\ubb38\uc790\uc5f4\uc778": 40, "\uc9c0\uc815\ud568": 40, "palavra\ub97c": 40, "\ucd94\uc815\ud568": 40, "\uac1c\uc785\ud574\uc11c": 40, "tokenize\ub41c": 40, "\ubb38\uc790\uc5f4\uacfc": 40, "\ub300\uccb4\ud558\uc5ec": 40, "\ubcf8\uc9c8\uc801\uc73c\ub85c": 40, "\uc5b4\ud718": 40, "\uc8fc\uc785\ud568": 40, "\ubb18\uc0ac\ud568": 40, "v\ub97c": 40, "\ucd5c\uc801\ud654\ud568": 40, "\uace0\uc815\ud558\uae30": 40, "\ud15c\ud50c\ub9bf\uc5d0\uc11c": 40, "\uc911\ub9bd": 40, "rendit": [40, 47], "\uc544\ub9c8": [30, 40, 49], "\uc6d0\ubcf8\uacfc": 40, "\ubaa9\uc801\uc774": 40, "\uc544\ub2d0\uae4c": 40, "\uc2f6\uc74c": 40, "\ubaa9\ud45c\uc2dd\uc740": 40, "loss\ud568\uc218\uc640": 40, "\uc720\uc0ac\ud568": 40, "c\u03b8\uc640": 40, "e\u03b8\ub294": 40, "\ud3ec\ucc29\ud560": 40, "\uc788\uc744\uac83\uc73c\ub85c": 40, "\uae30\ub300\ud568": 40, "\ud3ec\ucc29\ud558\ub294": 40, "guide\uc5d0": 40, "\ub9de\ucdb0\uc11c": 40, "\uc8fc\uc81c\uc5d0": 40, "\ucea1\uc158\ub4e4\uc5d0": 40, "\ucd94\ub860\uc774": 40, "\uac00\ub2a5\ud588\uc74c": 40, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\ub3c4": 40, "\ubcf4\uc874\ud558\uba74\uc11c": 40, "\uc0ac\uc9c4\uc5d0\uc11c\uc640": 40, "\ubc31\uc778": 40, "\ub0a8\uc131": 40, "\uc758\uc0ac\ub97c": 40, "\uadf8\ub824\ub0c8\uc74c": 40, "\ub9ce\uc558\uc74c\uc744": 40, "imageset\uc5d0\uc11c": 40, "\uc778\uc885\uc801": 40, "\uc778\uc2dd\uc744": 40, "embedding\uc758": 40, "y\ucd95": 40, "\ubcf5\uc81c\ud558\ub294\uc9c0": 40, "\uc0dd\uc131\ud558\ubbc0\ub85c": 40, "\ucee8\uc149\uc5d0": 40, "64\uac1c\uc758": 40, "x\ucd95": 40, 
"\ub09c\uc774\ub3c4\uc640": 40, "\uc124\uc815\uc758": 40, "embedding\uc5d0\uc11c": 40, "similarity\ub97c": 40, "\uc2a4\ucf54\uc5b4\ub294": 40, "capability\uc640": 40, "\uc2e0\ub8b0\ub3c4\ub97c": 40, "\ub530\ub984": 40, "evaluation1": 40, "baseline\uacfc": 40, "set\uc5d0\uc11c": 40, "\ub2ec\uc131\ud558\uace0": 40, "baseline\uc5d0\uc11c": 40, "editablity\uc744": 40, "\ub098\ud0c0\ub0b4\uace0": 40, "word\ub9cc": 40, "\uc815\ud655\ub3c4\ub85c": 40, "\ucea1\ucc98\ud558\ub294\ub370": 40, "tradeoff": 40, "\uace1\uc120\uc758": 40, "outline\uc744": 40, "\uadf8\ub9ac\uba70": 40, "\uc218\uc815\ub420": 40, "target\uc758": 40, "\ucea1\ucc98\ud558\uc9c0\ub294": 40, "\uba40\ub9ac": 40, "\ubc97\uc5b4\ub098\uba74": 40, "editability\uac00": 40, "\uac10\uc18c\ud558\ub294": 40, "reconstruction\uc774": 40, "\ubcc0\uacbd\ud574": 40, "\uace1\uc120\uc744": 40, "tradeoff\uc5d0": 40, "\ubabb\ud558\uba74\uc11c\ub3c4": 40, "\uac10\uc18c\ud568": 40, "\uc124\ubb38\uc9c0": 40, "\uc81c\uacf5\ubc1b\uc558\uace0": 40, "\uc774\ubbf8\uc9c0\uc640\uc758": [40, 47, 53], "\uc720\uc0ac\uc131\uc5d0": 40, "\ub9e4\uae40": 40, "\uc9c8\ubb38\ubcc4\ub85c": 40, "600\uac1c\uc529": 40, "200\uac1c\uc758": 40, "\uc751\ub2f5\uc744": 40, "\uc758\ubbf8\ub860\uc801\uc778": 40, "\ubcf8\uc9c8\uc744": 40, "\ud30c\uc545\ud558\uac70\ub098": 40, "shape\ub97c": 40, "2\uc2dc\uac04\uc774": 40, "\uc18c\uc694\ub428": 40, "\uc124\uc815\uacfc": [40, 43], "\uac1c\uc778\ud654\ub418\uba70": 40, "\uc18c\uac1c\ud568": 40, "word\ub85c": 40, "inverse\ud558\uc5ec": 40, "\uc791\ub3d9\ud568": 40, "word\ub294": 40, "\uac04\ub2e8\ud558\uace0": [24, 40], "\uc27d\ub3c4\ub85d": 40, "interpace\ub97c": 40, "\uc0ac\uc6a9\ud558\uc9c0\ub9cc": [40, 44], "\uc5b8\uc5b4\uc758": 40, "\ud55c\uacc4\uc5d0": 40, "\uc811\uadfc\ud560": 40, "\ub2e8\uc11c\ub97c": 40, "\uc0ac\uc6a9\uac00\ub2a5\ud55c": 40, "\uad6c\ud604\ub428": 40, "\uc758\uc874\ud558\uc9c0": [40, 43, 53], "\uac70\uae30\uc5d0\uc11c": 40, "preserav": 40, "\ud5a5\uc0c1\ub420": 40, "08818": 41, 
"\uc904\uc774\uae30": [24, 41, 46], "\ubaa8\ub378\ub9c1\uc758": 41, "\ubd80\uc871\ud558\uba70": 41, "\uc774\uc720\uac00": 41, "temproal": 41, "\uc0d8\ud50c\ub4e4": 41, "\ub07c\ub9ac\uc758": 41, "\uc0dd\uc131\ud588\ub2e4": [24, 41, 51], "\uac70\uce58\uac8c": 41, "\uc2dc\ud000\uc2a4\uc758": 41, "\uc2dc\uac04\ucd95\uc5d0": 41, "1280x2048": 41, "\uc778\ucf54\ub529\ud574": 41, "\uc815\ub82c\ud558\uc5ec": 41, "\uc77c\uad00\uc801\uc778": 41, "\uc790\uc728": 41, "\uc8fc\ud589\uc758": 41, "\uc2dc\ubbac\ub808\uc774\uc158": 41, "\uc5d4\uc9c4": 41, "512x1024": 41, "creation": [41, 52], "\uac1c\ubcc4\uc758": 41, "\ub80c\ub354\ub9c1\ud574": 41, "\uc5f0\uad00\ub418\uba70": 41, "\uc815\ub82c\ud560": 41, "\uc778\uc2dd\ud560": 41, "einop": 41, "\uad6c\ud604\ud588\uc73c\uba70": 41, "\ubc30\uce58x\uc2dc\uac04": 41, "\uc778\ucf54\ub529\uc774": 41, "\ubc30\uce58": 41, "ii": 41, "\ud65c\uc6a9\ud588\ub2e4": [24, 41], "\uac00\uc911\ud569\uc744": 41, "\uacb0\ud569\ub41c\ub2e4": [24, 41], "\uc2dc\ud000\uc2a4\ub85c": 41, "flickering\uc774": 41, "\ubc1c\uc0dd\ud558\ub294": [41, 54], "\uad6c\ucd95\ub41c": 41, "patch": 41, "wise": 41, "\uc608\uce21\ud558\uac8c\ub054": 41, "\ud504\ub808\uc784\ub4e4\uc740": 41, "\uc785\ub825\ub41c\ub2e4": 41, "\uc7ac": 41, "\ub3c4\uc785\ud574": 41, "\ud0a4": 41, "\uc81c\uc57d\uc73c\ub85c": 41, "\uc50c\uc6b4\ub2e4": 41, "16t": 41, "\ud574\uc0c1\ub3c4\uae4c\uc9c0": 41, "\uc601\uac10\ubc1b\uc544": 41, "4\ubc30": 41, "\ud0a4\uc6e0\ub2e4": 41, "\uad6c\ucd95\ud558\uae30": 41, "\ub2e8\uc704\ub85c": 41, "\uc5f0\uc0b0\ud558\uace0": 41, "\ubaa8\ub378\ub9c1\uc774": 41, "\uc218\ud589\ub41c\ub2e4": 41, "\uadf8\ub85c": 41, "\uc9c4\ud589\ud558\uae30\uc5d0": 41, "rd": 41, "683": 41, "060": 41, "8\ucd08": 41, "dai": [41, 53, 55], "night": [41, 53], "crowded": 41, "7m": 41, "52k": 41, "hour": [41, 52], "320": [41, 55], "1280": [41, 55], "\uac00\ub2a5\ud574\uc84c\ub2e4": 41, "113": 41, "24fp": 41, "7\ucd08": 41, "30fp": 41, "\uc81c\ud55c\uc801\uc778": 41, 
"\ud559\uc2b5\ud588\uc9c0\ub9cc": 41, "\uc14b\uacfc": 41, "entirely\ud558\uac8c": 41, "\uc704\ucabd\uc758": 41, "16203": 42, "\ubaa8\ub378\ub85c\ubd80\ud130": 42, "\ub098\uc058\uc9c0": 42, "reason": 42, "\ud6cc\ub96d": 42, "\uc0b4\ud3b4\ubcf4\uae30": 42, "\uc77c\ub2e8": [42, 51], "\ub3d9\ubb3c\uc758": 42, "37\uac1c\uc758": 42, "pet": 42, "\uce58\uc790": 42, "\ud638\ub791\uc774": 42, "\uadf8\ub7fc": 42, "\ud68d\ub4dd\ud560": 42, "\uc218\ud589\ud574\uc11c": 42, "\ud310\ubcc4\ud55c\ub2e4": 42, "\ud074\ub798\uc2a4\uc774\ub2e4": 42, "n_sampl": 42, "\uc9c0\uc815\ub41c": 42, "\uc0d8\ud50c\ub9c1\ud574": 42, "\ud310\ubcc4\uc774": 42, "\ucd9c\ub825\ud55c\ub2e4": 42, "n_trial": 42, "\uc2dc\ub3c4\ud574\uc11c": 42, "\ud3c9\uade0\ub0bc": 42, "\ucd94\ub860\ud55c\ub2e4": 42, "\ud310\uc815\ud55c\ub2e4": 42, "\ucd94\ub860\ud560": 42, "\ub4e4\uc5b4\uc11c": [42, 50], "\ud559\uc2b5\ud558\uc9c0\ub294": 42, "\uc815\uc758\ub418\uc5b4": 42, "\uad6c\ud558\uace0": 42, "\uc18c\ubaa8\ub428": 42, "\uc904\uc778\ub2e4": 42, "\uac78\ub7ec\ub0b8\ub2e4": 42, "\ub0a8\uc558\ub2e4\uba74": 42, "\uc774\uc81c\ub294": 42, "oxford": 42, "iiit": 42, "bash": 42, "python": 42, "eval_prob_adapt": 42, "split": 42, "to_keep": 42, "prompt_path": 42, "pets_prompt": 42, "csv": 42, "\uc774\ub807\uac8c\uae4c\uc9c0": 42, "\uc904\uc774\ub824\uace0": 42, "\uc2a4\ud06c\ub9bd\ud2b8": 42, "rtx": 42, "3090": 42, "\ub3cc\ub9ac\uba74": 42, "\ud558\ub824\uba74": 42, "all_nois": 42, "randn": [42, 47], "max_n_sampl": 42, "eval_error": 42, "ts": 42, "noise_idx": 42, "text_emb": 42, "text_embed_idx": 42, "float32": 42, "pred_error": 42, "cpu": 42, "idx": 42, "inference_mod": 42, "tqdm": 42, "trang": 42, "batch_t": 42, "noised_lat": 42, "alphas_cumprod": 42, "t_input": 42, "float16": 42, "text_input": 42, "noise_pr": [42, 55], "encoder_hidden_st": [42, 47, 55], "mse_loss": [42, 47], "l1_loss": 42, "huber": 42, "huber_loss": 42, "notimplementederror": 42, "\ucd94\ub860\ud558\uac8c": 42, "\ub420\ud150\ub370": 42, 
"\uc0ac\uc6a9\ud574\uc57c": 42, "\ubcc0\uc218\uc5d0": 42, "\ub2ec\ub77c\uc9c0\uae30": 42, "\ub2ec\ub77c\uc84c\ub2e4": 42, "\uc62c\ub77c\uac00\ub294\uc9c0": 42, "\uc2e4\ud5d8\ud574\ubcf4\uc558\ub2e4": 42, "addit": [24, 30, 42, 46], "\ucd94\ucd9c\ud574\ub0b4\ub294": 42, "\ub6f0\uc5b4\ub0ac\ub2e4": 42, "\uc0dd\uc131\ud574": 42, "\uad6c\ucd95\ud558\uace0": 42, "\ud559\uc2b5\uc2dc\ucf1c\uc11c": 42, "\uc218\ud589\ud55c": 42, "\ucd94\ucd9c\ud574": 42, "\uc804\ub2ec\ud574\uc11c": 42, "\ubaa8\ub378\ubcf4\ub2e4\ub3c4": 42, "\ub192\uc740\uc9c0": 42, "aesthet": [42, 51, 55], "\ud55c\uc9c0": 42, "\ud55c\uc9c0\uc5d0": 42, "filter": [30, 42], "cifar10": 42, "flower": 42, "stl10": 42, "\ud544\ud130\ub9c1\uc774": 42, "\uc548\ub41c": 42, "\uc62c\ub77c\uac08": 42, "winoground": 42, "visio": 42, "linguist": 42, "\ub9e4\uce58\uc2dc\ud0a4\ub294": 42, "\uba85\uc0ac\uc808\ub07c\ub9ac": 42, "\ub4a4\ubc14\ub010": 42, "\ub3d9\uc0ac\ub07c\ub9ac": 42, "\ud615\uc6a9\uc0ac\ub07c\ub9ac": 42, "\ubd80\uc0ac\ub07c\ub9ac": 42, "\ud488\uc0ac\ub07c\ub9ac": 42, "\uc5ec\ub290": 42, "\ub9cc\uc744": 42, "\ud559\uc2b5\ud588\uc74c\uc5d0\ub3c4": 42, "\uc774\uc790": 42, "\ubcc0\ubaa8": 42, "dit": 42, "101": 42, "79": 42, "\uae30\ub85d\ud558\uba70": 42, "\ub2a5\uac00": 42, "\ub2a5\uac00\ud588\ub2e4": 42, "\uacb9\uce58\ub294": 42, "\uc2e0\ub8b0\uad6c\uac04": 42, "\ucc0d\ud600": 42, "\ubaa8\uc591\uc758": 42, "\ud68d\ub4dd\ud55c": 42, "\uae30\ub300\ub418\ub294": 42, "ood": 42, "\ucd94\ucd9c\ud558\ub294": [30, 42], "\uc6b0\uc218\ud568\uc744": 42, "\ub370\uc774\ud130\ub3c4": 42, "\uac1c\uc120\ub420": 42, "\ud65c\uc6a9\ud588\uc74c": 42, "\ub6f0\uc5b4\ub0a0": 42, "01469": 43, "consistency_model": 43, "audio": 43, "\uc654\uc2b5\ub2c8\ub2e4": 43, "2000\ubc30": 43, "\uc5f0\uc0b0\uc791\uc5c5\uc744": 43, "\uc0ac\uc9c4\ucc98\ub7fc": [43, 55], "ordinari": [43, 51], "trajectori": 43, "\ub4e4\uc774": [30, 43, 47, 53], "\ub9e4\ud551\ub418\ub3c4\ub85d": 43, "\ub9cc\uc871\uc2dc\ud0ac": 43, "\ubc29\uc2dd\uc73c\ub85c\ub294": 43, 
"\ud55c\ubc88\uc758": [43, 51], "\ub9cc\uc73c\ub85c\ub3c4": [43, 51, 53, 54], "\ub450\ubc88\uc9f8": [43, 52, 54], "\uac1c\uc120\ub418\uace0": 43, "\ubaa8\ub378\ub85c\uc11c\ub3c4": 43, "stroke": 43, "\ubcf4\uc5ec\uc900\ub2e4\ub294": 43, "\ud655\uc778\ud558\uc600\uc2b5\ub2c8\ub2e4": 43, "p_t": 43, "solut": [43, 48], "\ubd84\ud3ec\ud569\ub2c8\ub2e4": 43, "\uc218\uc2dd\uc5d0\uc11c": 43, "\uc815\uc758\ud558\uace0": [43, 48, 53, 54], "\ub300\uc785\ud558\uba74": 43, "empir": [43, 51], "\uacfc\uc815\uc73c\ub85c\ub294": 43, "euler": [43, 51, 54], "\uc5ed\ubc29\ud5a5\uc73c\ub85c": 43, "\ud480\uc5b4": 43, "\uadfc\uc0ac\uac12\uc774\ub77c\uace0": [43, 46], "\uba48\ucd94\uace0": 43, "\uadfc\uc0ac\uac12\uc73c\ub85c": 43, "\uac04\uc8fc\ud569\ub2c8\ub2e4": 43, "80": 43, "002": 43, "\uc124\uc815\ud569\ub2c8\ub2e4": [43, 52, 56], "\uc18c\uac1c\ub4dc\ub9b0": [43, 54], "\uc9c4\ud589\ub418\uc5c8\uc9c0\ub9cc": 43, "\ud65c\uc6a9\ud574\ub3c4": 43, "10\ubc88": 43, "\uac70\uccd0\uc57c\ub9cc": 43, "\ubcf4\uc5ec\uc900\ub2e4\uace0": [43, 46, 47, 53, 54], "\uae30\ubc95\ub4e4\uc5d0": 43, "\uc81c\uc678\ud558\uace0\ub294": [43, 53], "\ub300\ub7c9\uc758": 43, "\uc218\uc9d1\ud574\uc57c\ud55c\ub2e4\ub294": 43, "\ub9cc\uc871\ud569\ub2c8\ub2e4": 43, "\uc608\uce21\ud558\uae30": 43, "\ub370\uc774\ud130\ub85c\ubd80\ud130": 43, "bilo": 43, "invert": [43, 53], "\ubd80\uc5ec\ud558\uc9c0\ub294": 43, "boundari": 43, "\ub9cc\uc871\ud558\uae30": 43, "form": 43, "\ub9cc\uc871\uc2dc\ud0a4\ub294": 43, "\uc720\uc0ac\ud558\uc5ec": 43, "leverag": [43, 49], "\ud0dd\ud569\ub2c8\ub2e4": 43, "\ub354\ubd88\uc5b4": [43, 44, 49], "\ud558\ub2e8": [43, 52, 55], "inject": [30, 43, 57], "multistep": 43, "\uc720\uc5f0\uc131\ub3c4": 43, "\ub4e4\uac04\uc758": 43, "\ubcf4\uc644\ud558\uba74\uc11c": 43, "\uc608\uc2dc\ub4e4\uc744": 43, "\uccab\ubc88\uc9f8\ub85c": [24, 43, 53, 56], "\ud06c\ub2e4\uba74": 43, "\uc9c4\ud589\uc2dc\ucf1c": 43, "ts_": 43, "\uc790\uc138\ud558\uac8c\ub294": [43, 47, 52, 53, 54, 55, 56], "2_": 43, 
"\ub85c\ubd80\ud130\uc758": [43, 44], "\ucd9c\ub825\uac12": 43, "\ub85c\ub294": [43, 54], "\ud655\uc778\ud574\ubcf8": [43, 54], "equiv": 43, "\ud559\uc2b5\ud558\uc600\ub2e4\uace0": 43, "stopgrad": 43, "\uc124\uc815\ud560\ub54c\ubcf4\ub2e4": 43, "\uc548\uc815\uc801\uc73c\ub85c": 43, "\uc131\ub2a5\uc5d0\ub3c4": 43, "\uac1c\uc120\uc774": 43, "\uc808\ucc28\ub294": 43, "\uc815\ub9ac\ud560": [43, 51], "run": 43, "\uc218\ub834\ud560": 43, "\ub458\uc740": 43, "\uc77c\uce58\ud558\uac8c": 43, "onlin": 43, "\uc758\uc874\ud588\ub2e4\uba74": 43, "\uadfc\uc0ac\ud560": [43, 51], "\uc0ac\uc2e4\uc744": 43, "\uc788\uc2b5\ub2e4": 43, "t_nz": 43, "\uc815\uc758\ud558\uac8c": [43, 54, 55, 56], "\ubc30\uacbd\uc740": 43, "\ube44\uad50\ud588\uc744\ub54c": [43, 47], "\uc99d\uac00\ud558\uac8c": 43, "converg": [43, 48, 49, 54], "\uc774\ub974\ub294\ub370": 43, "\uc6a9\uc774\ud569\ub2c8\ub2e4": 43, "\uac10\uc18c\ud558\uac8c": 43, "\ubc14\ub78c\uc9c1\ud558\ub2e4\uace0": 43, "\uc2e4\ud5d8\ud558\uc600\uace0": 43, "\uc9c0\ud45c\ub294": [43, 51, 54], "\ub370\uc774\ud130\uc14b\uc5d0\ub294": [43, 54], "ncsn": 43, "\uc9c4\ud589\ud558\uc600\uc2b5\ub2c8\ub2e4": [43, 53], "\uc88b\uc558\uace0": 43, "\ub4e4": [30, 43], "\uce21\uc815\ud558\ub294\ub370": 43, "\ud2b9\ud654\ub418\uc5b4": 43, "\uc124\uc815\ud588\uc744\ub54c": 43, "\uc88b\uc558\uc2b5\ub2c8\ub2e4": 43, "\ube68\ub9ac": 43, "\uc218\ub834\ud558\uc9c0\ub9cc": 43, "vice": [43, 46], "versa": [43, 46], "\uc810\ucc28\uc801\uc73c\ub85c": [43, 54], "\uc99d\uac00\uc2dc\ud0a4\uba74\uc11c": 43, "\ubcc0\ud654\uc2dc\ucf30\uc744\ub54c": 43, "pd": [43, 51], "\uacac\uc904\ub9cc\ud55c": 43, "\uc0dd\uc131\ud568\uc73c\ub85c\uc368": 43, "\uc131\uc9c8\ub3c4": 43, "unpair": 44, "1703": 44, "10593": 44, "tensorflow": 44, "\ub17c\ubb38\ub9ac\ubdf0": 44, "cyclegan\uc744": 44, "\ud55c\uad6d\uc778\uc774\ub77c\uace0": 44, "\ub72f\uc5b4\ubcf4\uae30": 44, "kwangsu": [44, 57], "changhwan": [44, 48, 56, 57], "\ub3c4\uba54\uc778\uc744": 44, "\ub3c4\uba54\uc778\uc73c\ub85c": 44, 
"\ubcc0\ud658\uc2dc\ud0a4\ub294": 44, "input\uacfc": 44, "\uc9dd\uc774": 44, "\uc9c0\uc5b4\uc9c4": 44, "\ud559\uc2b5\ud558\uc9c0\ub9cc": 44, "\uc5b4\ub835\uc2b5\ub2c8\ub2e4": [44, 56], "\uc9dd\uc9c0\uc5b4\uc9c4": 44, "\ubc14\uafb8\ub294": [44, 47], "\uad6c\ubd84\uc774": 44, "\ubd88\uac00\ub2a5\ud558\ub3c4\ub85d": 44, "\ub85c\uc758": 44, "\uac00\ud574\uc11c": 44, "\uac15\uc81c\ud558\uae30": 44, "\uc5ed\ubc29\ud5a5": 44, "\uc9c4\ud589\ud569\ub2c8\ub2e4": [44, 48, 52], "\uc720\uc0ac\ud574\uc9c0\ub3c4\ub85d": 44, "\uac15\uc81c\ud558\ub294": 44, "\ub3c4\uc785\ud588\uc2b5\ub2c8\ub2e4": 44, "transfigur": 44, "season": 44, "\ubcf4\uc5ec\uc92c\ub2e4\uace0": 44, "\ub123\uc73c\uba74": 44, "\ucc38\uc870\ud558\uba74": 44, "\ud559\uc2b5\ud574\uc11c": 44, "\ub098\uc624\ub3c4\ub85d": 44, "\ucc98\uc74c\uc758": 44, "\uc6d0\ubcf8\uc73c\ub85c": 44, "\uc21c\ud658": 44, "\uc0ac\uc774\ud074": 44, "\uc548\uc815\uc801\uc774\uac8c": 44, "gram": 44, "\uc77c\uce58\ub97c": 44, "\uadf8\ub9bc\uc73c\ub85c": 44, "\ubcc0\ud658\ud55c\ub2e4\uac70\ub098": 44, "\ub0ae\uc5d0": 44, "\ubc24\uc5d0": 44, "\ud754\ud788": [24, 44], "\uc788\uc5c8\ub294\ub370\uc694": 44, "\ube44\uc2fc": 44, "\uc77c\uc774": 44, "\uc77c\ub300\uc77c\ub85c": 44, "\uc9dd\uc9c0\uc5b4\uc9c0\uc9c0": 44, "\ubaa8\uc74c\uc758": 44, "\ucea1\uccd0\ud558\uace0": 44, "\ubaa8\uc74c\uc73c\ub85c": 44, "\uc81c\uacf5\ub418\uace0": 44, "\uc5c6\ub3c4\ub85d": 44, "\ubb34\uc870\uac74": 44, "\uc774\ub8ec\ub2e4\ub294": 44, "\ub73b\ud558\uc9c0\ub294": 44, "\ubb34\ud55c\ud55c": 44, "\uc77c\uc5b4\ub098\uae30\ub3c4": 44, "dl": 44, "blogspot": 44, "\ub4e0": 44, "\ub9e4\ud551\ud558\uba74\uc11c": 44, "\ud604\uc0c1\uc785\ub2c8\ub2e4": 44, "\ud604\uc0c1\uc740": [44, 53], "\uc785\uc7a5\uc5d0\uc11c": 44, "\uc0ac\uc9c4\uc774": [44, 46, 52], "\uac00\uc9dc\uc778": 44, "\uad6c\ubcc4\ud558\ub294": 44, "\uc18d\uc774\uae30\ub9cc": 44, "\uc6b0\ub9ac\uc758": 44, "\ubaa9\uc801\uacfc": 44, "\uc0c1\uad00\uc774": 44, "\ub9cc\ub4e4\ub354\ub77c\ub3c4": 44, "\uc54a\uc544\uc11c": 
44, "\ubc1c\uc0dd\ud569\ub2c8\ub2e4": [44, 48, 50], "\uc774\uc288\ub85c": 44, "\ud544\uc694\ud574\uc84c\uc2b5\ub2c8\ub2e4": 44, "\uc601\uc5b4": 44, "\ud504\ub791\uc2a4\uc5b4": 44, "\uc601\uc5b4\ub85c": 44, "\ubc88\uc5ed\ud588\uc744": 44, "\ub3c4\ub2ec\ud558\ub294": 44, "\uac19\uc544\uc57c": 44, "\uc758\ubbf8\uc758": 44, "\uc774\uc6a9\ud569\ub2c8\ub2e4": 44, "\ubaa9\uc801\uc2dd\uc744": 44, "\uc815\ubc29\ud5a5": 44, "\ub3c4\uc2dd\ud654": 44, "\uc6a9\uc5b4": 44, "\ud45c\uae30": 44, "d_x": [44, 53], "d_y": [44, 53], "\uad6c\ubd84\ud558\uace0": 44, "\ubaa9\uc801\uc2dd\uc73c\ub85c": 44, "\uc77c\uce58\uc2dc\ud0a4\uae30": [44, 54], "\ubaa8\uc21c\ub418\ub294": 44, "\ubaa9\uc801\uc2dd\uc740": 44, "\uac08": [44, 46], "\uc218\uc2dd\uc774": 44, "\ub098\uc635\ub2c8\ub2e4": 44, "\ub123\uc740": [24, 44], "\uc81c\ud55c\uc744": 44, "\uc608\ube44": 44, "\ub300\uccb4\ud574\ubd24\ub294\ub370": 44, "\uad00\ucc30\ud560": 44, "\uc640\uc758": [44, 53], "\uc0c1\ub300\uc801": 44, "\uc911\uc694\ub3c4\uc5d0": 44, "\uacb0\uc815\ub429\ub2c8\ub2e4": 44, "\ub85c\uc11c": 44, "\ubcf4\uc5ec\uc900": [44, 55], "1603": 44, "08155": 44, "\ucc44\ud0dd\ud569\ub2c8\ub2e4": 44, "sever": 44, "fraction": 44, "\uc548\uc815\ud654\uc2dc\ud0a4\uae30": 44, "\ud14c\ud06c\ub2c9\uc744": [44, 51], "50\uac1c\ub97c": 44, "\uc800\uc7a5\ud574": 44, "\ud55c\uaebc\ubc88\uc5d0": 44, "\uc9c4\ub3d9\uc744": 44, "sjinu": 44, "ysbsb": 44, "lsgan": 44, "\ucc38\uace0\ud588\uc73c\uba70": 44, "\uc5c5\ub370\uc774\ud2b8\uc2dc": 44, "\uc774\ubcf4\ub2e4": 44, "\uace0\ucc28\uc6d0\uc774\uc9c0\ub9cc": 44, "\uac04\ub7b5\ud788": [44, 54], "2\ucc28\uc6d0\uc744": 44, "\ud45c\ubc29\ud558\uba74": 44, "\uacb0\uc815\uacbd\uacc4\ub97c": 44, "\ucabd\uc774": 44, "\uac00\uc9dc": [44, 48], "\uc601\uc5ed\uc785\ub2c8\ub2e4": 44, "\uc544\ub798\uc5d0": 44, "\uc0ac\uc6a9\ud55c\ub2e4\uba74": 44, "\uc785\uc7a5\uc5d0\uc11c\ub294": 44, "\uc18d\uc774\uace0": 44, "\uc5c6\uac8c": [24, 44], "vanish": [44, 48], "\uc77c\uc5b4\ub098\uae30": [44, 53], 
"\uc18d\uc778\ub2e4\ub294": 44, "\uc774\uc720\ub9cc\uc73c\ub85c": 44, "\uc2e4\ud5d8\uc5d0": 44, "\ub3d9\uc548\uc5d0\ub294": 44, "0002": 44, "\uc124\uc815\ud588\uace0": 44, "\uc870\uae08\uc2dd": 44, "\uc218\ub834\ud558\uac8c": 44, "\ud558\uc600\uc2b5\ub2c8\ub2e4": 44, "cogan": 44, "simgan": 44, "\ucc38\uac00\uc790\ub4e4\uc740": 44, "\uc0ac\uc9c4\uc774\ubbf8\uc9c0": 44, "\uac00\uc9dc\uc774\ubbf8\uc9c0\uc5d0": 44, "\ub178\ucd9c\ub41c": 44, "\uc9c4\uc9dc\ub77c\uace0": 44, "\uc0dd\uac01\ub418\ub294": 44, "\uc120\ud0dd\ud558\uac8c": 44, "fcn": 44, "\ud14c\uc2a4\ud2b8\uc5d0": 44, "\uae30\uc900\uc784\uc5d0\ub3c4": 44, "\uc2e4\ud5d8\uc774": 44, "\uc591\uc801\uc778": 44, "\uae30\uc900\uc744": 44, "\uc0ac\uc9c4\uc5d0": 44, "\ub808\uc774\ube14": 44, "\ub9f5\uc744": 44, "\ub9f5\uc740": 44, "\ub3c4\ub85c": 44, "\uc790\ub3d9\ucc28": 44, "\uac10\uc9c0\ud558\uba74": 44, "\uc131\uacf5\ud55c": 44, "cityscap": 44, "aginst": 44, "\uc88b\uc744": 44, "\uc9c0\ub3c4\uc5d0\uc11c": 44, "\ud56d\uacf5": 44, "\uc0ac\uc9c4\uc5d0\uc11c": 44, "\uacb0\uacfc\uc5d0\uc11c": 44, "\ucc38\uac00\uc790\ub97c": 44, "\uc18d\uc77c": 44, "\uc5c6\uc5c8\uc2b5\ub2c8\ub2e4": 44, "\ub3c4\uc2dc": 44, "\ud48d\uacbd\uc5d0": 44, "\ub2a5\uac00\ud569\ub2c8\ub2e4": 44, "\uc800\ud558\ub418\ub294": [44, 50, 55], "\ud55c\ucabd": 44, "\ub3cc\ub838\uc744": 44, "\uc720\ubc1c\ud558\ub294": 44, "reconctruct": 44, "\uc608\uc2dc\ub4e4\uc785\ub2c8\ub2e4": 44, "\ub3c4\uba54\uc778\uc774": 44, "\uc7ac\uad6c\uc131\ub41c": 44, "\ub9ce\uc558\uc2b5\ub2c8\ub2e4": 44, "cmp": 44, "facad": 44, "databas": [44, 48], "\uac74\ucd95": 44, "ut": 44, "zapoos50k": 44, "\uc2e0\ubc1c": 44, "\ube44\ub86f\ud558\uc5ec": 44, "shallow": 44, "\uc595\uc740": 44, "\ucd08\uc810": 44, "\ucd08\uc810\uc774": 44, "\ub9de\uc740": 44, "\ud750\ub9bf\ud558\uac8c": 44, "\uc791\ud488": 44, "\uad6c\ubaa9\ud558\uace0\uc790": 44, "\uac15\uc870\ud558\uae30": 44, "domain\uc740": 44, "\uc2a4\ub9c8\ud2b8\ud3f0\uc758": 44, "\uc870\ub9ac\uac1c\ub85c": 44, "target\uc740": 44, 
"\uc870\ub9ac\uac1c\uac00": 44, "discuss": 44, "\uc544\ub2c8\uc5c8\uc2b5\ub2c8\ub2e4": 44, "\ubcc0\ud654\ub9cc": 44, "\ud615\uccb4\uac00": 44, "\uc560\ub9e4\ud574\uc9c4": 44, "\ucf54": 44, "\uc785": 44, "\uad6c\uc870\uc5d0": [24, 44], "\uad6c\ud604\ud558\ub294\ub370": 44, "\ub9d0": 44, "\uc5bc\ub8e9\ub9d0": 44, "\uc608\uc81c\uc758": 44, "\ud0c0\ub294": 44, "\ub9ce\uc558\ub294\ub370": 44, "\uc5bc\ub8e9\ub9d0\uc758": 44, "\uc5c6\ub2e4\ubcf4\ub2c8": 44, "\ubc30\uacbd\ub3c4": 44, "\uc5bc\ub8e9": 44, "\uadf8\ub9ac\uac70\ub098": 44, "\uc5bc\ub8e9\ub9d0\uc5d0\uc11c": 44, "\ub178\ub797\uac8c": 44, "\uce60\ud55c": 44, "\ub098\ubb34\uc640": 44, "\uac74\ubb3c\uc758": 44, "\ubaa8\ud638\uc131\uc744": 44, "\ud574\uacb0\ud558\ub824\uba74": 44, "\ub298\ub9ac\ub294\ub370": 44, "\uae30\uc5ec\ud569\ub2c8\ub2e4": 44, "12092": 45, "unoffici": 45, "donggeun": [45, 46, 49, 57], "sean": [45, 46, 49, 57], "ko": [45, 46, 49, 57], "\ubaa8\ub378\uc774\uba70": 45, "120\uc5b5\uac1c": 45, "5\uc5b5": 45, "\ud1b5\ud558\uc5ec": 45, "2021\ub144": 45, "diverse\ud55c": 45, "3\uc640": 45, "transformer\uc744": 45, "architecture\uc744": [45, 46], "model\uba70": 45, "\uc218\ub294": 45, "\ubd80\ubd84\ub9cc": [45, 46], "1750\uc5b5": 45, "\uac1c\uc218\uc758": 45, "2005": 45, "14165": 45, "jalammar": 45, "categor": 45, "\uac16\ub294\ub2e4\uace0": 45, "cnn": 45, "d\ucc28\uc6d0\uc758": 45, "\uadf8\ub9ac\ub4dc\ub85c": 45, "\ud835\udc52_1": 45, "\ud835\udc52_\ud835\udc58": 45, "code\ub85c": 45, "e_j": 45, "\ucc3e\uc544\uc11c": 45, "\ubd80\uc5ec\ud568": 45, "p2yeong": 45, "explain": 45, "pixel\uc744": 45, "\uc774\ubbf8\uc9c0\uc77c\uc218\ub85d": 45, "\uba54\ubaa8\ub9ac\ub7c9\uc774": 45, "\ud544\uc694\ud574\uc11c": 45, "short": 45, "model\ub4e4": 45, "dependency\ub97c": 45, "\uac83\uc774\uba70": 45, "detail\uc5d0": 45, "\uc9d1\uc911\ud558\uac8c": 45, "recognizable\ud574\uc11c": 45, "\uadf9\ubcf5\ud558\uace0\uc790": 45, "192\uac1c\uc758": 45, "\ubc30\uc815": 45, "size\ub97c": 45, "\ub4e4\uacfc": [45, 49, 54], 
"\uc5f0\uc18d\uc801\uc73c\ub85c": 45, "\uc785\ub825\ud568": 45, "\uc2dc\uac01\ud654": [45, 46], "jiho": 45, "ml": [45, 56], "weekli": 45, "nlp": 45, "40": 45, "cqom0r2kmvi": 45, "1729": 45, "\ud835\udc5e": 45, "\u03c6": 45, "dvae": 45, "token\ub97c": 45, "\ud835\udf03": 45, "token\uc5d0\uc11c": 45, "decoder\uc5d0\uc11c": 45, "\u03c8": 45, "purpl": 45, "text\uc640": [45, 51], "token\ub4e4\uc758": 45, "\ud835\udc5e_\u03c6": 45, "\ud835\udc5d_\ud835\udf03": 45, "elb": 45, "bound\ub97c": 45, "elb\ub97c": 45, "continuous\ub97c": 45, "\ubc14\uafd4\uc57c": 45, "\ud559\uc2b5\uc2dc\uc5d0\ub294": 45, "argmax\ub97c": 45, "\uc778\ub371\uc2a4\ub97c": 45, "argmax": 45, "gumbel": 45, "g_i": 45, "e_i": [45, 53], "relaxation\ub97c": 45, "temperatur": 45, "relaxation\uc744": 45, "tight\ud558\uac8c": 45, "\uc7a1\uc544\uc90c": 45, "120\uc5b5\uac1c\uc758": 45, "logit\uc5d0\uc11c": 45, "\uc18c\ubb38\uc790\ud654": 45, "384": 45, "vocabulary\ub97c": 45, "\ud55c\ubc88\uc5d0": 45, "row": 45, "column": 45, "n\uac1c\ub294": 45, "n\uac1c": 45, "\uace8\ub77c\uc11c": 45, "\uace0\ub974\uae30": 45, "\ubc88\uc9f8\ub85c": 45, "\uc120\ud0dd\ud568": 45, "best\ub97c": 45, "\uace0\ub97c\ub54c": 45, "prompt\ub791": 45, "\ub098\uc634": [45, 46], "score\uc774": 45, "\uc54c\ub9de\uc740": 45, "df": 45, "five": 45, "vote": 45, "\ucc28\uc774\ub85c": 45, "\ud22c\ud45c": 45, "\ubc1b\uc558\uc74c": 45, "\ub0ae\uc744\uc218\ub85d": [45, 46], "\uc88b\uc73c\uba70": 45, "\ub192\uc744\uc218\ub85d": [45, 46], "\ub791": 45, "cub": 45, "\uc0c8": 45, "coco\uc5d0\uc11c\ub294": 45, "\ubcf4\uc5ec\uc92c\uc74c": 45, "cub\uc5d0\uc11c\ub294": 45, "\ucc0d\uc9c0": 45, "\ubabb\ud558\uc600\uace0": 45, "score\uc5d0\uc11c\ub294": 45, "cub\uc5d0": 45, "\uacc4\uc120\uc744": 45, "\uc0dd\uac01\ud568": 45, "\uacb0\uacfc\uac12": 45, "parameter\uacfc": 45, "\ub6f0\uc5b4\ub098\uac8c": 45, "\uc900\uc218\ud55c": 45, "\uc2f6\uc740": 45, "\ud3ec\ud568\ub418\uba74": 45, "\uacaa\uc74c": 45, "\uace0\uc2b4\ub3c4\uce58\uac00": 45, 
"2\ub9c8\ub9ac\uac70\ub098": 45, "\uace0\uc2b4\ub3c4\uce58": 45, "\ud06c\ub9ac\uc2a4\ub9c8\uc2a4": 45, "\uc2a4\uc6e8\ud130\ub97c": 45, "\uc785\uace0": 45, "\uc544\uc26c\uc6b4": 45, "limitation\uc744": 45, "2105": 46, "05233": 46, "\ub6f0\uc5b4\ub118\uc74c": 46, "\ubd80\ubd84\uc5d0\uc11c\ub3c4": 46, "\uc8fc\uc7a5\ud568": 46, "diversity\uc640": 46, "fidelity\uc758": 46, "off\uc5d0": 46, "model\ub4e4\uc774\uba70": 46, "\uc0dd\uc131\ud574\ub0b4\ub294\ub370\uc5d0": 46, "\uc131\uacf5": 46, "deep\uc5d0": 46, "\ub0ae\uc73c\uba70": 46, "\uac1c\uc120\uc0ac\ud56d\uc774": 46, "model\ub4e4\uc758": 46, "\ub04c\uc5b4\uc62c\ub9ac\uba70": 46, "\ub0ae\ucd94\uaca0\ub2e4\uace0": 46, "\uc124\uba85\ub418\uc788\uc73c\ubbc0\ub85c": 46, "\ub17c\ubb38\ub4e4\uc758": 46, "\uac00\uc815\ud558\uba70": 46, "\ubd88\uac00\ub2a5\ud55c": 46, "\ub9e4\uac1c\ubcc0\uc218\ub85c": 46, "\uc124\uc815\ub418\uba70": 46, "ddpm\uc5d0\uc120": 46, "\uc9c0\ud45c\uac00": 46, "\ub0ae\uc558\ub2e4": 46, "scheduling\uc744": 46, "\uc8fc\uc7a5\ud588\ub2e4": 46, "\ub04a\uace0": 46, "\ubc14\uafc8": 46, "iteration\uc73c\ub85c": 46, "\ucc44\ud0dd\ud588\uc9c0\ub9cc": 46, "parameter\uc744": 46, "\ucc44\ud0dd\ud568": 46, "\uc77c\uc815\ud558\uac8c": 46, "\uac00\uc838\uac00\uba74\uc11c": 46, "\ubcf4\uae30": 46, "\uc2dc\ucf1c\ubcf4\uae30": 46, "head\uc5d0": 46, "8x8": 46, "\ud574\ubcf4\uae30": 46, "\uc77c\ubc18": 46, "block\uc774": 46, "biggan\uc758": 46, "\ucc44\ud0dd\ud558\uc5ec": [24, 46], "32\uc77c\ub54c": 46, "\ub0ae\ub2e4": 46, "160": 46, "block\ub9c8\ub2e4": 46, "adain\uc774\ub791": 46, "adagn": 46, "\uc18c\uac1c\ud588\ub2e4": 46, "\ubc29\ubc95\ub860\uc778\uc9c0\ub294": 46, "normalization\uc744": 46, "adpative\ud558\uac8c": 46, "embedding\uacfc": 46, "adain": 46, "\uacf1\ud558\uace0": 46, "\ub354\ud568": 46, "y_b": 46, "adagn\uc758": 46, "adagn\uacfc": 46, "additon": 46, "normalization\ubcf4\ub2e4": 46, "layer\uc744": 46, "\uc0ac\uc6a9\ud588\ub294\ub370": 46, "\uc8fc": 46, "de": 46, "y\ub97c": 46, "\uc90c\uc73c\ub85c\uc368": 
46, "zp_": 46, "\uc0c1\uc218": 46, "log_": 46, "\uace1\ub960\uc774": 46, "\ubb34\ud55c\uc73c\ub85c": 46, "rightarrow0": 46, "\ud14c\uc77c\ub7ec": 46, "\uae09\uc218\ub97c": 46, "\uc7ac\uc804\uac1c": 46, "\uc720\ub3c4\ub294": 46, "\ubcf8\ubb38\uc758": 46, "\ubc88\uc2dd\uc774\ubbc0\ub85c": 46, "\ub611\uac19\uc774": 46, "sample\ud55c\ub2e4": 46, "output\uacfc": 46, "gradient\uc758": 46, "\ube7c": 46, "score\uc744": 46, "scaling\uc758": 46, "classifier\uac00": 46, "scaling\uc774": 46, "\uc8fc\uba74": 46, "\uc6f0\uc2dc\ucf54\uae30\ub77c\ub294": 46, "\uc6f0\uc2dc\ucf54\uae30\uc2a4\ub7ec\uc6b4": 46, "\uac15\uc544\uc9c0\uac00": 46, "\ub418\uc9c0\ub294": 46, "\uc6f0\uc2dc\ucf54\uae30": 46, "class\ub77c\ub294": 46, "\ubd84\uc704\uae30\uc758": 46, "\uac15\uc544\uc9c0\uc758": 46, "epsilon\uc774\ub77c\ub294": 46, "\ubc1b\ub294\uc9c0": 46, "sampling\ud560": 46, "scale\uc774": 46, "recall\uc740": 46, "\ub0ae\uc9c0\ub9cc": 46, "\uc0dd\uae30\ub294\ub370": 46, "recall\uc774": 46, "diveristy\uac00": 46, "\ub0ae\ub2e4\ub294": [46, 56], "\ub192\ub2e4\ub294": 46, "\ub73b\uc774\ub2e4": 46, "\ub192\uc77c\uc218\ub85d": 46, "label\ucabd\uc73c\ub85c": 46, "guide\uac00": 46, "\uc0dd\uae30\ubbc0\ub85c": 46, "\uc77c\uc815\ud55c": 46, "sfid\ub294": 46, "\ub3c4\ucd9c\ub418\ub294": 46, "\uac12\uc774\ubbc0\ub85c": 46, "\uc9c0\uc810\uc5d0\uc11c": 46, "\ub098\uc654\ub2e4": 46, "adm\uc740": 46, "\uc57d\uc790\uc774\uba70": 46, "g\ub294": 46, "guidance\uc758": 46, "\uc57d\uc790\uc774\ub2e4": 46, "\uc8fc\uc5c8\uc744": 46, "fid\uac12\uc774": [46, 49], "\ub098\uc654\uc73c\uba70": 46, "\ub450\ubc88\uca30": 46, "\ud50c\ub77c\ubc0d\uace0": 46, "\ubcfc\ub54c": 46, "biggan\uc740": 46, "\uc774\ubbf8\uc9c0\uac04\ub4e4\uc758": 46, "\ud50c\ub77c\ubc0d\uace0\uac00": 46, "\ub290\ub08c\uc758": 46, "\ubf51\uc544\ub0b8\ub2e4": [24, 46], "\ub2e4\ucc44\ub85c\uc6b4": 46, "\ud55c\ub9c8\ub9ac\ub9cc": 46, "\uc0ac\uc9c4\ub3c4": 46, "\ubc95\uc744": 46, "label\uc774": 46, "data\uc5d0\ub294": 46, "\ud655\uc7a5\uc774": 46, 
"\ubd88\uac00\ub2a5\ud558\ub2e4": [46, 51], "unlabel": 46, "cluster": 46, "\ud558\ub824": 46, "12242": 47, "\ucd5c\uadfc\uc5d0": [30, 47, 48, 49], "\ub4f1\uc7a5\ud558\uc600\uc9c0\ub9cc": 47, "\uba74\ub4e4\uc744": 47, "\uac1c\uc120\ud558\uae30": 47, "\uc18c\uac1c\ub418\uc5c8\uace0": 47, "5\uc7a5\uc758": 47, "\uc815\ub3c4\ubc16\uc5d0": 47, "\uc18c\uc694\ub418\uc9c0": 47, "\uc54a\ub294\ub2e4\uace0": 47, "\uc54c\uc544\ubcf4\uae30": 47, "\uc815\ub9ac\ub97c": 47, "\uc785\ub825\ubc1b\uc544\uc11c": 47, "\uc218\uc2dd\uc801\uc73c\ub85c": [47, 54, 56], "alpha_tx": 47, "\ub54c\ub85c\ub294": 47, "\uace0\uc815\uc2dc\ud0a8\ub2e4\uace0": 47, "\uc55e\uc368": [47, 50, 54, 55], "\uc124\uba85\ub4dc\ub838\ub358": 47, "\ub0b4\uc6a9\ub4e4\uc744": 47, "blob": 47, "text_encoder_cl": 47, "import_model_class_from_model_name_or_path": 47, "noise_schedul": 47, "ddpmschedul": 47, "from_pretrain": [47, 53], "subfold": [47, 53], "text_encod": [47, 53], "autoencoderkl": [47, 53], "unet2dconditionmodel": [47, 53], "first_epoch": 47, "num_train_epoch": 47, "train_dataload": 47, "until": 47, "reach": 47, "resum": 47, "resume_from_checkpoint": 47, "resume_step": 47, "progress_bar": [47, 55], "pixel_valu": 47, "weight_dtyp": 47, "latent_dist": 47, "scaling_factor": 47, "offset_nois": 47, "bsz": 47, "randint": 47, "num_train_timestep": 47, "accord": 47, "magnitud": 47, "noisy_lat": 47, "add_nois": 47, "get": 47, "input_id": 47, "model_pr": 47, "prediction_typ": 47, "v_predict": 47, "get_veloc": 47, "part": [30, 47], "model_pred_prior": 47, "target_prior": 47, "float": 47, "prior_loss": 47, "sync_gradi": 47, "params_to_clip": 47, "itertool": 47, "clip_grad_norm_": 47, "max_grad_norm": 47, "zero_grad": [47, 48], "set_to_non": 47, "set_grads_to_non": 47, "\ub2f4\ub294": 47, "rare": [47, 50], "unicod": 47, "\uc2dc\ud0a8": [30, 47], "\ucd94\uac00\ud568\uc73c\ub85c\uc368": 47, "\uc720\uc9c0\ud558\uac8c": 47, "\uc774\ub85c\uc368": [47, 56], "\uac00\uc9c0\uc758": [30, 47], "\uccab\ubc88\uc9f8\ub85c\ub294": [47, 
54], "dino": [30, 47, 53], "\uc0dd\uc131\ub418\uae30": 47, "\uc120\ud638\ub41c\ub2e4\uace0": 47, "\uacc4\uc0b0\ub429\ub2c8\ub2e4": 47, "pairwis": 47, "\uc801\uc6a9\ub428\uc73c\ub85c\uc368": 47, "\uc18c\uac1c\ub4dc\ub838\ub358": 47, "div": 47, "\ud574\uacb0\ub418\ub294": 47, "\uc785\ub825\ud588\uc744\ub54c\uac00": 47, "\uc124\uba85\ud569\ub2c8\ub2e4": 47, "backpack": 47, "famou": 47, "painter": 47, "statu": 47, "sculptor": 47, "\ud615\ud0dc\ub3c4": 47, "\uc0dd\uc131\ub3c4": [47, 49], "modif": 47, "speci": 47, "\uace0\uc720": 47, "\ud55c\uacc4\uc810\ub3c4": 47, "\ub098\ud0c0\ub098\uc9c0": 47, "\ubcf8\ubb38\uc5d0": 47, "\uc18c\uac1c\ub418\uace0": 47, "\uc788\uc9c0\ub294": 47, "\ubd80\ubb38\uc5d0\uc11c\ub3c4": 47, "\ud559\uc2b5\uacb0\uacfc\ub97c": 47, "\ubcf4\uc5ec\uc8fc\ub294\ub370": 47, "\uc7a5\ub9cc\uc73c\ub85c\ub3c4": 47, "\uc0ac\ub840\ub4e4\uc744": 47, "nip": 48, "2014": [48, 56], "1406": 48, "2661": 48, "eriklindernoren": 48, "smart": [48, 56], "lab": [48, 55, 56], "kaist": [48, 56], "\ub525\ub7ec\ub2dd": [48, 56], "chp": 48, "editor": [48, 56], "\ub098\ub269\ub2c8\ub2e4": 48, "\uacc4\uc0b0\ud55c\ub2e4\ub294": 48, "tractabl": 48, "\uadfc\uc0ac\ud654\uc2dc\ucf1c": 48, "pixelcnn": 48, "pixelrnn": 48, "boltzmann": 48, "\ud655\ub960\ubd84\ud3ec\ub97c": 48, "\uc815\uaddc\ud654\ud558\ub294": 48, "\uacc4\uc0b0\ud558\uc9c0": 48, "\uc644\uc804\uadf8\ub798\ud504": 48, "\uc5b4\ub824\uc6cc": [30, 48], "\ub9ce\uc544\uc11c": 48, "\uc644\uc804\uadf8\ub798\ud504\uc774\uae30": 48, "\ub178\ub4dc\uac00": 48, "\ub298\uc5b4\ub0a0\uc218\ub85d": 48, "\uac04\uc120": 48, "\uae09\uc99d\ud558\ub294": 48, "restrict": 48, "rbm": 48, "\uc81c\uc548\ub418\uae30\ub3c4": 48, "\uc815\uc758\ud558\uc9c0": 48, "\ub300\ud45c\uc801\uc73c\ub85c\ub294": 48, "ian": 48, "goodfellow": 48, "2014\ub144\uc5d0": 48, "\uc18c\uac1c\ub418\uae30": 48, "\uc804\uae4c\uc9c0": 48, "\ub144": 48, "\uc790\ub9ac\uc7a1\uc558\uc5c8\uc2b5\ub2c8\ub2e4": 48, "taxonomi": 48, "\uc7a0\uc7ac\ubcc0\uc218": [48, 56], 
"\uadf8\ub85c\ubd80\ud130": 48, "\uad6c\ubd84\ud558\ub294": 48, "\uad6c\uc131\uc774": [30, 48], "\ub9d0\ud574\uc11c": 48, "\ub4e4\uc5b4\uc624\uba74": 48, "\uac00\uc9dc\ub85c": 48, "binari": [30, 48], "\ucf54\ub4dc\ub3c4": 48, "in_feat": 48, "out_feat": 48, "batchnorm1d": 48, "leakyrelu": 48, "inplac": 48, "opt": 48, "latent_dim": 48, "np": 48, "prod": 48, "img_shap": 48, "tanh": 48, "img_flat": 48, "d\ub97c": 48, "g\ub97c": 48, "min_g": 48, "max_d": 48, "logd": 48, "p_z": 48, "\uc54c\uace0\ub9ac\uc998\uacfc": 48, "\ube44\uad50\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 48, "n_epoch": 48, "fill_": 48, "real_img": 48, "optimizer_g": 48, "gen_img": 48, "measur": 48, "fool": 48, "g_loss": 48, "adversarial_loss": 48, "optimizer_d": 48, "real_loss": 48, "fake_loss": 48, "d_loss": 48, "print": 48, "item": 48, "batches_don": 48, "sample_interv": 48, "save_imag": 48, "nrow": 48, "\ucd5c\ub300\ud654\ud558\uace0": 48, "\uc9c4\ud589\ud558\uac8c": 48, "\uc0c1\ud669\uc774": 48, "\ucd5c\uc18c\ud654\ud558\uc9c0": 48, "\ucd5c\ub300\ud654\ud558\ub294": 48, "\uae30\ubc95\ub3c4": 48, "\uc644\ubcbd\ud788": 48, "\ubcf5\uc6d0\ud558\uace0": 48, "\uc5b8\uc81c\ub098": 48, "\ub0b4\ubc49\uac8c": 48, "proposit": 48, "p_g": 48, "\uc99d\uba85\ud558\uc790\uba74": 48, "int_x": 48, "dx": 48, "int_z": 48, "dz": [48, 56], "\uc77c\ub54c": 48, "\uc131\ub9bd\ud558\uace0": 48, "\uac19\uace0": 48, "ast": 48, "jsd": 48, "\ucd5c\uc19f\uac12\uc740": 48, "\uc131\ub9bd\ud569\ub2c8\ub2e4": 48, "mnist": [48, 56], "toronto": 48, "tfd": 48, "\ud3c9\uac00\uc2dc\uc5d0\ub294": 48, "parzen": 48, "estimation\uc744": 48, "\ud45c\ub97c": 48, "vae\ub294": 48, "\ud750\ub9bf\ud558\ub2e4\ub294": 48, "\ucc28\uc6d0\ucd95\uc18c\ub85c": 48, "\ud65c\uc6a9\ub418\uace0": 48, "\ud65c\uc6a9\ub418\uc5c8\ub2e4\uace0": 48, "11487": 49, "learning\uc774": 49, "\ub3c5\ucc3d\uc801\uc778": 49, "\ub9d0\ubb49\uce58": 49, "corpu": 49, "llm\ub4e4\uc758": 49, "embedding\ub4e4\uc740": 49, "\ud6a8\uacfc\uc801\uc774\ub77c\uace0": 49, 
"\uc0ac\uc774\uc988\ub97c": [49, 52], "\uc911\uc694\ud558\ub2e4\ub294": 49, "\uc81c\uc2dc\ud558\uc5ec": 49, "weight\uc744": 49, "palett": [49, 50], "\uad6c\uc870\ubcf4\ub2e4": 49, "\uc81c\uc2dc\ud568": 49, "\ub2ec\uc131\ud568": 49, "evaluation\uc6a9": 49, "encoder\uc744": 49, "\ud574\ub193\uc74c": [30, 49], "generation\uc774": 49, "\uc77c\uc815\ud558\uc9c0": 49, "\ubabb\ubc1b\uc544\uc11c": 49, "class\ub098": 49, "object\uc774": 49, "\uc77c\uc815\ud558\uace0": 49, "\ubb34\uc5c7\uc744": 49, "\uc0dd\uc131\ud558\ub294\uac83\uc778\uc9c0": 49, "\uc790\uc138\ud558\uac8c": 49, "guide\uc758": 49, "\ub192\uc774\uba74": 49, "\uac00\uc911\uce58\uc758": 49, "\uc774\ub3d9\uc2dc\ucf1c": 49, "\ube57\ub098\uac00": 49, "\ub35c\ud55c": 49, "\ub40c": 49, "\ubc31\ubd84\uc704\uc218": 49, "\uc808\ub300": 49, "\uc9c0\uc815\ud558\uace0": 49, "\uc9c0\uc810\uc758": 49, "among": 49, "net\uc774\ub77c\ub294": 49, "modification\uc744": 49, "effu": 49, "net\uc740": 49, "\uc758\ub8cc\ucabd\uc73c\ub85c": 49, "\uc788\ub294\uac78\ub85c": 49, "\uc544\ub294\ub370": 49, "remov": 49, "keep": 49, "block\uc5d0\uc11c": 49, "blocks\ub97c": 49, "\ubca4\uce58\ub9c8\ud06c": 49, "categori": 49, "\uc774\ub8e8\uc5b4\uc84c\ub2e4": 49, "\uae43\ud5c8\ube0c\uc5d0\uc11c": 49, "\ub2e4\uc6b4": 49, "\uac17\ub2e4": 49, "25\uba85\uc758": 49, "\ud3c9\uac00\uc790": 49, "a\uc5d0\uc11c": 49, "\ud3c9\uac00\uc790\ub294": 49, "\uc9c8\ubb38\uc744": 49, "\uae30\uc900\uc810\uc73c\ub85c": 49, "q1": 49, "q2": 49, "repres": 49, "\uae30\uc900\uc810": 49, "\ub2f5\ubcc0": 49, "\uc120\ud0dd\ud574\uc57c\ud568": 49, "am": 49, "indiffer": 49, "screenshot": 49, "drawbench\uc5d0\uc11c": 49, "\uccb4\ub9ac\ud53c\ud0b9": 49, "\uce74\ud14c\uace0\ub9ac\uc5d0\uc11c\ub3c4": 49, "\uc8fc\uc7a5\uc778": 49, "peopl": 49, "\uc62c\ub77c\uac10": 49, "people\uc744": 49, "\uc0dd\uc131\ud558\uae30\uc5d0": 49, "rater": 49, "xxl\ub85c": 49, "\uc120\ud638\ud568": 49, "evaul": 49, "\uc911\uc694\ud568": 49, "boost\uc5d0": 49, "thresholding\uc744": 49, "\ub04c\uc5b4": 
49, "\uc62c\ub9b4": 49, "usag": 49, "much": 49, "editbench": 50, "06909": 50, "\uc18c\uac1c\ud558\ub294": [50, 52, 54, 55], "\ud3c9\uac00\uae30\ubc95": 50, "\uc608\uc815\uc785\ub2c8\ub2e4": [50, 54, 55], "\uc9c0\uc815\ud558\uc5ec": 50, "\ucc38\uc870\ud558\uc9c0": 50, "\uc624\ub85c\uc9c0": 50, "\uc720\ub3c4\ud558\ub294": 50, "\ubaa9\ud45c\uc785\ub2c8\ub2e4": 50, "mobilenet": 50, "detector": 50, "\ud2b9\uc9d5\uc740": 50, "\uc810\uc785\ub2c8\ub2e4": 50, "sr3": 50, "\uac00\uc9c4\ub2e4\uace0": 50, "\uc785\ub825\ud569\ub2c8\ub2e4": [50, 55], "\ub0b4\uae30": [50, 51], "\ucd94\uac00\ub418\ub294": 50, "\ucd08\uae30\ud654\ud574\uc11c": 50, "\uc18c\uac1c\ub418\uc5c8\ub358": 50, "1\ubd80\ud130": 50, "\ubcc0\ud654\uc2dc\ud0a4\ub294": 50, "oscil": 50, "\uc0c1\uc2b9\ub418\ub294": 50, "240\uac1c\uc758": 50, "\uad6c\ucd95\ub418\uc5b4\uc788\uace0": 50, "\uc30d\ub9c8\ub2e4": 50, "3\uac00\uc9c0\uc758": 50, "\uce21\uc815\ud558\uac8c": 50, "\uc73c\ub85c\ub294": [50, 55], "clipscor": 50, "prec": 50, "\uc808\ubc18\uc740": 50, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\ubd80\ud130": 50, "\uc218\uc9d1\ub418\uc5c8\uace0": 50, "\uc0dd\uc131\ud574\uc11c": 50, "\uad6c\ucd95\ud588\uc2b5\ub2c8\ub2e4": 50, "\uc694\uc18c\ub4e4\uc744": 50, "\uac16\ucd94\ub3c4\ub85d": 50, "common": 50, "outdoor": [50, 54], "metal": 50, "\ubb38\uad6c\ub97c": 50, "farm": 50, "\ud574\ub2f9\uc0ac\uc9c4\ucc98\ub7fc": 50, "\ud06c\uae30\ub3c4": 50, "\uce21\uc815\ud574\ubcf8": 50, "medium": 50, "\uc131\ub2a5\uc801\uc73c\ub85c": 50, "\uc18d\uc131\ubcf4\ub2e4": 50, "\uc18d\uc131\uc5d0": 50, "\uc0ac\uc9c4\uc785\ub2c8\ub2e4": [50, 55], "maskrich": 50, "2310": 51, "04378": 51, "\uc131\uacfc\ub97c": 51, "\uac70\ub450\uc5c8\uc9c0\ub9cc": 51, "\uac00\uc9c0\uae30": 51, "\uadf9\ubcf5\ud558\uae30": 51, "solver\uc758": 51, "\uc131\ub2a5\uac1c\uc120\uc744": 51, "lu": 51, "\ucd94\ub860\ud560\uc218": 51, "models\uc740": 51, "trajectory\uc5d0": 51, "\uac16\ub3c4\ub85d": 51, "\ubaa8\ub378\ub85c\uc11c": 51, "2\uac00\uc9c0\uc758": 51, 
"model\uc774\uae30": 51, "\uc801\ud569\ud558\uc9c0": [51, 53, 54], "text2img": 51, "\uc81c\uc548\uc810\uc740": 51, "3\uac00\uc9c0\ub2e4": 51, "lcm": [51, 53], "32\uc2dc\uac04": 51, "\ubc16\uc5d0": 51, "\uac78\ub9ac\uc9c0": 51, "models\ub294": [24, 51], "\uc8fc\uc785\ud558\uace0": 51, "sampling\ud558\ub294": 51, "\uae30\ubc95\uc774\ub2e4": 51, "forwad": 51, "\ud655\ub960\ubd84\ud3ec\uc778": 51, "0t": 51, "scheduler\ub97c": 51, "timestep\uc758": 51, "\ud655\ub960\ubbf8\ubd84\ubc29\uc815\uc2dd": 51, "q_t": 51, "ptobabl": 51, "\uc0c1\ubbf8\ubd84\ubc29\uc815\uc2dd": 51, "\ub9cc\uc871\ud558\ub294\ub370": 51, "dx_t": 51, "nabla_x": 51, "\uadfc\uc0ac\uce58\ub97c": 51, "sampling\ud558\ub294\ub370": 51, "ode\ub77c": 51, "\uacbd\ud5d8\uc801": 51, "sampling\uc758": 51, "cfg\uc758": 51, "prediction\uc740": 51, "\ub300\uccb4\ub41c\ub2e4": 51, "consistenct": 51, "cm\uc758": 51, "\uada4\uc801\uc5d0": 51, "point\uc640": 51, "mapping\ub418\ub294": 51, "\uc591\uc218\uac12\uc744": 51, "\uc790\uae30": 51, "\uc790\uc2e0\uc5d0": 51, "\ub9cc\uc871\ud574\uc57c\ud55c\ub2e4": 51, "\ud568\uc218\uc774\uba70": 51, "\uc2ec\uce35": 51, "cm\uc740": 51, "\ud559\uc2b5\ud558\uba70": 51, "ode\uc5d0": 51, "solver\ub85c": 51, "method\ub4f1\uc758": 51, "\uc218\uce58\uc801\uc778": 51, "\uc601\uc0c1\uc5d0": 51, "generation\ub9cc": 51, "\uc7a0\uc7ac\uc131\uc774": 51, "\ud0d0\uad6c\ub418\uc9c0": 51, "\ubc1c\ud718\ud558\uc5ec": 51, "\ub3c4\uc804\uc801\uc778": 51, "lcd": 51, "\uc124\uacc4\ub418\uc5c8\uae30": 51, "vector\ub85c": 51, "\uc784\ubca0\ub529\ud558\uace0": 51, "\ubcf5\uc6d0\ud55c\ub2e4": 51, "\uc0c1\uc5d0\uc11c": 51, "\uc774\ub904\uc9c0\uae30": 51, "laptop": 51, "\uc815\uc758\ub41c\ub2e4": 51, "dz_t": 51, "c\ub294": 51, "ode\uc0c1\uc5d0\uc11c": 51, "trick\uc778": 51, "\ubcc0\ud615\ud558\uc5ec": 51, "\ub300\uc785\ud55c": 51, "\uce58\ud658": 51, "cm\uacfc": 51, "teacher": [51, 54], "pd\uc5d0\uc11c": 51, "8\uc758": 51, "\uc6b0\ud56d\uc744": 51, "\uadfc\uc0ac\ud55c": 51, "solver\uc774\uae30": 51, 
"distillation\uc2dc\uc5d0\ub9cc": 51, "edm\uc744": 51, "\ud1a0\ub300\ub85c": 51, "cm\uc5d0\uc11c": 51, "\uac04\uaca9\uc73c\ub85c": 51, "\uc5b4\ub5a0\ud55c\uac04\uaca9\uc744": 51, "8\uc744": 51, "\uc801\ubd84": 51, "clasifi": 51, "\ud6c8\ub828\ud574\uc57c\ud558\uae30": 51, "\ud6a8\uc728\uc801\uc774\uc9c0": 51, "\ubabb\ud558\uba70": 51, "lcms\uc640": 51, "method\uc5d0": 51, "cfg\ub97c": 51, "\ud1b5\ud569\ud558\uc600\ub2e4": 51, "distill\uc758": 51, "sampling\uc5d0": 51, "\ud1b5\ud569\ud558\uc600\uc73c\ub098": 51, "\ud559\uc2b5\uc2dc\uac04\uc774": 51, "\uae38\uace0": 51, "2\ub2e8\uacc4\ub97c": 51, "\ub204\uc801\ub418\uae30": 51, "t\uc774\uc5d0": 51, "cfg\uc5d0": 51, "varnoth": 51, "\ubcc0\ud615\ub418\ubbc0\ub85c": 51, "ode\ub294": 51, "function\ub3c4": 51, "\ubcc0\uc218\ub85c": 51, "\ubc1b\uc544\uc624\uae30": 51, "omega_": 51, "sampling\ub41c\ub2e4": 51, "\uc774\uc804\uacfc": 51, "cfg\uac00": 51, "\uc608\uce21\ubaa8\ub378": 51, "11\ucc98\ub7fc": 51, "\ubcf4\ud1b5\uc758": 51, "\uc7a1\uace0": 51, "\uc774\uac19\uc774": 51, "\ucd18\ucd18\ud55c": 51, "\uac10\uc18c\uc2dc\ud0a4\uae30": 51, "loss\ub3c4": 51, "\uc791\uc544\uc9c0\uac8c": 51, "\uc218\ub834\uc18d\ub3c4\ub3c4": 51, "\ub290\ub824\uc9c0\uac8c": 51, "\uc218\ub834\uc758": 51, "\uc218\ucc9c\uc5d0\uc11c": 51, "\uc218\uc2ed\uc73c\ub85c": 51, "\ub2e8\ucd95\uc2dc\ud0a4\ub294": 51, "scheduler\ub85c": 51, "solver\ub3c4": 51, "\uc99d\uba85\ud588\ub2e4": 51, "\ube44\uad50\ud558\ub294\uac83\uc774": 51, "step\ub9cc\ud07c": 51, "\ub108\ubb34\uc791\uc73c\uba74": 51, "\uc218\ub834\uc18d\ub3c4\ub97c": [24, 51], "\uac16\uac8c\ub418\uba70": 51, "\uac12\uc77c": 51, "\uc624\ucc28\uac00": 51, "\ucee4\uc9c8\uc218": 51, "14\uc5d0": 51, "k\uac12\uc744": 51, "\uc218\uc2dd\ub3c4": 51, "\ubcc0\uacbd\ud560": 51, "foundat": 51, "cunstom": 51, "\ub54c\uac00": 51, "lcf": 51, "dataset\ub3c4": 51, "\uc885\uc18d\uc5c6\uc774": 51, "inference\ub97c": 51, "lcm\uc740": 51, "\ubc14\ub85c\ubc14\ub85c": 51, "\uc788\ub294\uac83\uc740": 51, 
"\uc544\ub2c8\uace0": 51, "consisteni": 51, "dataset\uc73c\ub85c": [24, 51], "\uc0ac\uc6a9\ud558\uae30\ub9cc\ud558\uba74": 51, "diffuson": 51, "\ubc14\ub85c\ud559\uc2b5\uc774": 51, "650k": 51, "\uc55e\uc11c\ub9d0\ud55c\uac83\ucc98\ub7fc": 51, "768x768\uc758": 51, "solver\ub85c\ub294": 51, "20\uc758": 51, "lcm\uacfc": 51, "\uc131\ub2a5\ube44\uad50\ub97c": 51, "\ud588\ub294\ub370": [30, 51], "distill\uc740": 51, "\uc624\ud508\uc18c\uc2a4": 51, "\ucf54\ub4dc\uac00": 51, "\uc218\ub834\ud558\uace0": 51, "\uc0dd\uc131\ud558\uc600\ub2e4": 51, "distillation\uc774\uc9c0\ub9cc": 51, "\ubcf4\uc5ec\uc92c\ub2e4": 51, "\ud478\ub294": 51, "solver\ub4e4": 51, "lcm\uc5d0": 51, "\ube44\uad50\uc640": 51, "schedule\uc758": 51, "iteration\uc5d0\uc11c\uc758": 51, "\uace0\uc815\ud574\uc11c": 51, "\uc62c\ub838\uc744": 51, "\ud6e8\uc52c\ub354": 51, "\uc218\ub834\ud558\uba70": [51, 56], "dpm\uacfc": 51, "50\uc77c": 51, "ddim\ubcf4\ub2e4": 51, "error\ub97c": 51, "ddim\uc5d0": 51, "\uc801\uae30": 51, "\uc88b\uc544\uc9c0\uc9c0\ub9cc": 51, "quality\uc640": 51, "diversity\uc5d0": 51, "off\uac00": 51, "inference\ub294": 51, "\uac00\uc9c0\uc9c0\ub294": 51, "\uc54a\ub294\uac83\uc73c\ub85c": 51, "\ud655\uc778\ub41c\ub2e4": 51, "\uac1c\uc120\uc758": 51, "\uc788\ub294\uac83": 51, "\ube44\uad50\ud574": [24, 51], "\ubd24\uc744": 51, "\ud655\uc5f0\ud558\uac8c": 51, "\ub4e4\uc5b4\ub09c\ub2e4": 51, "\uc2dc\uc5d0\ub3c4": 51, "\uc99d\uba85\ud55c\ub2e4": 51, "\ud3ec\ucf13\ubaac": 51, "\uc2ec\uc2a8": 51, "lcf\ub97c": 51, "\ub54c\ub97c": 51, "\uc644\ubcbd\ud558\uc9c4": 51, "catch\ud55c": 51, "\uc0c1\uc5d0": 51, "dataset\uc5d0\ub3c4": 51, "step\uc73c\ub85c\ub3c4": 51, "2211": 52, "10440": 52, "\uba85\uc2dc\ud569\ub2c8\ub2e4": [52, 53], "extrem": 52, "hash": 52, "raster": 52, "softwar": 52, "\uc720\ub3d9\uc801\uc73c\ub85c": 52, "\uc804\ud658\ud558\uc5ec": 52, "faster": 52, "\uad6c\uc131\ub418\uc5b4\uc788\ub2e4\uace0": 52, "backpropag": [52, 56], "\uacc4\uc0b0\ud558\ub294\ub370": 52, 
"\uc18c\uc694\ub41c\ub2e4\uace0": 52, "scratch": [52, 53], "\ub2f4\uc544\ub0b4\ub294\ub370": 52, "\uc720\uc6a9\ud558\ub2e4\ub294": 52, "\ubcf4\uc600\uc9c0\ub9cc": 52, "\ub192\ub2e4\uace0": [52, 56], "instantngp": 52, "octre": 52, "\uc904\uc600\ub2e4\uace0": 52, "\uc608\uc2dc\ucc98\ub7fc": [52, 55], "\uba54\ubaa8\ub9ac\uc640": 52, "\uc5f0\uc0b0\uc801\uc778": 52, "\uc81c\ud55c\uc774": 52, "\ub0b4\uae30\uac00": 52, "v_t": 52, "\ud45c\ud604\ud569\ub2c8\ub2e4": 52, "vertic": 52, "tetrahedra": 52, "surfac": 52, "instant": 52, "ngp": 52, "\uc5c5\ub370\uc774\ud2b8\ud558\uba70": 52, "\uc5c5\ub370\uc774\ud2b8\ub9c8\ub2e4": 52, "environ": 52, "\uc791\uac8c": 52, "10\ubc30": 52, "\uc99d\uac00\uc2dc\ucf30\ub2e4\uace0": 52, "\uc9c4\ud589\ud558\uae30": 52, "\ucc28\uac10\ud568\uc73c\ub85c\uc368": 52, "\uc804\ud658\ud558\uace0": 52, "\uc9c4\ud589\ud560\ub54c": 52, "\ucd5c\uc801\ud654\ud558\uac8c": 52, "\ucd94\uc801\ud558\uc5ec": 52, "397": 52, "speed": 52, "5000": 52, "\uc124\uc815\uc73c\ub85c": 52, "\ubd84": 52, "3000": 52, "\uac1d\uccb4\uc5d0\uc11c\uc758": 52, "\uc798\ud558\ub294": 52, "\uac1d\uccb4\ub4e4\uc5d0": 52, "\uc124\ubb38\uc870\uc0ac\ud574\ubcf8": 52, "61": 52, "\uc720\uc800\ub4e4\uc774": 52, "\uc6b0\uc138\ud558\uac8c": 52, "\ud3c9\uac00\ud558\uc600\uc2b5\ub2c8\ub2e4": 52, "\ucd5c\uc801\ud654\ud560": 52, "\uc9c4\ud589\ud574\ubcf8": 52, "\uc5b4\ub835\uc9c0\ub9cc": 52, "\uc88c\uce21": 52, "\uc2e4\uc0ac\uc801\uc73c\ub85c": 52, "3\ub2e8\uacc4\ub85c": 52, "\uc218\uc815\ud55c": 52, "\ud65c\uc6a9\ud568\uc73c\ub85c\uc368": 52, "\ubcf4\uc5ec\uc904": 52, "40\ubd84": 52, "\ub9cc\uc5d0": 52, "\uadf8\ub798\ud53d": 52, "\uc18c\ud504\ud2b8\uc6e8\uc5b4\uc640": 52, "\ud638\ud658\uc774": 52, "gaparmar": 53, "turbo": 53, "\uc54a\ub2e4\uace0": [53, 54], "\uc124\uba85\ud558\uace0": 53, "\uc190\uc2e4\ub41c\ub2e4\uace0": 53, "\uc774\ubbf8\uc9c0\uc77c": 53, "\uce58\uba85\uc801\uc774\ub77c\uace0": 53, "\uc785\ub825\ud568\uc73c\ub85c\uc368": [53, 55], "\ucda9\ub3cc\uc744": 53, 
"\ubc29\uc9c0\ud569\ub2c8\ub2e4": 53, "\ub450\ubc88\uc9f8\ub85c": 53, "cyclegan_turbo": 53, "pretrained_nam": 53, "pretrained_path": 53, "ckpt_folder": 53, "lora_rank_unet": 53, "lora_rank_va": 53, "autotoken": 53, "cliptextmodel": 53, "sched": 53, "make_1step_sch": 53, "my_vae_encoder_fwd": 53, "__get__": 53, "__class__": 53, "my_vae_decoder_fwd": 53, "skip_conv_1": 53, "kernel_s": [53, 55], "skip_conv_2": 53, "skip_conv_3": 53, "skip_conv_4": 53, "ignore_skip": 53, "\uc544\ucf00\ud14d\uccd0\ub294": 53, "\ud615\ud0dc\ub85c\ub3c4": 53, "\uc131\ub2a5\ubcf4\ub2e4": 53, "\ud559\uc2b5\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 53, "\uc18c\uac1c\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 53, "\uc0ac\ub840\ub4e4\ub3c4": 53, "sdedit": 53, "\uc57d\uc810\uc744": 53, "\ub2e8\ucd95\ud558\uae30": 53, "\uc815\uc758\ud558\uc5ec": 53, "\ucd94\ucd9c\ud569\ub2c8\ub2e4": 53, "\ucda9\ub3cc\uc774": 53, "\uc0dd\uaca8": 53, "\ucd95\uc18c\ud558\uae30": 53, "\ubbfc\uac10\ud55c": 53, "\uc544\ud0a4\ud14d\uccd0\uc5d0\uc11c": 53, "\uc804\ud6c4\ub85c": 53, "\uc785\ub825\uc2dc\ud0b5\ub2c8\ub2e4": 53, "\ubcc0\ud615\ub41c": 53, "cycl": 53, "c_y": 53, "\ud568\uc218\uc785\ub2c8\ub2e4": 53, "\ubcc0\ud615\ud558\uac8c": 53, "\uc18c\uac1c\ub418\ub294": 53, "rec": 53, "\uc870\ud569\uc73c\ub85c": 53, "\uad6c\uc131\ub418\uc5b4\uc788\uc2b5\ub2c8\ub2e4": 53, "idt": 53, "e_x": 53, "\ud569\uc73c\ub85c": 53, "\uc77c\uc785\ub2c8\ub2e4": 53, "\uc785\ub825\ubc1b\ub294": 53, "enc": [53, 55], "\uac00\uc911\uce58\uc640": 53, "\ucd9c\ub825\uac12\uc744": 53, "theta_0": 53, "\uac00\uc911\uce58\uc774\uace0": 53, "\ubcc0\ud654\uc2dc\ud0a4\uba74\uc11c": 53, "330mb": 53, "\uc18c\uc694\ub429\ub2c8\ub2e4": 53, "initialize_unet": 53, "return_lora_module_nam": 53, "requires_grad_": 53, "l_target_modules_encod": 53, "l_target_modules_decod": 53, "l_modules_oth": 53, "l_grep": 53, "to_k": 53, "to_q": 53, "to_v": 53, "conv1": 53, "conv2": 53, "conv_in": [53, 55], "conv_shortcut": 53, "conv_out": 53, "proj_out": 53, "proj_in": 53, "proj": 53, 
"named_paramet": 53, "pattern": [30, 53], "down_block": 53, "up_block": 53, "lora_conf_encod": 53, "loraconfig": 53, "init_lora_weight": 53, "target_modul": 53, "lora_conf_decod": 53, "lora_conf_oth": 53, "add_adapt": 53, "adapter_nam": 53, "default_encod": 53, "default_decod": 53, "default_oth": 53, "set_adapt": 53, "initialize_va": 53, "constant_": 53, "l_vae_target_modul": 53, "vae_lora_config": 53, "vae_skip": 53, "hors": 53, "leftrightarrow": 53, "zebra": 53, "yosemit": 53, "summer": 53, "winter": 53, "\uc8fc\ud589": 53, "bdd100k": 53, "clear": 53, "foggi": 53, "\uc2e4\ud5d8\ud558\uc600\uc2b5\ub2c8\ub2e4": 53, "protocol": 53, "\ub9cc\uc871\uc2dc\ucf1c\uc57c": 53, "\ubd84\ud3ec\uc640\uc758": 53, "\uc77c\uce58": 53, "struct": 53, "\ub0b4\uc9c0\ub294": 53, "\uc0ac\uc9c4\ub4e4\uacfc": 53, "cut": 53, "cyclediffus": 53, "ddib": 53, "\ud6fc\uc190\uc2dc\ud0a4\ub294": 53, "\ube48\ubc88\ud558\uac8c": 53, "\ubc1c\uc0dd\ud558\uace0": 53, "\uac00\uc911\uce58\ub85c": 53, "\uc77c\uc5b4\ub098\ub294": 53, "individu": [30, 53], "\ube44\uad50\ud558\uc600\uc744": 53, "\ubbf8\uc138\ud558\uac8c": 53, "\ub192\uc74c\uc744": 53, "\ubcf4\uc5ec\uc90c\uc744": 53, "\uc735\ud569\ud574\uc11c": 53, "\uc124\uc815\ud560": 53, "\uba54\ubaa8\ub9ac\uc5d0": 53, "\ud07d\ub2c8\ub2e4": 53, "00512": 54, "\ubcf4\uc5ec\uc8fc\uba70": 54, "\uac01\uad11\uc744": 54, "\uc18c\uac1c\ud558\uac8c": 54, "\uc124\uba85\ud558\uc790\uba74": 54, "\uc218\ucc9c\ubc88\uc758": 54, "\uc774\ubbf8\uc9c0\ub4e4\uacfc": 54, "\ud568\uc218\ub4e4\uc740": 54, "lambda_t": 54, "monoton": 54, "decreas": 54, "\ud558\ub3c4\ub85d": [30, 54], "\uc124\uc815\ub429\ub2c8\ub2e4": 54, "\uc774\ub4e4\uc744": 54, "\uc18c\uac1c\ud588\ub358": 54, "\ucd94\uac00\ud560\uc9c0": 54, "2021c": 54, "probabiil": 54, "\ud45c\ud604\ud574\uc11c": 54, "d\u03c3_t": 54, "nabla_z": 54, "rung": 54, "kutta": 54, "\uc801\uc6a9\ud588\uc744\ub54c": 54, "probabilt": 54, "\ud574\uc11d\ud558\uba74": 54, "\uc55e\uc73c\ub85c": 54, "\ubcf4\uac8c": 54, "\uae30\ubc95\uc740": 
54, "\uc808\ucc28\ub85c": 54, "\ubcf5\uc0ac": 54, "\uacf5\uc2dd\uc740": 54, "\uc18c\uac1c\ub418\ub294\ub370": 54, "\uacf5\uc2dd\uc785\ub2c8\ub2e4": 54, "\ud575\uc2ec\uc785\ub2c8\ub2e4": 54, "\uc9c4\ud589\ub418\ub294": 54, "\uac12\ub4e4\uc5d0": 54, "\ubaa8\ub378\uc774\ub77c\uace0": 54, "sharp": 54, "\uc904\uc5b4\ub4dc\ub294": 54, "\ud655\uc778\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": [54, 56], "progresss": 54, "\uc124\uc815\uac12\uc5d0": 54, "\uc54c\uc544\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 54, "\uc77c\ubc18\uc131\uc744": 54, "\uc783\uc9c0": 54, "\ub300\ub2e4\uc218\uc758": 54, "\uc190\uc2e4\ud568\uc218\uc5d0": 54, "\ub300\uc785\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 54, "\ub3d9\uc77c\ud558\uba70": 54, "\uae30\ubc95\uc5d0\uc11c\ub294": 54, "\ub0b4\uc5d0\uc11c\uc758": 54, "\ud559\uc2b5\ub418\uc9c0\ub9cc": 54, "\uc9c4\ud589\ub420\uc218\ub85d": 54, "\uac10\uc18c\ud55c\ub2e4\ub294": 54, "\ud655\uc778\ud558\uac8c": 54, "\uac00\uae4c\uc6cc\uc9c0\uac8c": 54, "\ucee4\uc9c0\uac8c": 54, "\uc5ec\ub7ec\ubc88\uc758": 54, "\uac70\uce60": 54, "\uc0c1\uad00\uc5c6\uc9c0\ub9cc": 54, "\uc904\uc5b4\ub4e4\uc218\ub85d": 54, "\uce58\uba85\uc801\uc774\uac8c": 54, "\uc785\ub825\uc73c\ub85c\ub294": 54, "\uc0ac\ub77c\uc9c0\uac8c": 54, "\ubc29\uc548\uc73c\ub85c": 54, "\ubc29\uc9c0\ud558\ub3c4\ub85d": 54, "\uc124\uc815\ub418\uc5c8\ub2e4\uace0": 54, "\ud655\uc778\ud588\uc2b5\ub2c8\ub2e4": 54, "\ubd80\uac00\uc801\uc73c\ub85c": 54, "\uae30\ubc95\ub4e4\ub85c": 54, "\ube44\uad50\ud574\ubcf8": 54, "snr": 54, "\uc870\ud569\uc744": 54, "\ud604\uc0c1\ub3c4": 54, "\uc2e4\ud5d8\uacb0\uacfc\ub97c": 54, "\uc9c4\ud589\uc2dc": 54, "\ub370\uc774\ud130\uc14b\uc5d0\uc11c\ub294": 54, "\ucc38\uc870\ud558\uc2dc\uba74": 54, "8192": 54, "\uc2dc\uc791\ud558\uc600\uace0": 54, "anneal": 54, "\uc9c4\ud589\ud574\ub3c4": 54, "undistil": 54, "\uc900\ud558\ub294": 54, "\uc9c4\ud589\ud558\uba74\uc11c": 54, "\uc0dd\uac01\ud588\uc744\ub54c": 54, "\uc7a5\uc810\uc774\ub77c\uace0": 54, "\uc798\ub418\ub294": 54, "\ud69f\uc218\ub97c": 54, 
"\ube44\uad50\ud574\ubcf4\uace0": 54, "4\ubc30\uc529": 54, "\uc904\uc5ec\uac00\uba74\uc11c": 54, "\uc904\uc784\uc5d0\ub3c4": 54, "\uc904\uc9c0": 54, "\ud559\uc2b5\ubc29\uc2dd\uc73c\ub85c\ub294": 54, "dig": 55, "more": 55, "08453": 55, "tencent": [24, 55], "arc": 55, "\ub09c\ud574\ud55c": 55, "car": 55, "fly": 55, "wing": 55, "iron": 55, "bunni": 55, "ear": 55, "\uc785\ub825\ubc1b\uc744": 55, "\ub9cc\uc73c\ub85c\ub294": 55, "\ud544\uc694\ud558\ub2e4\uace0": 55, "\uc11c\uc220\ud569\ub2c8\ub2e4": 55, "extern": 55, "77m": 55, "\uc5f0\uc0b0\uc791\uc5c5\uc774": 55, "\uc2e4\ud589\ub429\ub2c8\ub2e4": 55, "\uac00\uc838\uc624\uae30": 55, "compos": 55, "generaliz": [24, 55], "\ubc14\uafb8\uace0": 55, "unshuffl": 55, "\ubcc0\ud658\uc774": [30, 55], "1\uac1c\uc758": 55, "4\ubc88": 55, "\uac70\uce58\uace0": 55, "f_c": 55, "\uc0dd\uc131\ub418\uace0": 55, "\ub354\ud574\uc9c0\uac8c": 55, "\ub3d9\uc77c\ud558\ub3c4\ub85d": 55, "\uc124\uc815\ud588\uae30": 55, "\ub367\uc148": 55, "\uc5f0\uc0b0\ud558\ub294\ub370": 55, "fulladapt": 55, "in_channel": 55, "640": 55, "num_res_block": 55, "downscale_factor": 55, "pixelunshuffl": [24, 55], "adapterblock": 55, "total_downscale_factor": 55, "out_channel": 55, "downsample2d": 55, "in_conv": 55, "adapterresnetblock": 55, "act": 55, "adapter_st": 55, "adapter_input": 55, "adapter_conditioning_scal": 55, "num_images_per_prompt": 55, "repeat": 55, "do_classifier_free_guid": 55, "num_warmup_step": 55, "order": 55, "latent_model_input": 55, "scale_model_input": 55, "prompt_emb": 55, "cross_attention_kwarg": 55, "down_block_additional_residu": 55, "noise_pred_uncond": 55, "noise_pred_text": 55, "previou": 55, "extra_step_kwarg": 55, "prev_sampl": 55, "\uc885\ub958\ub85c\ub294": 55, "\ubd84\ub958\ud560": 55, "keypos": 55, "bicub": 55, "\uc81c\uc678\uc2dc\ud0a4\uace0": 55, "\ubd80\ubd84\ucc98\ub7fc": 55, "t2": 55, "\uc2dc\uc640": 55, "\ub123\uc73c\uba74\uc11c": 55, "expens": 55, "late": 55, "\uc2e4\ud5d8\ud574\ubcf8": 55, "\ud06c\ub2e4\uace0": 55, 
"\ud3ec\ud568\ub418\ub3c4\ub85d": 55, "\uc218\uc2dd\ucc98\ub7fc": 55, "uniformli": 55, "\uc9c4\ud589\ud588\uace0": 55, "cubic": 55, "\uc0c1\uc138\uc0ac\ud56d\uc740": 55, "4x": 55, "tesla": 55, "32g": 55, "\uc2e4\ud5d8\ubcc4": 55, "coco17": 55, "164k": 55, "pidinet": 55, "stuff": 55, "keypoint": 55, "\ub370\uc774\ud130\uc14b\ub85c\ubd80\ud130": 55, "600k": 55, "mm": 55, "mida": 55, "\ubaa8\ub378\ub4e4\uacfc": 55, "\uc815\ub7c9\uc801\uc778": 55, "\uc218\uce58\ub85c": 55, "\ube44\uad50\ud558\ub294\ub370": 55, "\uc88b\uc2b5\ub2c8\ub2e4": 55, "comparisoin": 55, "\uc608\uc2dc\ub4e4\uc740": 55, "\uc9c0\uc5ed\uc744": 55, "\ubabb\ud558\ub2e4\uace0": 55, "\uac83\ub85c": 55, "\uc704\uc5d0\uc11c\ubd80\ud130": 55, "\uc7a5\uc810\ub4e4": 55, "\uba85\uc2dc\ub418\uc5c8\ub358": 55, "\uc0ac\ub840\uc785\ub2c8\ub2e4": 55, "\uc644\ub8cc\ud55c": 55, "\uc801\uc6a9\ud558\uba74\uc11c": 55, "4\ubcf4\ub2e4": 55, "\uacbd\ub7c9\ud654\ub41c": 55, "\uc22b\uc790\ub97c": 55, "\ubc14\uafd4\uac00\uba70": 55, "tini": 55, "x4": 55, "x8": 55, "1312": 56, "6114": 56, "gunhochoi": 56, "fastcampu": 56, "ch": 56, "\ubb38\uad6c\uac00": 56, "\uc801\ud600\uc788\ub294\ub370\uc694": 56, "bayesian": 56, "vb": 56, "involv": 56, "aevb": 56, "\ub274\ub7f4": 56, "\uadfc\uc0ac\ud568\uc73c\ub85c\uc368": 56, "\uc774\uac00": 56, "\ubc14\uac00": 56, "\ub9cc\ub4e4\uc5b4\ub0b4\uace0": 56, "\ubcf5\uc6d0\ud558\uac8c": 56, "\ub461\ub2c8\ub2e4": 56, "assumpt": 56, "\ub0b4\ub9bd\ub2c8\ub2e4": 56, "parametr": 56, "\ud558\ub2e4\ub294": 56, "\uc131\uc9c8\uc5d0": 56, "bernoulli": 56, "\ucd5c\ub300\ud654\uc2dc\ud0a4\ub294": 56, "\ub4f1\uc7a5\ud558\uac8c": 56, "\ub3c4\uc2dd\ud654\ud55c": 56, "fc1_1": 56, "784": 56, "hidden_s": 56, "fc1_2": 56, "log_var": 56, "reparametr": 56, "std": 56, "mul": 56, "exp_": 56, "ep": 56, "floattensor": 56, "add_": 56, "reparam": 56, "fc1": 56, "\ucc3e\uc73c\uba74": 56, "\ubd84\ud560\ud560": 56, "\uc7a0\uc7ac\ubcc0\uc218\uc758": 56, "\uc800\ud76c\uac00": 56, "\ubd80\uc5ec\ud55c": 56, 
"\uac00\uae5d\ub3c4\ub85d": 56, "mont": 56, "carlo": 56, "\uadfc\uc0ac\uac12\uc744": 56, "\uc5f0\uc0b0\ub7c9\uc774": 56, "\ub9ce\uc73c\ubbc0\ub85c": 56, "\ubcc0\ud658\ud558\uc5ec": 56, "\ud3c9\uade0\uc801\uc73c\ub85c": 56, "\uc6d0\ud65c\ud788": 56, "\uc0d8\ud50c\ub9c1\ud558\uc9c0": 56, "\ub354\ud558\uace0": 56, "\uacf1\ud558\uac8c": 56, "\ub530\ub978\ub2e4\uace0": 56, "\uc124\uc815\ud588\uc744": 56, "\ub54c\uc774\uace0": 56, "\uac00\uc815\ud558\uc5ec": 56, "\uc2dc\ub3c4\ud560": 56, "\uba85\uc2dc\ub418\uc5b4": 56, "\ud558\ub2e8\uc5d0\ub294": 56, "\uc67c\ucabd\uc5d0\ub294": 56, "trick\uc774": 56, "\uace0\uc815\ub418\uc5b4": 56, "\uc788\uc5b4\ub3c4": 56, "\uc0d8\ud50c\ub9c1\ud558\ubbc0\ub85c": 56, "\ubbf8\ubd84\ud560": 56, "\uc801\uc6a9\ud558\uae30\uac00": 56, "\uc624\ub978\ucabd\ucc98\ub7fc": 56, "\ubcc0\uc218\ub85c\ub3c4": 56, "\uac00\ub2a5\ud574\uc9c0\uae30": 56, "frei": 56, "wake": 56, "\uc54c\uace0\ub9ac\uc998\ub97c": 56, "\uc801\uc6a9\ud574\uc11c": 56, "\uc2e4\ud5d8\uacb0\uacfc\ub294": 56, "\ucd5c\uc801\ud654\ud558\ub294\ub370": 56, "\uc54c\uace0\ub9ac\uc998\uc774": 56, "\uc131\ub2a5\uc801\uc73c\ub85c\ub3c4": 56, "em": 56, "\ud560\uc6a9\ud558\uc5ec": 56, "\ub9ce\uc73c\uba74": 56, "\uc9c0\uc815\ud574\uc92c\ub2e4\uba74": 56, "\ud30c\ub77c\ubbf8\ud130\ub4e4\uacfc": 56, "\uc0ac\uc6a9\ud574\ubcf4\uba74": 56, "repositori": 57, "team": 57, "aim": 57, "them": 57, "theoret": 57, "conduct": 57, "pseudolab": 57, "\ub9e4\uc8fc": 57, "\uc218\uc694\uc77c": 57, "\uc624\ud6c4": 57, "9\uc2dc": 57, "\uac00\uc9dc\uc5f0\uad6c\uc18c": 57, "discord": 57, "room": 57, "dh": 57, "\uc785\uc7a5": 57, "diffinject": 57, "revisit": 57, "debia": 57, "dongjun": 57, "namjun": 57, "jaekwang": 57, "workshop": 57, "preliminari": 57, "\uc870\uc0c1\uc6b0": 57, "linkedin": 57, "\ubb38\uad11\uc218": 57, "\uae40\uc9c0\uc218": 57, "\ubc15\ubc94\uc218": 57, "\uc9c0\uc2b9\ud658": 57, "\uace0\ub3d9\uadfc": 57, "\uc870\ub0a8\uacbd": 57, "\uae40\uc120\ud6c8": 57, "\uc774\uc900\ud615": 57, 
"\uc870\ud615\uc11c": 57, "\uc720\uc815\ud654": 57, "\ubc15\uc138\ud658": 57, "\uc1a1\uac74\ud559": 57, "\ud55c\ub3d9\ud604": 57, "\uc774\ucc3d\ud658": 57, "\uc720\uacbd\ubbfc": 57, "linkdedin": 57, "\uc774\uc815\uc778": 57, "06721": 24, "ailab": 24, "\ubb38\uc81c\uc0c1\ud669": 24, "\ud6cc\ub96d\ud558\uc9c0\ub9cc": 24, "\uc2dc\ub3c4\ud558\uac70\ub098": 24, "tuning\ud558\uac8c": 24, "\ubc94\uc6a9\uc131": 24, "\ud638\ud658\uc131\ub3c4": 24, "\ud574\uacb0\ubc29\uc548": 24, "features\uc640": 24, "features\ub85c": 24, "decoupling\ud55c\ub2e4": 24, "\uc0c1\ud0dc\uc774\ubbc0\ub85c": 24, "\ub123\uac8c": 24, "\uc218\ud589\ud558\uac8c": 24, "\uacb0\ud569\ud558\ub294": 24, "\uc801\uc808\ud558\uc9c0": [24, 30], "\uac15\uc810": 24, "\uad6c\uc870\uc5d0\ub3c4": 24, "\ud65c\uc6a9\uac00\ub2a5\ud558\ub2e4": 24, "22m": 24, "\uac00\ubccd\ub2e4": 24, "tools\uc5d0": 24, "asid": [], "img_47a446ba601b": [], "jpeg": [], "2011aa380d107c800eb2f1cf2bff1b23cd": [], "\ud544\uc694\uc131\uacfc": 24, "\uc0dd\uc131\ud558\ub824\ub294": 24, "\uc2dc\ub3c4\uc758": 24, "\uc885\ub958\uc640": 24, "\uc7a5\ub2e8\uc810\uc744": 24, "scene\uc774\ub098": 24, "\uc785\ub825\ud560\ub54c": 24, "\uc785\ub825\ud558\ub294": 24, "\uac04\ud3b8\ud558\uace0": 24, "\ud6a8\uacfc\uc801\uc774\ub2e4": 24, "thousand": 24, "img_4891": [], "\ub0b4\uce04\ub7f4": 24, "\ud48d\uc73c\ub85c": 24, "\uce74\ud398\ub97c": 24, "\uafb8\ubbf8\uace0": 24, "\uc2dd\ubb3c\uc744": 24, "\uc7a5\uc2dd\ud558\uace0": 24, "\uc2f6\uc5b4": 24, "\ub0b4\uac00": 24, "\uc88b\uc544\ud558\ub294": 24, "\uc2dd\ubb3c\uc740": 24, "\uc2a4\ub178\uc6b0": 24, "\uc0ac\ud30c\uc774\uc5b4": 24, "\ud638\uc57c": 24, "\uc790\ubbf8\uc624\ucfe8\uce74\uc2a4\ub4f1": 24, "\uc758\uc790\uc640": 24, "\ud14c\uc774\ube14\uc740": 24, "\uc6d0\ubaa9\uc744": 24, "\uc120\ud638\ud574": 24, "e2\ub294": 24, "\uc9c0\uc6d0\ud55c": 24, "\ubaa8\ub378\uc73c\ub85c": 24, "\ubc29\uc2dd\uc774\uc5c8\ub2e4": 24, "\ubf51\uc544\ub0b4": 24, "201": [], "variations\uc640": 24, "unclip\uacfc": 24, 
"tuning\ud558\ub824\ub294": 24, "\uc2dc\ub3c4\ub97c": 24, "\uc0ac\uc6a9\uacfc": 24, "\uc7ac\uc0ac\uc6a9\uc131": 24, "\uc800\ud558\ub77c\ub294": 24, "tools\uacfc": 24, "\ud638\ud658\ub418\uc9c0": 24, "\uce58\uba85\uc801\uc774\ub2e4": 24, "\uad50\uccb4\ud558\ub294": 24, "\uc788\uc5c8\uc9c0\ub9cc": 24, "\uc9c0\uc6d0\ud560": 24, "\uac74\ub4dc\ub9ac\uc9c0": 24, "\uc9c0\uc6d0\ud558\ub294": 24, "segmen": 24, "adapter\ub098": 24, "uni": 24, "\uac19\uc774refer": 24, "\uc785\ub825\ud574": 24, "\uc804\ub2ec\ud558\ub824\ub294": 24, "\uc2dc\ub3c4\ub3c4": 24, "\ud750\ub984\uc758": 24, "network\uc5d0": 24, "feature\ub4e4\uc744": 24, "mapping\ud558\uc5ec": 24, "\uc735\ud569\ud558\uace0\uc790": 24, "feature\ub300\uc2e0": 24, "\ucda9\uc2e4\ub3c4\uac00": 24, "\ub098\ube74\ub2e4": 24, "f5ee9e88": [], "e2b9": [], "4ef8": [], "9e8d": [], "3fbe9b4b6a0a": [], "\ubb38\uc81c\uc810\uc758": 24, "\uc6d0\uc778\uc744": 24, "model\ub0b4\uc758": 24, "attention\uc774\ub77c\uace0": 24, "attention\uc5d0\uc11c": 24, "weights\uc740": 24, "\uc787\ub294": 24, "\uc0c1\ud0dc\uc774\ub2e4": 24, "\ud569\uccd0\uc9c0\ub294\ub370": 24, "\ubb34\uc2dc\ub418\uc5b4": 24, "\uadf9\ubcf5\ud55c": 24, "decoupl": 24, "\ubd84\ub9ac\ud55c\ub2e4": 24, "model\ub0b4": 24, "\ud6c8\ub828\ub2e8\uacc4\uc5d0\uc11c\ub294": 24, "\ud6c8\ub828\ud55c\ub2e4": 24, "\uac00\ubccd\uace0": 24, "prompt\uc640\ub3c4": 24, "\uc5b4\uc6b8\ub9b0\ub2e4": 24, "adapter\uc5d0\uc11c": 24, "reusabl": 24, "controlnet\uac00\ub2a5": 24, "\ubd80\ub958\ub85c": 24, "cogview": 24, "scene\uacfc": 24, "\uc774\uc5c8\ub2e4": 24, "image\ub4e4\uc744": 24, "token\ud654": 24, "\ub4f1\uc7a5\ud558\uc5ec": 24, "art\ub97c": 24, "glide\ub294": 24, "e2\uc758": 24, "\uc774\ud574\ub3c4\ub97c": 24, "imagen\uc740": 24, "t5\ub97c": 24, "\ub3c4\uc785\ud588\ub2e4": 24, "\ub4dc\ubb3c\uac70\ub098": 24, "\ud559\uc2b5\ud55c\uc801\uc5c6\ub294": 24, "entity\uc5d0": 24, "\ucda9\uc131\ub3c4\ub97c": 24, "\uac1c\uc120\ud588\ub2e4": 24, "\ub3d9\uc791\ud558\uac8c": 24, "\uc77c\uce58\ub3c4": 24, 
"i\uc758": 24, "\ub514\uc790\uc778\uc744": 24, "embedding\ub4f1": 24, "versatil": 24, "framework\ub97c": 24, "i2t": 24, "\uc0dd\uc131\ubc29\uc2dd\uc744": 24, "\uba74\uc5d0\uc11c\ub294": 24, "composer\uac00": 24, "\uc2dc\ub3c4\ud588\uc5c8\ub2e4": 24, "raphael\uc740": 24, "mixtur": 24, "moe": 24, "\ud48d\uc758": 24, "\ub9e4\ub825\uc801\uc774\ub2e4": 24, "\uc9c0\uc6d0\ud558\uace0\uc790": 24, "variant": 24, "\ubcc0\uacbd\ud55c": 24, "\uad50\uccb4\ud560": 24, "\ub192\uc73c\uba70": 24, "tool": 24, "\uc54a\ub294\ub2e4\ub294": 24, "\ube44\ud6a8\uc728\uc801\uc774\ub2e4": 24, "\ub5a0\uc624\ub974\ub294": 24, "freeze\uc2dc\ucf1c": 24, "nlp\uc5d0\uc11c": 24, "\uc624\ub7ab\ub3d9\uc548": 24, "\uc0ac\uc6a9\ub418\ub358": 24, "\uc778\uae30\ub85c": 24, "adapter\ub4e4\ub3c4": 24, "specific\ud55c": 24, "\ub123\uae30\uc704\ud574": 24, "\uc2dc\uae30\uc5d0": 24, "adapter\ub3c4": [], "\ub4f1\uc7a5\ud588\ub294\ub370": 24, "\uc0c9\uc774\ub098": 24, "\uba74\uc5d0\uc11c": 24, "\uc8fc\uace0\uc790": 24, "tuning\uc5d0": 24, "injection\uc744": 24, "control\uc678\uc5d0": 24, "content\ub098": 24, "\uc870\uc808\ud558\uace0\uc790": 24, "\uc5f0\uad6c\ub3c4": 24, "shuffle\uc758": 24, "recompose\ud558\ub3c4\ub85d": 24, "only\uc758": 24, "\ubcc0\ud615\ud588\ub2e4": 24, "\ubc84\uc804\uc758": 24, "\ub354\ud574\uc90c\uc73c\ub85c\uc11c": 24, "adapter\ub85c\uc11c\uc758": 24, "\uc5ed\ud560\ub3c4": 24, "projection\ud558": 24, "seecoder\ub294": [], "variants\ub97c": 24, "\uc791\ub3d9": 24, "202": [], "203": [], "204": [], "205": [], "206": [], "207": [], "seecod": 24, "208": [], "\uc77c\uc885\uc778": 24, "\uc0dd\uc131\ub2e8\uacc4": 24, "chain\uc744": 24, "\ud1b5\ud574\ub370\uc774\ud130\uc5d0": 24, "noise\ub85c": 24, "bold": [], "\ucd94\uac00\uc870\uac74": 24, "\ub9d0\ud558\uba70": 24, "\ub9d0\ud558\uace0": 24, "diffusino": 24, "predefin": 24, "function\uc774\ub2e4": 24, "\ud559\uc2b5\ub418\uace0": 24, "\ub178\uc774\uc988\ub85c\ubd80\ud130": 24, "solver\uc640": 24, "\ubc38\ub7f0\uc2f1\ud560": 24, 
"\ud65c\uc6a9\ud558\ub294\ub370": [24, 30], "\ubc88\uac70\ub85c\uc6c0\uc744": 24, "\uc9c0\uc6b0\uae30": 24, "\uc0ac\uc6a9\ud558\uae30\ub3c4": 24, "\uc811\uadfc\uc5d0\uc11c": 24, "\ubc30\uc81c\ud558\uc5ec": 24, "sampling\ub2e8\uacc4": 24, "\ubd88\ub9ac\ub294\ub370": 24, "\uc870\uc808\ud558\uae30": 24, "\uc0c1\uc218\uac12\uc774\ub2e4": 24, "\uc77c\uce58\uc131\uc744": 24, "\ub192\uc774\ub294\ub370": 24, "layer\uac00": 24, "\ud615\ud0dc\uc774\ub2e4": 24, "\ub3d9\uc791\ud558\ubbc0\ub85c": 24, "209": [], "pretain": 24, "\ubf51\uc544\ub0c8\ub2e4": 24, "learning\uc2dc\ud0a8": 24, "\uc870\uc815\ub418\uc5b4": 24, "\ud559\uc2b5\ub2e8\uacc4\uc5d0\uc11c": 24, "frozen\ub418\uc5b4": 24, "\ud559\uc2b5\ub418\uc9c0": 24, "zw_q": 24, "c_tw_k": [], "c_tw_v": [], "\ud589\ub82c\uc774\ub2e4": 24, "layers\uc758": 24, "\ud559\uc2b5\uac00\ub2a5\ud55c": 24, "weigth": 24, "matrices\ub2e4": 24, "attention\uc2dc": 24, "feature\ub85c": 24, "\ubc1c\uacac\ud558\uc5ec": 24, "\uacf3\uc5d0": 24, "\ucc98\ub9ac\ud558\ub3c4\ub85d": 24, "\uc8fc\uc5b4\uc9c8\ub54c": 24, "c_tw": [], "qeury\ub97c": 24, "\ucd94\uac00\ud558\uac8c": 24, "\ucd08\uae30\ud654\ud588\ub2e4": 24, "\ub354\ud568\uc73c\ub85c\uc368": 24, "\ucd5c\uc885\uc801\uc778": 24, "\ud615\ud0dc\ub294": 24, "zw": 24, "freeze\uc2dc\ud0a4\uace0": 24, "\uc9c4\ud589\ud558\ubbc0\ub85c": 24, "adapter\ub9cc": 24, "\ucd5c\uc801\ud654\ud558\uace0": 24, "\uace0\uc815\ud55c\ub2e4": 24, "random\ud558\uac8c": 24, "drop\ud558\uc5ec": 24, "drop\ub418\uba74": 24, "embedding\uc740": 24, "\ucc98\ub9ac\ud588\ub2e4": 24, "detach\ub418\uba70": 24, "\uac00\uc911\uce58\ub3c4": 24, "0001": 24, "librari": 24, "deepspe": 24, "sero": 24, "50step": 24, "2011": [], "2012": [], "2013": [], "2016": [], "embedding\ub97c": 24, "\uc783\uc5b4\ubc84\ub9b4": 24, "\ub514\uc790\uc778\ud588\ub2e4": 24, "penultim": 24, "\ubf51\uc544\ub0b4\uae30": [24, 30], "token\ub4e4\uc744": 24, "finer": 24, "\ucd08\ub798\ud560": 24, "pose\ub97c": 24, "\ub514\uc790\uc778\uc740": 24, 
"attention\uc73c\ub85c": 24, "\uace0\uc791": 24, "\ube44\ub4f1\ud558\uac70\ub098": 24, "\ud655\uc7a5\uc131\uc774": 24, "\uc88b\uc544": 24, "\ub354\uc560": 24, "\uac00\ub2a5\ucf00\ud55c\ub2e4\ub294": 24, "inversion\uc774\ub098": 24, "\uc0dd\uc131\ud558\uc9c0\ub294": 24, "\ubbf8\ub798\uc5d0": 24, "\uac1c\ubc1c\ud558\ub294": 24, "\ubaa9\ud45c\ub2e4": 24, "\uce74\ud398": [], "lorem": [], "ipsum": [], "image2": [], "td": [], "\uc2e4\ud5d8\uacb0\uacfc": 24, "preprocessor": 24, "contentshuffledetector": 24, "\uce35\uc758": 24, "\ub354\ud574\uc9d0": 24, "\ub3d9\uc791\ubc29\uc2dd": 24, "reference\uc640": 24, "\uc720\uc0ac\ud558\ub2e4\ub294": 24, "\uba87\uba87\uc740": 24, "\uc88c\uc6b0\ubc18\uc804\uc744": 24, "\ud55c\uac83\ucc98\ub7fc": 24, "\ub290\uaef4\uc84c\uc2b5\ub2c8\ub2e4": 24, "collapse\uc640": 24, "\uc544\ub2cc\uac00": 24, "\ub0ae\uc544\ubcf4\uc774\ub294": 24, "\uc758\uc544\ud588\uc73c\ub098": 24, "conclusion\uc5d0\uc11c": 24, "image6": [], "doc": [], "image8": [], "16223": [], "\ud56d\ubaa9": 24, "21": 24, "2406": 30, "07547": 30, "chanyeong": 30, "shin": 30, "\ubcc0\ud574\uc57c": 30, "\ud560\uc9c0\uc5d0": 30, "\ubc14\ub00c\uc5b4\uc57c": 30, "\uac00\uc838\uc624\ub294": 30, "\uba85\uba85\ud55c": 30, "\uc55e\uc73c\ub85c\uc758": 30, "\ubd84\uc57c": 30, "\uc2dc\ub098\ub9ac\uc624\ub97c": 30, "\ub9cc\uc871\ud558\uba74\uc11c": 30, "\uc218\ud589\ub418\uc5b4\uc57c": 30, "\ud588\uae30\uc5d0": 30, "\ud480\uace0": 30, "\ub123\uc5b4\uc8fc\uace0": 30, "\uc774\uac74": 30, "\uacf5\ud1b5": 30, "\ud558\ub098\ub85c\ub9cc": 30, "\ubf51\uc544\ub0b4\uae30\uc5d0\ub294": 30, "my": 30, "thought": 30, "refin": 30, "\ub9cc\ub4e4\uc5b4\uc8fc\ub294": 30, "\uc678\uc5d0\ub294": 30, "\uc2e4\ubb34\uc5d0\uc11c": 30, "\uc0ac\uc6a9\ud558\uc9c0\ub294": 30, "insert": 30, "\uc791\uc5c5\ucc98\ub7fc": 30, "\ub290\uaef4": 30, "shoe": 30, "sole": 30, "\ub85c\uace0\ub098": 30, "area": 30, "\ud544\uc694\ud588\uc74c": 30, "\uc5b4\uc6b0\ub7ec\uc9c0\uac8c": 30, "\uace0\ub824\ud574\uc57c": 30, 
"\ubaa8\uc591\ub3c4": 30, "\ub2ec\ub77c\uc11c": 30, "\ud480\uc5b4\uc57c": 30, "\ud574\ub2f9\ud558\ub294\uc9c0": 30, "\uc790\ub3d9\uc73c\ub85c": 30, "\ucc3e\uace0": 30, "\ubf51\uc544\uc11c": 30, "\ub2f9\uc5f0\ud788": 30, "\ucd94\ucd9c\ud588\uae30": 30, "\ud1b5\uacfc": 30, "\uad6c\ub3c4": 30, "\ub2ec\ub77c\ub3c4": 30, "\ud655\uc778\ud558\uc600\uc74c": 30, "comprehens": 30, "\ub418\uc5c8\ub294\uac00": 30, "\ub290\ub08c\uc744": 30, "\ubcc0\ud658\ub410\ub294\uac00": 30, "\uc8fc\ub3c4\ub85d": 30, "\ub123\uc74c": 30, "\uc704\uce58\ud558\ub294": 30, "repaint": 30, "\ucc44\uc6b0\ub294": 30, "\ud658\uacbd\uc758": 30, "\uc2dc\uc810\uc73c\ub85c": 30, "branch": 30, "\uc2e4\uc9c8\uc801\uc73c\ub85c\ub294": 30, "indic": 30, "\ub9cc\ub4e4\ub3c4\ub85d": 30, "\uc99d\uac00\uc2dc\ud0a4\uae30": 30, "\uc720\uc9c0\ub410\ub294\uc9c0": 30, "\ud65c\uc6a9\ud560\uc9c0": 30, "\ub9d0\uc9c0\ub97c": 30, "\uacb0\uc815\ud560": 30, "\ub098\uc640\uc788\uc9c0": 30, "\uc99d\uba85\ud558\uc600\uc74c": 30, "upper": 30, "\uc644\uc131\uc2dc\ud0b4": 30, "anyth": 30, "\ubf51\ub3c4\ub85d": 30, "projector": 30, "drop": 30, "\uc77c\ub4ef": 30, "\ubaa8\ubc29\ud574\uc624\ub294": 30, "\uc815\ub3c4\ub85c": 30, "\ubaa8\uc544\uc11c": 30, "\uad6c\ucd95\ud558\ub294": 30, "\ucca0\ud559\uc744": 30, "\uc9c0\ud0a4\ub824": 30, "\uc874\uc7ac\ud574\uc57c": 30, "\uae30\ub300\ud560": 30, "2\uac1c\ub97c": 30, "\ubf51\uc558\uc74c": 30, "\uce21\uc815\ud588\uace0": 30, "\ud06c\uac70\ub098": 30, "\uc2ec\uc9c0\uc5b4\ub294": 30, "\uac04\ub2e8\ud558\uac8c\ub294": 30, "\ubc29\ubc95\uc778\ub370": 30, "\uac00\uc838\uac00\uba74": 30, "easi": 30, "portion": 30, "\ucc28\uc9c0\ud55c\ub2e4\ub294": 30, "\uac83\ub4e4\uc740": 30, "\ubc18\ubcf5\ub418\ub294": 30, "sift": 30, "\uc26c\uc6e0\uae30": 30, "\uc7a5\uc744": 30, "seg": 30, "\ubc29\uc2dd\uc73c\ub85c\ub3c4": 30, "\ubd88\ub7ec\uc77c\uc73c\ud0b4": 30, "track": 30, "scenario": 30, "topic": 30, "\uc7a5\uc529": 30, "\ubaa8\uc558\uc74c": 30, "annot": 30, "\ub178\uac00\ub2e4": 30, 
"\uacc4\uc0b0\ud558\ub3c4\ub85d": 30, "report": 30, "pari": 30, "\ud588\ub294\uc9c0\uc5d0": 30, "\ucc3e\ub3c4\ub85d": 30, "\uac00\uc838\uc624\ub294\uc9c0\ub97c": 30, "\ud310\ub2e8\ud558\ub3c4\ub85d": 30, "\ub9de\ucd94\ub3c4\ub85d": 30, "\uace0\ub974\ub3c4\ub85d": 30, "pexel": 30, "websit": 30, "70": 30, "But": 30, "\uc791\uac70\ub098": 30, "candid": 30, "\uc0dd\uae30\uae34": 30, "\uc774\ub7f4": 30, "\ucd94\ucc9c\ud55c\ub2e4\uace0": 30, "\uace0\uc548\ub418\uc5b4\uc57c": 30, "\uc0c9\uae54\uc774": 30, "\uc2e4\ubb34\uc5d0": 30, "\ud398\uc774\ud37c\ub2e4": 30, "\uc81c\uc548\ud558\uba74\uc11c": 30, "\uc5f4\ub824\uace0": 30, "\uc778\uc0c1": 30, "\uae4a\uc5c8\ub2e4": 30, "\ub530\ub048\ub530\ub048\ud55c\ub370": 30, "\uc37c\ub294\uc9c0": 30, "\uc774\ud574\uac00": 30, "\ub410\ub358": 30, "\uc368\ubd24\uc73c\uba74": 30}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"inform": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], "synthet": [0, 3, 39], "data": [0, 4, 7, 13, 16, 17, 39], "stabl": [0, 4, 5, 19, 25, 41, 55], "diffus": [0, 4, 5, 10, 13, 14, 15, 17, 18, 19, 21, 23, 24, 25, 26, 27, 33, 34, 36, 37, 39, 40, 41, 42, 43, 46, 47, 49, 51, 52, 54, 55], "foliar": 0, "diseas": 0, "classif": [0, 39], "1": [0, 3, 4, 5, 7, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 41, 43, 45, 46, 51, 52, 53, 54, 55], "\uac1c\uc694": 0, "2": [0, 3, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 43, 45, 46, 51, 52, 53, 54, 55], "baselin": [0, 44], "\uad6c\ucd95": 0, "3": [0, 3, 4, 5, 7, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 39, 41, 43, 45, 46, 51, 52, 53, 54, 55], "fine": [0, 7, 10, 21, 25, 39, 41, 47, 51, 52], "tune": [0, 7, 10, 21, 25, 39, 41, 47, 51], "4": 
[0, 3, 4, 5, 7, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 39, 41, 43, 46, 51, 52, 53, 54, 55], "\uc131\ub2a5": 0, "\ube44\uad50": [0, 14, 40], "5": [0, 3, 4, 5, 10, 12, 15, 16, 17, 19, 23, 27, 28, 29, 31, 32, 33, 35, 36, 37, 39, 43, 46, 52, 53, 54], "discuss": [0, 10, 32, 53], "6": [0, 4, 12, 15, 19, 23, 32, 33, 36, 39, 43, 46, 52], "appendix": [0, 1, 32, 47], "train": [1, 4, 5, 7, 9, 10, 13, 14, 16, 21, 29, 30, 31, 35, 39, 43, 44, 45, 48, 53, 54, 56], "dreambooth": [1, 17, 22, 41, 47], "naver": 1, "webtoon": 1, "face": [1, 37], "dataset": [1, 3, 19, 29, 32, 33, 36, 44, 51], "introduct": [1, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 43, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56], "ablat": [1, 4, 5, 13, 19, 24, 30, 32, 33, 44, 47, 49, 53, 55], "studi": [1, 3, 4, 5, 19, 24, 30, 32, 44, 47, 49, 51, 53, 55], "prior": [1, 4, 45, 52], "preserv": [1, 53], "loss": [1, 13, 44, 54], "neg": [1, 4, 8], "prompt": [1, 24, 33], "instanc": 1, "guidanc": [1, 7, 19, 21, 31, 46, 49, 51], "scale": [1, 13, 14, 20, 23, 33, 39, 51], "3d": [2, 17, 18, 29, 33, 36, 52], "gaussian": 2, "splat": 2, "real": [2, 39], "time": [2, 51, 54], "radianc": [2, 32, 36], "field": [2, 32, 36], "render": [2, 18, 32, 36], "overview": [2, 19, 44, 45, 55], "differenti": 2, "optim": [2, 17, 32, 52, 55], "adapt": [2, 4, 8, 10, 24, 25, 46, 55], "densiti": 2, "control": [2, 4, 9, 52], "fast": [2, 22, 54], "diffenrenti": 2, "raster": 2, "result": [2, 4, 5, 7, 8, 9, 17, 19, 21, 24, 29, 31, 32, 33, 36, 39, 44, 45, 46, 48, 49, 51], "evalu": [2, 3, 7, 8, 14, 26, 29, 30, 36, 44, 49], "limit": [2, 5, 8, 10, 17, 19, 30, 33, 36, 40, 44, 45, 46, 47, 53], "A": [3, 25, 29, 31, 32, 33], "gener": [3, 4, 5, 7, 8, 10, 12, 15, 17, 27, 33, 34, 39, 41, 43, 51, 52], "model": [3, 4, 5, 7, 10, 13, 14, 15, 16, 17, 18, 19, 21, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 46, 47, 49, 
51, 52, 53, 54, 55], "\ud559\uc2b5": 3, "\uc790\ub8cc": 3, "0": [3, 4, 17, 28, 32, 36], "abstract": [3, 4, 5, 7, 10, 12, 14, 15, 16, 17, 18, 20, 21, 23, 26, 28, 32, 33, 34, 35, 36, 37, 40, 41, 44, 46], "background": [3, 12, 13, 15, 19, 21, 33, 36, 39, 44, 45, 46, 52, 54], "kl": 3, "diverg": 3, "kullback": 3, "leibler": 3, "incept": 3, "score": [3, 18, 34, 39], "IS": [3, 39], "fid": [3, 39], "fr\u00e9chet": 3, "distanc": [3, 36], "kernel": 3, "clean": 3, "benchmark": [3, 30], "comparison": [3, 4, 5, 8, 15, 19, 22, 23, 24, 30, 32, 33, 36, 44, 53, 55], "between": 3, "metric": [3, 29], "Is": 3, "all": 3, "we": [3, 18], "need": 3, "animatediff": 4, "relat": [4, 5, 8, 10, 14, 17, 19, 22, 24, 25, 27, 32, 33, 34, 36, 37, 39, 40, 44, 53], "work": [4, 5, 8, 10, 14, 17, 19, 22, 24, 25, 27, 29, 30, 32, 33, 34, 36, 37, 39, 40, 44, 45, 46, 53], "text": [4, 7, 10, 18, 20, 21, 24, 29, 31, 40, 41, 47, 51, 53], "imag": [4, 5, 7, 9, 10, 14, 15, 19, 20, 21, 24, 27, 29, 30, 41, 43, 44, 46, 47, 51, 53], "person": [4, 17, 41], "t2i": [4, 17, 55], "anim": [4, 5, 19], "preliminari": [4, 5, 17, 24, 51, 55], "low": [4, 25, 28], "rank": [4, 22, 25, 28], "lora": [4, 25, 28], "allevi": 4, "effect": [4, 51], "from": [4, 12, 33, 39, 49], "domain": 4, "learn": [4, 10, 45], "motion": [4, 16], "modul": [4, 25], "new": 4, "pattern": 4, "motionlora": 4, "practic": 4, "infer": [4, 12, 16, 28, 31], "experi": [4, 5, 8, 10, 12, 13, 15, 17, 18, 19, 20, 22, 24, 27, 28, 29, 30, 32, 34, 37, 41, 43, 47, 48, 51, 52, 53, 54, 55, 56], "qualit": [4, 5, 8, 19, 29, 30, 33, 49], "quantit": [4, 7, 8, 15, 19, 29], "design": [4, 14, 15, 55], "effici": [4, 14, 25], "conclus": [4, 8, 14, 15, 17, 19, 22, 24, 25, 33, 36, 37, 39, 45, 49, 51, 52], "7": [4, 19, 31, 33, 36, 46, 52], "\uc2e4\uc2b5": 4, "anyon": 5, "video": [5, 19, 29, 41], "human": [5, 29, 49], "method": [5, 8, 10, 19, 20, 22, 24, 28, 29, 30, 33, 34, 36, 37, 40, 44, 53, 55], "network": [5, 29, 38, 44], "architectur": [5, 14, 16, 19, 31, 44, 46], "strategi": [5, 
7, 30], "implement": [5, 9, 19, 30, 32, 44, 55], "bbdm": 6, "cm3leon": 7, "pretrain": [7, 25, 49], "token": [7, 31], "retriev": 7, "augment": [7, 51], "object": [7, 8, 13, 44, 54], "function": [7, 13, 36, 44], "To": [7, 29, 51], "import": 7, "decod": [7, 13, 14, 31, 36], "temperatur": 7, "sampl": [7, 12, 13, 17, 18, 23, 27, 32, 39, 54], "topp": 7, "classifi": [7, 19, 21, 31, 42, 46, 49], "free": [7, 19, 21, 31, 49], "cfg": 7, "contrast": 7, "topk": 7, "cd": 7, "k": 7, "supervis": 7, "instruct": 7, "guid": [7, 21, 40, 51], "edit": [7, 10, 15, 30, 43], "ground": [7, 26], "spatial": 7, "caption": 7, "visual": [7, 14, 45], "question": 7, "answer": 7, "task": [7, 25], "conceptlab": 8, "prelimiari": [8, 22], "The": [8, 18, 51], "constraint": 8, "regular": [8, 38], "evolutionari": 8, "creativ": 8, "concept": [8, 10], "mix": [8, 38], "setup": [8, 14, 24, 40], "controlnet": 9, "addit": [9, 28, 32, 44], "base": [9, 31, 34], "condit": [9, 14, 19, 21, 27, 35, 53], "block": [9, 14, 16], "zero": [9, 30, 42, 43], "convolut": [9, 27, 29], "custom": [10, 51], "deep": 10, "transfer": 10, "singl": 10, "multipl": [10, 19], "composit": 10, "detail": [10, 19, 30, 32, 44, 45, 53, 55], "dall": [11, 40, 45], "e": [11, 33, 36, 40, 45], "ddim": [12, 15, 46], "ddpm": [12, 13, 15, 23, 46, 54], "variat": [12, 38, 56], "For": 12, "non": 12, "markovian": 12, "forward": [12, 13], "process": [12, 13, 15], "code": 12, "q": [13, 31], "mathbf": 13, "x": 13, "_t": 13, "_": 13, "t": [13, 28], "revers": [13, 15], "p": 13, "l": 13, "denois": [13, 15, 23], "encod": [13, 19, 27, 31, 32, 36], "l_t": 13, "l_": 13, "l_0": 13, "simplifi": 13, "qualiti": [13, 15, 39, 44], "parameter": [13, 28, 54], "dit": 14, "complex": [14, 33], "latent": [14, 15, 25, 27, 36, 40, 41, 51], "transform": [14, 27, 28, 45], "space": [14, 15, 18, 51], "patchifi": 14, "experiment": [14, 24], "set": [14, 29], "class": 14, "genert": 14, "size": [14, 23], "patch": 14, "gflop": 14, "ar": 14, "critic": 14, "improv": [14, 23, 35, 39, 46], 
"perform": 14, "larger": 14, "more": [14, 24], "comput": 14, "alreadi": 15, "have": 15, "semant": [15, 31], "probabl": [15, 54], "implicit": 15, "manipul": 15, "clip": [15, 19, 21], "discov": 15, "In": 15, "problem": [15, 28], "asymmetr": 15, "asyrp": 15, "h": 15, "neural": [15, 18, 32, 36], "direct": 15, "With": 15, "boost": 15, "stochast": [15, 38, 56], "nois": [15, 21], "inject": 15, "overal": [15, 30], "versatil": 15, "analysi": [15, 19, 30, 44], "dreamov": 16, "collect": 16, "preprocess": 16, "content": [16, 57], "guider": 16, "dream": 17, "booth": 17, "approach": 17, "goal": 17, "\ud14d\uc2a4\ud2b8": 17, "\ud504\ub86c\ud504\ud2b8\uc5d0": 17, "\ucda9\uc2e4\ud558\uba74\uc11c": 17, "\uc8fc\uc5b4\uc9c4": 17, "subject": 17, "\uc758": 17, "ident": 17, "\uae30\ud558": 17, "\ud615\ud0dc": 17, "\ubc0f": 17, "\uc678\uad00": 17, "\uc744": 17, "\ubc18\uc601\ud558\ub294": 17, "asset": 17, "\uc0dd\uc131": 17, "dreamfus": [17, 18, 52], "failur": 17, "naiv": 17, "fusion": 17, "dreambooth3d": 17, "stage": [17, 35, 45, 51], "partial": 17, "multi": [17, 35], "view": [17, 32, 33], "stage3": 17, "final": 17, "nerf": [17, 32, 36], "applic": [17, 47, 55], "distil": [18, 25, 43, 51, 54], "how": 18, "can": 18, "paramet": [18, 25, 35, 39, 46], "pixel": 18, "algorithm": [18, 46, 56], "synthesi": [18, 19, 20, 32, 33, 40, 41, 46], "dreampos": 19, "fashion": 19, "via": [19, 43], "still": 19, "mechan": 19, "split": 19, "vae": [19, 45, 56], "modifi": 19, "unet": 19, "finetun": [19, 22, 31, 41], "pose": 19, "input": [19, 53], "futur": [19, 33, 36, 46], "8": [19, 31, 36, 46], "up": [20, 22], "gan": [20, 23, 40, 46, 48], "\uc8fc\uc694": 20, "\uc9c8\uc758\uc751\ub2f5": 20, "glide": [21, 33], "inpaint": [21, 27], "hyperdreambooth": 22, "contribut": [22, 29, 31, 49], "lightweight": 22, "lidb": 22, "hypernetwork": 22, "relax": 22, "follow": 22, "i": 23, "probabilist": 23, "log": 23, "likelihood": 23, "improc": 23, "speed": 23, "lcm": 25, "univers": 25, "acceler": [25, 51], "propos": 25, 
"\uae30\uc874": 25, "\uc5f0\uad6c\uc758": 25, "\ud55c\uacc4\uc810": 25, "\uae30\ubc18": 25, "\uc5f0\uad6c": 25, "consist": [25, 43, 44, 51], "cms\uacfc": 25, "\ucc28\uc774\uc810": 25, "arithmet": 25, "llm": 26, "summari": [26, 30, 48, 56], "On": 27, "perceptu": 27, "compress": 27, "tradeoff": 27, "ldm": [27, 40], "beyond": 27, "256x256": 27, "super": [27, 31, 49], "resolut": [27, 31, 41, 49, 52], "terminolog": 28, "convent": 28, "statement": 28, "aren": 28, "exist": [24, 28], "solut": 28, "good": 28, "enough": 28, "our": 28, "updat": 28, "matric": 28, "No": 28, "latenc": 28, "appli": 28, "empir": 28, "ia3": 28, "aa": 28, "\uc0ac\uc6a9\ubc95": 28, "refer": [28, 30], "make": 29, "\uc81c\uc548": 29, "\ubc30\uacbd": 29, "\ud2b9\uc131": 29, "previou": [29, 45], "spatiotempor": 29, "layer": 29, "pseudo": [29, 40], "psuedo": 29, "attent": 29, "frame": [29, 41], "interpol": [29, 41], "automat": 29, "evaluaton": 29, "msr": 29, "vtt": 29, "evluat": 29, "ucf": 29, "101": 29, "\uacb0\ub860": 29, "muse": 31, "mask": 31, "pre": 31, "us": [31, 57], "vqgan": 31, "iter": 31, "parallel": 31, "repres": 32, "scene": [32, 52], "represent": 32, "volum": 32, "posit": 32, "hierarch": 32, "point": [33, 36], "system": 33, "cloud": 33, "arxiv": 33, "2022": 33, "upsampl": 33, "produc": 33, "mesh": 33, "other": [30, 33, 36], "sdedit": 34, "sde": 34, "smld": 34, "sdxl": 35, "micro": 35, "crop": 35, "aspect": 35, "autoencod": [35, 41], "put": 35, "everyth": 35, "togeth": 35, "refin": 35, "shap": 36, "sign": 36, "textur": 36, "stf": 36, "acknowledg": 36, "styo": 37, "styliz": 37, "framework": 37, "stylegan": 38, "map": 38, "style": 38, "adain": 38, "\uc2e4\ud5d8": 38, "\uacb0\uacfc": [38, 40], "imagenet": 39, "imagen": [39, 49, 50], "protocol": 39, "accuraci": 39, "differ": 39, "merg": 39, "textual": 40, "invers": 40, "cf": 40, "\uc774\ud574": 40, "\ubabb\ud568": 40, "embed": 40, "\uc131\ub2a5\ud3c9\uac00": 40, "2\uc640": 40, "word": 40, "\ub450": 40, "\uac1c": 40, "\uc0ac\uc6a9": 40, "bia": 40, 
"reduct": 40, "\uc815\ub7c9\ud3c9\uac00": 40, "\ud3c9\uac00": 40, "\uc8fc\ubaa9\ud560": 40, "\uc810": 40, "\uc0ac\uc6a9\uc790\ud3c9\uac00": 40, "\ub9c8\ubb34\ub9ac": 40, "videoldm": 41, "turn": 41, "tempor": 41, "predict": 41, "long": 41, "term": 41, "high": [41, 52], "rate": 41, "sr": 41, "drive": 41, "your": 42, "secretli": 42, "shot": [30, 42, 43], "isol": 43, "few": 43, "step": [43, 51, 53], "cyclegan": 44, "translat": [44, 53], "mode": 44, "collaps": 44, "adversari": 44, "cycl": 44, "full": 44, "\ucc38\uace0": 44, "least": 44, "squar": 44, "\ucd94\uac00": 44, "\uc124\uba85": 44, "against": 44, "reconstruct": 44, "pair": 44, "discusss": 44, "gpt": 45, "vq": 45, "methodolog": [45, 49], "an": 45, "autoregress": 45, "pipelin": [30, 45], "\uc608\uc2dc": 45, "equat": 45, "\ud559\uc2b5\uacfc\uc815": 45, "codebook": 45, "beat": 46, "group": 46, "normal": 46, "impact": 46, "s": 46, "9": 46, "procedur": 48, "theoret": 48, "t5": 49, "xxl": 49, "cascad": 49, "larg": [24, 49], "weight": 49, "sampler": 49, "static": 49, "threshold": 49, "dynam": 49, "drawbench": 49, "tabl": 49, "editor": 50, "One": [51, 53], "solv": 51, "pf": 51, "od": [51, 54], "skip": 51, "abul": 51, "solver": 51, "schedul": 51, "omega": 51, "downstream": 51, "magic3d": 52, "coars": 52, "ad": 53, "unpair": 53, "extens": 53, "progress": 54, "continu": 54, "definit": 54, "ancestr": 54, "flow": 54, "parametr": 54, "intract": 56, "sgvb": 56, "gradient": 56, "bay": 56, "reparameter": 56, "trick": 56, "welcom": 57, "pseudodiffus": 57, "about": 57, "public": 57, "tech": 57, "blog": 57, "contributor": 57, "ip": 24, "\ubb38\uc81c\uc0c1\ud669": [], "\ud574\uacb0\ubc29\uc548": [], "\ub17c\ubb38\uc758": [], "\uac15\uc810": [], "mimicbrush": 30, "imit": 30, "three": 30, "line": 30, "structur": 30, "review": 30}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, 
"sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx": 56}})
\ No newline at end of file
+Search.setIndex({"docnames": ["docs/experiments/js_exp", "docs/experiments/swjo_exp", "docs/review/3DGS", "docs/review/A_Study_on_the_Evaluation_of_Generative_Models", "docs/review/AnimateDiff", "docs/review/Animate_Anyone", "docs/review/BBDM", "docs/review/CM3leon", "docs/review/Coin3D", "docs/review/ConceptLab", "docs/review/ControlNet", "docs/review/CustomDiffusion", "docs/review/DALLE2", "docs/review/DDIM", "docs/review/DDPM", "docs/review/DiT", "docs/review/Diffusion_models_already_have_a_Semantic_Latent_Space", "docs/review/DreaMoving", "docs/review/DreamBooth3D", "docs/review/DreamFusion", "docs/review/DreamGaussian", "docs/review/DreamPose", "docs/review/GIGAGAN", "docs/review/GLIDE", "docs/review/HyperDreamBooth", "docs/review/I-DDPM", "docs/review/IP_Adapter", "docs/review/LCM-LoRA", "docs/review/LLM_grounded_Diffusion", "docs/review/Latent_Diffusion_Model", "docs/review/LoRA", "docs/review/Make_A_Video", "docs/review/MimicBrush", "docs/review/Muse", "docs/review/NeRF", "docs/review/Point_E", "docs/review/ProlificDreamer", "docs/review/SDEdit", "docs/review/SDXL", "docs/review/Shap-E", "docs/review/StyO", "docs/review/StyleGAN", "docs/review/Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification", "docs/review/Textual_Inversion", "docs/review/VideoLDM", "docs/review/Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier", "docs/review/consistency_models", "docs/review/cycleGAN", "docs/review/dalle", "docs/review/diffusion_beats_GANs", "docs/review/dreambooth", "docs/review/gan", "docs/review/imagen", "docs/review/imagen_editor", "docs/review/latent_consistency_models", "docs/review/magic-3d", "docs/review/one-step-image-translation", "docs/review/one_step_diffusion_with_distribution_matching_distillation", "docs/review/progressive_distillation", "docs/review/t2i_adapter", "docs/review/vae", "docs/review/zero123plus", "intro"], "filenames": ["docs\\experiments\\js_exp.md", "docs\\experiments\\swjo_exp.md", "docs\\review\\3DGS.md", 
"docs\\review\\A_Study_on_the_Evaluation_of_Generative_Models.md", "docs\\review\\AnimateDiff.md", "docs\\review\\Animate_Anyone.md", "docs\\review\\BBDM.md", "docs\\review\\CM3leon.md", "docs\\review\\Coin3D.md", "docs\\review\\ConceptLab.md", "docs\\review\\ControlNet.md", "docs\\review\\CustomDiffusion.md", "docs\\review\\DALLE2.md", "docs\\review\\DDIM.md", "docs\\review\\DDPM.md", "docs\\review\\DiT.md", "docs\\review\\Diffusion_models_already_have_a_Semantic_Latent_Space.md", "docs\\review\\DreaMoving.md", "docs\\review\\DreamBooth3D.md", "docs\\review\\DreamFusion.md", "docs\\review\\DreamGaussian.md", "docs\\review\\DreamPose.md", "docs\\review\\GIGAGAN.md", "docs\\review\\GLIDE.md", "docs\\review\\HyperDreamBooth.md", "docs\\review\\I-DDPM.md", "docs\\review\\IP_Adapter.md", "docs\\review\\LCM-LoRA.md", "docs\\review\\LLM_grounded_Diffusion.md", "docs\\review\\Latent_Diffusion_Model.md", "docs\\review\\LoRA.md", "docs\\review\\Make_A_Video.md", "docs\\review\\MimicBrush.md", "docs\\review\\Muse.md", "docs\\review\\NeRF.md", "docs\\review\\Point_E.md", "docs\\review\\ProlificDreamer.md", "docs\\review\\SDEdit.md", "docs\\review\\SDXL.md", "docs\\review\\Shap-E.md", "docs\\review\\StyO.md", "docs\\review\\StyleGAN.md", "docs\\review\\Synthetic_Data_from_Diffusion_Models_Improves_ImageNet_Classification.md", "docs\\review\\Textual_Inversion.md", "docs\\review\\VideoLDM.md", "docs\\review\\Your_Diffusion_Model_is_Secretly_a_Zero_Shot_Classifier.md", "docs\\review\\consistency_models.md", "docs\\review\\cycleGAN.md", "docs\\review\\dalle.md", "docs\\review\\diffusion_beats_GANs.md", "docs\\review\\dreambooth.md", "docs\\review\\gan.md", "docs\\review\\imagen.md", "docs\\review\\imagen_editor.md", "docs\\review\\latent_consistency_models.md", "docs\\review\\magic-3d.md", "docs\\review\\one-step-image-translation.md", "docs\\review\\one_step_diffusion_with_distribution_matching_distillation.md", "docs\\review\\progressive_distillation.md", 
"docs\\review\\t2i_adapter.md", "docs\\review\\vae.md", "docs\\review\\zero123plus.md", "intro.md"], "titles": ["Synthetic Data with Stable Diffusion for Foliar Disease Classification", "Training DreamBooth on Naver Webtoon Face Dataset", "3D Gaussian Splatting for Real-Time Radiance Field Rendering", "A Study on the Evaluation of Generative Models", "AnimateDiff", "Animate Anyone", "BBDM", "CM3leon", "Coin3D", "ConceptLab", "ControlNet", "Custom Diffusion", "DALL-E 2", "DDIM", "DDPM", "DiT", "Diffusion Models already have a Semantic Latent Space", "DreaMoving", "Dream Booth 3D", "<strong>DreamFusion</strong>", "DreamGaussian", "DreamPose: Fashion Image-to-Video Synthesis via Stable Diffusion", "Scaling up GANs for Text-to-Image Synthesis", "GLIDE", "HyperDreamBooth", "I-DDPM", "IP-Adapter", "LCM-LoRA: A Universal Stable-Diffusion Acceleration Module", "LLM Grounded Diffusion", "Introduction", "LoRA", "Make A Video", "MimicBrush: Zero-shot Image Editing with Reference Imitation", "Muse", "NeRF : Representing Scenes as Neural Radiance Fields for View Synthesis", "Point-E: A System for Generating 3D Point Clouds from Complex Prompts (Arxiv 2022)", "ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation", "SDEdit", "SDXL", "Shap-E", "StyO", "StyleGAN", "Synthetic Data from Diffusion Models Improves ImageNet Classification", "Textual Inversion", "VideoLDM", "Your Diffusion Model is Secretly a Zero-Shot Classifier", "Consistency Models", "CycleGAN", "DALL-E", "Diffusion Models Beat GANs on Image Synthesis", "DreamBooth", "GAN", "Imagen", "Imagen Editor", "Latent Consistency Models", "Magic3D", "One-Step Image Translation with Text-to-Image Models", "One-step Diffusion with Distribution Matching Distillation", "Progressive Distillation for Fast Sampling of Diffusion Models", "T2I-Adapter", "VAE", "Zero123++", "Welcome to PseudoDiffusers!!"], "terms": {"titl": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "author": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "jisu": [0, 10, 41, 62], "kim": [0, 2, 6, 10, 12, 41, 45, 62], "last": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "updat": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "jul": [0, 1], "05": [0, 3, 26, 32, 38], "2023": [0, 1, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23, 24, 25, 29, 30, 31, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 52, 53, 55, 58, 59], "\uc0ac\uacfc": 0, "\ub098\ubb34\uc758": 0, "\uc78e\uc5d0": 0, "\uc0dd\uae30\ub294": [0, 42, 56], "\uc9c8\ubcd1\uc744": 0, "\uc774\ubbf8\uc9c0\ub85c": [0, 1, 4, 9, 11, 15, 18, 24, 34, 38, 40, 52, 53, 59], "\ud310\ubcc4\ud558\ub294": 0, "kaggl": 0, "competit": [0, 45, 46, 49], "\ub9c1\ud06c": [0, 10, 15], "\uc5d0\uc11c": [0, 2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 17, 18, 20, 21, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 42, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uc544\uc774\ub514\uc5b4\ub97c": 0, "\uc5bb\uc5b4\uc11c": [0, 57], "\uc9c4\ud589\ud55c": [0, 15, 23, 58], "\ud504\ub85c\uc81d\ud2b8\uc785\ub2c8\ub2e4": 0, "\ud574\ub2f9": [0, 3, 4, 5, 9, 11, 14, 18, 19, 20, 21, 23, 24, 26, 28, 31, 32, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 50, 56, 57, 59, 60, 61], 
"competition\uc740": 0, "\uc0ac\uacfc\ub098\ubb34": 0, "\uac78\ub9b0": 0, "\uc9c8\ubcd1\uc5d0": 0, "\ub530\ub77c": [0, 2, 3, 4, 5, 6, 7, 9, 12, 15, 16, 18, 19, 20, 21, 23, 24, 25, 27, 28, 30, 32, 33, 34, 35, 38, 39, 42, 43, 45, 46, 47, 48, 49, 50, 54, 56, 60, 61], "\uc78e": 0, "\uc774\ubbf8\uc9c0\ub97c": [0, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 33, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 52, 54, 56, 57, 59, 61], "4\uac1c\uc758": [0, 12, 15, 19, 31, 34, 43, 56, 58, 59], "class\ub85c": 0, "\ubd84\ub958\ud558\ub294": [0, 23, 59], "task\uc785\ub2c8\ub2e4": 0, "class": [0, 10, 11, 13, 14, 18, 22, 23, 25, 29, 31, 41, 42, 45, 47, 49, 50, 51, 52, 56, 59, 60], "leav": [0, 45], "competition\uc744": 0, "\uc124\uba85\ud55c": [0, 31, 59], "articl": 0, "\uc804\uccb4\uc801\uc778": [0, 2, 4, 12, 16, 28, 41, 46, 56, 60], "accuracy\ub294": 0, "97": [0, 45], "\uc774\uc9c0\ub9cc": [0, 34], "multipl": [0, 22, 32, 33, 36, 56, 59, 61], "class\uc758": [0, 3, 49], "\uacbd\uc6b0": [0, 1, 4, 6, 9, 10, 11, 15, 16, 18, 20, 21, 24, 25, 26, 27, 28, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 51, 54, 59, 60], "accuracy\uac00": 0, "51": 0, "\uc5d0": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\ubd88\uacfc\ud588\ub2e4\uace0": 0, "\uc5b8\uae09\ud569\ub2c8\ub2e4": [0, 26], "\uc774\ubbf8\uc9c0": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 26, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 58, 59, 61], "\uac1c\uc218\uac00": [0, 4], "\ub2e4\ub978": [0, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 52, 53, 57, 58, 59, 60], "class\uc5d0": [0, 16, 23], "\ube44\ud574": [0, 4, 6, 7, 
8, 10, 11, 13, 15, 16, 18, 19, 21, 23, 24, 25, 28, 29, 33, 35, 36, 40, 42, 46, 49, 53, 54, 60, 61], "\uc801\uc740": [0, 2, 3, 4, 7, 10, 11, 13, 14, 15, 19, 23, 25, 26, 30, 31, 43, 44, 45, 49, 54], "\uc810\uc5d0": [0, 21, 35], "\uc8fc\ubaa9\ud588\uace0": 0, "diffusion\uc744": [0, 5, 9, 20, 21, 27, 37, 39, 54], "\uc0ac\uc6a9\ud558\uc5ec": [0, 4, 9, 14, 15, 16, 18, 19, 20, 21, 24, 26, 28, 29, 31, 34, 35, 39, 41, 42, 46, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58], "\ud074\ub798\uc2a4\uc758": [0, 9, 42], "\ub370\uc774\ud130": [0, 3, 4, 5, 15, 18, 28, 29, 31, 34, 35, 37, 38, 39, 42, 43, 44, 45, 47, 48, 51, 52, 54, 60], "\uac1c\uc218\ub97c": [0, 4, 14, 20, 39], "\ub298\ub824\uc11c": 0, "classifi": [0, 4, 15, 16, 26, 27, 35, 36, 39, 42, 44, 51, 53, 54, 56, 59], "\ud559\uc2b5\uc5d0": [0, 7, 15, 21, 22, 25, 30, 32, 39, 42, 44, 54, 56], "\uc0ac\uc6a9\ud558\uba74": [0, 14, 25, 33, 43, 44, 48], "\ub354": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 58, 59, 60, 61], "\uc88b\uc740": [0, 1, 2, 3, 4, 6, 11, 16, 19, 21, 22, 23, 27, 28, 29, 31, 35, 36, 37, 38, 40, 42, 46, 47, 49, 50, 51, 52, 54, 55, 56, 58, 59, 61], "\uc131\ub2a5\uc758": [0, 15, 42], "classifier\ub97c": [0, 16, 21, 23, 26], "\uc5bb\uc744": [0, 2, 4, 9, 16, 20, 26, 27, 28, 36, 38, 39, 42, 43, 45, 46, 47, 54], "\uc218": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uc788\uc744": [0, 1, 4, 5, 6, 10, 14, 26, 27, 28, 30, 32, 33, 34, 35, 36, 41, 42, 45, 47, 57], "\uac83\uc73c\ub85c": [0, 4, 15, 20, 21, 23, 28, 30, 31, 34, 35, 36, 39, 42, 43, 44, 45], "\uae30\ub300\ud588\uc2b5\ub2c8\ub2e4": 0, "\ubb38\uc81c": [0, 5, 8, 18, 19, 35, 59], "\uc0c1\ud669\uc744": [0, 46], 
"\uc7ac\ud604\ud558\uae30": 0, "\uc704\ud574": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 48, 50, 53, 54, 55, 56, 57, 58, 59, 61], "\uae30\uc874": [0, 2, 4, 7, 8, 9, 10, 11, 15, 16, 18, 19, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 49, 52, 54, 56, 58, 59], "\ub370\uc774\ud130\ub85c": [0, 1, 3, 10, 19, 35, 38, 42, 44, 45, 47, 54], "imag": [0, 1, 2, 3, 6, 8, 9, 12, 13, 17, 18, 19, 24, 27, 28, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 48, 51, 52, 53, 55, 57, 58, 59, 60, 62], "\ud559\uc2b5\ud558\uc5ec": [0, 4, 26, 27, 35, 48, 49, 54, 58], "baseline\uc73c\ub85c": 0, "\uc7a1\uc558\uc2b5\ub2c8\ub2e4": 0, "\ubaa8\ub378\uc740": [0, 4, 5, 6, 7, 9, 12, 15, 18, 19, 21, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 37, 38, 39, 41, 42, 43, 47, 51, 54, 58, 59, 61], "pretrained\ub41c": 0, "resnet18\uc5d0": 0, "linear": [0, 3, 6, 14, 15, 16, 20, 23, 25, 26, 30, 35, 41, 49, 51, 60, 61], "layer\ub97c": [0, 4, 5, 22, 26, 30, 31, 33, 39, 41, 44], "\ubd99\uc5ec\uc11c": 0, "\uc0ac\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [0, 12, 24, 47, 53], "\uc804\uccb4": [0, 4, 5, 6, 9, 10, 11, 15, 16, 18, 21, 23, 24, 25, 28, 29, 31, 32, 35, 44, 54, 57], "7": [0, 1, 3, 5, 6, 7, 13, 14, 19, 25, 26, 27, 28, 34, 36, 37, 46, 52, 54, 59], "class\ubcc4": 0, "healthi": 0, "99": 0, "73": [0, 43], "rust": 0, "scab": 0, "98": [0, 19, 36], "class\ub294": [0, 22], "\uac1c\uc218": [0, 39], "91\uac1c\ub85c": 0, "\ud074\ub798\uc2a4\ub4e4\uc5d0": 0, "\ube44\ud574\uc11c": [0, 12], "\uc801\uc2b5\ub2c8\ub2e4": 0, "imbalance\uac00": 0, "\uc131\ub2a5\uc744": [0, 2, 3, 6, 7, 9, 10, 11, 13, 15, 16, 18, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 38, 39, 40, 41, 42, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 61], "\ub0ae\ucd94\ub294": [0, 26], "\uc6d0\uc778\uc77c": [0, 42], "\uac83\uc774\ub77c": [0, 30], "\uac00\uc815\ud558\uace0": [0, 27, 46], 
"diffusion\uc73c\ub85c": [0, 42], "data\ub97c": [0, 11, 26, 29], "\ucd94\uac00\ub85c": [0, 7, 18, 21, 25, 27, 29, 31, 32, 35, 38, 39, 40, 47], "\uc0dd\uc131\ud574\ubcf4\uae30\ub85c": 0, "\ud588\uc2b5\ub2c8\ub2e4": [0, 1, 12, 41, 42, 51], "\uc608\uc2dc": [0, 5, 7, 20, 21, 31, 34, 35, 47, 52, 53, 56, 58, 59], "pretran": 0, "diffusion\uc758": [0, 22, 26, 28], "\ub300\ud55c": [0, 1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 38, 39, 40, 42, 43, 44, 46, 47, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61], "\uc815\ubcf4\uac00": [0, 12, 21, 24, 40, 44, 50], "\uc5c6\uc5b4\uc11c": [0, 21], "\uc0dd\uc131\ud560": [0, 1, 2, 4, 7, 9, 10, 12, 18, 19, 22, 24, 26, 27, 28, 31, 33, 35, 36, 37, 38, 39, 42, 44, 46, 47, 50, 54, 55, 56], "\uc544\ub798\uc640": [0, 2, 4, 6, 9, 10, 16, 20, 21, 26, 28, 29, 36, 39, 41, 42, 47, 49, 57, 60], "\uac19\uc774": [0, 4, 6, 9, 10, 11, 12, 14, 16, 19, 20, 21, 22, 24, 26, 27, 28, 29, 31, 32, 33, 36, 39, 41, 43, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61], "\uad00\ub828\uc5c6\ub294": 0, "\uc774\ubbf8\uc9c0\uac00": [0, 10, 12, 14, 18, 21, 22, 24, 25, 26, 28, 29, 31, 33, 37, 38, 39, 40, 42, 43, 47, 49, 51, 52, 56, 57, 61], "\uc0dd\uc131\ub429\ub2c8\ub2e4": [0, 9], "prompt": [0, 4, 5, 8, 10, 11, 12, 17, 18, 19, 22, 23, 24, 28, 29, 32, 33, 36, 40, 43, 44, 45, 50, 52, 53, 55, 56, 59, 61], "photo": [0, 1, 4, 6, 9, 11, 28, 39, 43, 47], "\ub530\ub77c\uc11c": [0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 21, 23, 24, 25, 26, 27, 28, 33, 35, 36, 37, 38, 40, 42, 43, 44, 45, 47, 53, 54, 55, 56, 57, 58, 59, 61], "model": [0, 6, 8, 9, 10, 12, 13, 20, 22, 24, 28, 30, 34, 47, 48, 51, 53, 61, 62], "\uc815\ubcf4\ub97c": [0, 4, 5, 8, 10, 12, 14, 16, 18, 19, 20, 21, 24, 26, 28, 31, 33, 35, 36, 40, 42, 43, 44, 47, 50, 55, 57, 59], "\ub123\uc5b4\uc8fc\uae30": 0, "dreambooth": [0, 4, 11, 17, 21, 26, 32, 55], "\ub97c": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "tuning\ud588\uc2b5\ub2c8\ub2e4": 0, "training\uc5d0": [0, 13, 49], "\uc0ac\uc6a9\ud55c": [0, 4, 7, 16, 18, 19, 20, 21, 24, 26, 27, 28, 31, 34, 35, 38, 39, 41, 42, 43, 49, 57], "prompt\ub294": [0, 24, 28], "disea": 0, "leaf": 0, "\uc774\uba70": [0, 4, 6, 43, 46, 54], "\uc0dd\uc131\ud55c": [0, 4, 10, 12, 14, 18, 26, 28, 31, 35, 39, 47, 48, 50, 52, 55, 58, 59, 61], "\uc774\ubbf8\uc9c0\uc758": [0, 1, 2, 3, 8, 10, 11, 12, 14, 18, 20, 21, 22, 26, 28, 29, 35, 39, 40, 41, 42, 43, 47, 48, 50, 52, 56, 57], "\uc608\uc2dc\ub294": [0, 18, 52, 59], "\uac19\uc2b5\ub2c8\ub2e4": [0, 1, 10, 12, 33, 41, 42, 46, 47, 50, 51, 59, 60], "\uc0dd\uc131": [0, 3, 4, 5, 7, 9, 12, 13, 14, 15, 19, 20, 21, 22, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 54, 57, 58, 59, 61], "engineering\uc744": [0, 26], "\uc218\ud589\ud558\ub358": 0, "\uc911": [0, 5, 7, 8, 9, 11, 13, 17, 19, 24, 25, 27, 30, 35, 36, 37, 38, 40, 41, 42, 46, 47, 48, 49, 50, 51, 52, 55, 59, 60], "\uc758\ub3c4\ud558\uc9c0\uc54a\uc740": 0, "\uacb0\uacfc\ub97c": [0, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 47, 52, 53, 54, 58], "\ubc1c\uacac\ud588\uc2b5\ub2c8\ub2e4": [0, 1, 12], "\uc544\ub798\ub294": [0, 10, 37, 39, 60], "\uc774\uc5d0": [0, 4, 5, 6, 9, 10, 16, 17, 24, 27, 34, 40, 42, 46, 50, 58, 59, 60, 61], "\uc608\uc2dc\ub85c": [0, 28, 45], "\uc804\uc758": [0, 38], "model\uc758": [0, 3, 4, 5, 10, 11, 14, 16, 18, 19, 24, 25, 26, 27, 28, 29, 30, 36, 39, 42, 43, 49, 54], "\uacb0\uacfc\uc640": [0, 12, 28, 29, 34, 58], "\ube44\uad50\uc785\ub2c8\ub2e4": 0, "\uc0c1\ud6691": 0, "\uc804": [0, 14, 30, 34, 38, 42, 49], "\ud6c4": [0, 1, 3, 4, 6, 7, 8, 12, 14, 15, 16, 17, 18, 20, 21, 23, 29, 30, 33, 34, 35, 36, 37, 38, 39, 44, 46, 47, 48, 50, 52, 53, 55, 57, 59, 61], 
"\uc0c1\ud6691\uc744": 0, "\ubcf4\uba74": [0, 3, 4, 6, 11, 14, 16, 22, 23, 25, 28, 29, 38, 39, 41, 42, 43, 47, 48, 51, 54], "\ub2f4\uc740": [0, 20, 26], "uniqu": [0, 1, 50, 55], "identifi": [0, 1, 40, 50, 55], "\uac00": [0, 1, 6, 9, 10, 12, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 36, 38, 39, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uc5c6\uc74c\uc5d0\ub3c4": [0, 23], "diseases\uc758": 0, "\uc78e\ub4e4\ub9cc": 0, "\uc774\ub294": [0, 2, 3, 4, 5, 7, 9, 10, 13, 16, 19, 20, 21, 24, 26, 28, 29, 34, 35, 36, 38, 39, 41, 42, 43, 44, 46, 47, 51, 53, 54, 56, 58, 59, 60], "\uac19\uc740": [0, 1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 41, 42, 43, 45, 46, 47, 49, 50, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uc18d\ud558\ub294": [0, 9], "\uc774\ubbf8\uc9c0\ub4e4\uc744": [0, 1, 3, 4, 6, 10, 12, 18, 26, 34, 46, 50, 53], "\uc0dd\uc131\ud574\ub0b4\uc9c0": [0, 11], "\ubabb\ud558\uace0": [0, 14, 26, 28], "\uc788\ub2e4\ub294": [0, 4, 23, 24, 26, 30, 35, 41, 43, 46, 52, 53, 54, 55], "\uac83\uc785\ub2c8\ub2e4": [0, 10, 12, 24, 41, 42, 47, 53, 58], "\uc774": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 58, 59, 60, 61], "\ud604\uc0c1\uc744": [0, 11, 18, 41, 56, 58], "languag": [0, 4, 7, 11, 12, 18, 26, 28, 29, 30, 31, 39, 42, 43, 48, 50, 52], "drift\ub77c\uace0": 0, "\ud558\uba70": [0, 2, 6, 20, 36, 48, 57], "\ubaa8\ub378\uc774": [0, 1, 4, 5, 7, 9, 10, 11, 13, 14, 15, 18, 19, 21, 22, 24, 26, 29, 31, 32, 33, 34, 35, 38, 39, 40, 41, 42, 44, 46, 49, 50, 51, 53, 55, 56, 57, 58, 59, 61], "leaf\uac00": 0, "\uc544\ub2cc": [0, 1, 4, 6, 9, 10, 13, 16, 18, 20, 21, 26, 30, 35, 39, 43, 47, 49, 51, 54, 56, 58], "\uc77c\ubc18\uc801\uc778": [0, 7, 11, 18, 19, 21, 24, 35, 36, 38, 43, 45, 49], "\uad00\ud55c": 
[0, 25, 27, 28, 32, 39, 40, 41], "\uc78a\uc5b4\ubc84\ub838\uae30": 0, "\ub54c\ubb38\uc785\ub2c8\ub2e4": 0, "\uc0c1\ud6692": 0, "\uc0c1\ud6692\ub97c": 0, "photo\ub77c\ub294": 0, "prompt\ub9cc": [0, 17, 40], "\uc0ac\uc6a9\ud558\uc600\ub294\ub370\ub3c4": 0, "\uc774\ubbf8\uc9c0\ub4e4\uc5d0": [0, 12], "\ud2b9\uc9d5\ub4e4\uc774": 0, "\ub098\ud0c0\ub0a9\ub2c8\ub2e4": 0, "dreambooth\uc5d0\uc11c\ub294": 0, "drift\ub97c": 0, "prior": [0, 8, 9, 12, 18, 21, 26, 28, 31, 50, 60, 61], "preserv": [0, 4, 18, 50, 58], "loss\ub97c": [0, 2, 4, 7, 8, 14, 16, 18, 19, 20, 22, 29, 33, 39, 43], "\uc0ac\uc6a9\ud574\uc11c": [0, 6, 10, 12, 16, 23, 28, 33, 48, 49, 52, 57], "\ud574\uacb0\ud558\uc600\uc73c\ubbc0\ub85c": 0, "\ubc29\ubc95\uc744": [0, 4, 5, 6, 7, 9, 16, 19, 20, 21, 23, 24, 25, 27, 28, 34, 35, 36, 38, 41, 42, 43, 45, 47, 49, 54, 56, 59], "\ud574\uacb0\ud558\uae30": [0, 2, 6, 9, 16, 18, 28, 30, 32, 36, 38, 43, 44, 46, 50, 53, 54, 55, 58, 59, 61], "train": [0, 2, 3, 6, 12, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 34, 39, 40, 41, 43, 44, 45, 49, 50, 52, 54, 55, 57, 59], "prompt\uc5d0\uc11c": [0, 26], "\uc81c\uc678\ud558\uace0": [0, 20, 30, 38], "\ucd5c\ub300\ud55c": [0, 21, 28, 36, 38, 43, 47, 59, 60], "\ub2e8\uc21c\ud55c": [0, 4, 11, 18, 21, 26, 27, 28, 35, 36, 39, 58], "model\uc744": [0, 3, 4, 5, 9, 10, 11, 13, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 36, 38, 39, 40, 43, 46, 49, 54], "\ub2e4\uc2dc": [0, 4, 6, 8, 14, 16, 27, 30, 34, 37, 41, 46, 47, 50, 51, 54, 58, 59, 60], "\uacb0\uacfc": [0, 1, 3, 4, 5, 6, 7, 11, 12, 15, 18, 19, 21, 24, 25, 28, 29, 31, 32, 35, 36, 38, 39, 42, 44, 45, 46, 49, 52, 53, 55, 58, 59, 61], "\uc7ac\ud6c8\ub828": 0, "\uc774\ud6c4\uc5d0\ub3c4": 0, "model\ub85c": [0, 3, 23, 26, 28], "\uc0dd\uc131\ud558\uc600\uc744": 0, "\ub54c\uc640": [0, 47], "\ube44\uc2b7\ud55c": [0, 6, 7, 8, 11, 15, 16, 19, 25, 31, 43, 47, 49, 50, 58], "\uc758": [0, 1, 2, 3, 4, 6, 9, 10, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 
38, 39, 41, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uacbd\uc6b0\uc5d0\ub294": [0, 16, 21, 28, 31, 36, 42, 61], "\uc5ec\uc804\ud788": [0, 4, 6, 9, 11, 20, 21, 22, 24, 28, 32, 34, 35, 52, 57], "\uc601\ud5a5\uc744": [0, 3, 4, 5, 7, 9, 14, 15, 16, 25, 39, 40, 41, 42, 49, 52, 57], "\ubc1b\uc740": [0, 2, 27, 43], "\uac83\uac19\uc740": [0, 19], "\uc774\ubbf8\uc9c0\ub4e4\uc774": [0, 6, 10], "photo\uc758": 0, "\uc5ec\ub7ec": [0, 3, 4, 6, 8, 9, 18, 19, 20, 21, 24, 26, 29, 31, 33, 34, 35, 42, 43, 45, 46, 50, 56, 59], "\ub300\uc0c1\ub4e4\uacfc": 0, "\uc0ac\uc6a9\ub418\ub294": [0, 20, 21, 24, 26, 34, 36, 42, 43, 46, 47, 50, 54, 56], "\ud2b9\uc131\uc744": [0, 4, 8, 9, 15, 16, 18, 21, 24, 26, 31, 36, 39, 46, 47], "\uac00\uc9c0\uace0\uc788\uc5b4\uc11c": 0, "\uadf8\ub7f0": [0, 2, 35, 37, 41], "\uac83\uc774\ub77c\ub294": [0, 42], "\uc0dd\uac01\uc774": [0, 26, 42], "\ub4e4\uc5c8\uace0": 0, "\uc774\ub97c": [0, 2, 3, 4, 5, 6, 8, 9, 10, 14, 16, 18, 19, 20, 21, 24, 26, 27, 28, 29, 30, 31, 32, 35, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61], "\uccb4\ud06c\ud574\ubcf4\uae30": 0, "\ud2b9\uc815\ud55c": [0, 12, 34, 41, 43, 45, 54], "photo\uc640": 0, "\uc6a9\ub3c4\ub85c": 0, "prompt\ub4e4\ub85c": 0, "\uc0dd\uc131\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 0, "\ub300\uc0c1": [0, 16, 18, 21, 47], "\uc138\uac00\uc9c0\ub85c\ub294": 0, "cat": [0, 14, 28, 46, 53, 59], "sea": 0, "pirate\uc744": 0, "\uc0ac\uc6a9\ud588\uace0": [0, 7, 23, 28, 38, 54], "\ube44\uc2b7\ud558\uac8c": [0, 6, 15, 43], "\ud14d\uc2a4\ud2b8": [0, 4, 7, 8, 9, 12, 19, 21, 24, 31, 33, 35, 39, 42, 43, 52], "\uc138\uac00\uc9c0\ub294": 0, "illustr": [0, 16], "anim": [0, 32, 36, 48], "wallpaper\ub97c": 0, "\uc774\ubbf8\uc9c0\ub294": [0, 5, 9, 11, 17, 18, 19, 24, 38, 40, 52, 56], "\uae00": 0, "\ub9c8\uc9c0\ub9c9": [0, 3, 5, 6, 14, 15, 17, 19, 23, 35, 41, 42, 46], "\ubd80\ubd84\uc758": [0, 3], "appendix\uc5d0": 0, "\uc788\uc2b5\ub2c8\ub2e4": [0, 1, 4, 9, 10, 12, 24, 39, 41, 42, 
46, 47, 50, 51, 53, 55, 56, 58, 59, 60, 61], "\ub300\uc0c1\uc744": [0, 47], "\uc9c0\uce6d\ud558\ub294": 0, "\ud14d\uc2a4\ud2b8\uc758": [0, 31], "\ub300\uc0c1\uc758": [0, 18, 50], "\ud2b9\uc9d5\uc774": [0, 39], "\uc798": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 18, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 38, 40, 41, 42, 43, 44, 47, 50, 51, 54, 55, 56, 57, 60, 61], "\ub4dc\ub7ec\ub098\ub294": 0, "\uc0dd\uc131\ub418\uc5c8\uc9c0\ub9cc": 0, "\ub300\uc0c1\uacfc": [0, 18, 24, 47], "\ud568\uaed8": [0, 9, 18, 21, 22, 23, 24, 28, 30, 32, 33, 34, 35, 38, 44, 45, 47, 55, 60], "\uc4f0\uc774\ub294": [0, 3, 42, 47, 51], "\uc78e\uc0ac\uadc0\uc758": 0, "\ud2b9\uc9d5\uc744": [0, 10, 21, 35, 50], "\uac00\uc9c0\ub294": [0, 1, 2, 5, 16, 19, 41, 54], "\uc77c\ubd80": [0, 2, 7, 9, 16, 18, 21, 23, 24, 26, 30, 31, 35, 41], "\uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 0, "tuning\ud55c": [0, 36], "400\uc7a5": 0, "\uc0dd\uc131\ud558\uc5ec": [0, 8, 9, 28, 31, 35, 39, 61], "\ud6c8\ub828\ud588\uc2b5\ub2c8\ub2e4": 0, "result_bas": 0, "\ucd94\uac00": [0, 4, 5, 8, 9, 11, 14, 17, 18, 19, 21, 22, 24, 26, 27, 31, 34, 35, 37, 38, 39, 45, 59], "\ud65c\uc6a9\ud55c": [0, 4, 5, 8, 9, 12, 19, 23, 26, 28, 39, 44, 45, 50, 51, 55, 56], "9": [0, 3, 6, 7, 25, 34, 36, 37, 38, 39, 47, 54], "84": 0, "result_now": 0, "kaggle\uc5d0\uc11c": 0, "\uc81c\uacf5\ud558\ub294": [0, 5, 12, 21, 35, 43], "test": [0, 2, 3, 5, 6, 8, 21, 31, 34, 43, 45, 52], "set\uc5d0": [0, 31, 42], "\uc801\uc6a9\ud588\uc744": [0, 27, 46, 54], "\ub54c\ub294": [0, 4, 32, 39, 42, 54], "baseline\uc774": [0, 43], "94": 0, "\uacbd\uc6b0\uac00": [0, 8, 10, 13, 16, 18, 21, 28, 32, 35, 39, 47, 56, 57], "93": 0, "\uc5ec\uc11c": 0, "baseline\ubcf4\ub2e4": 0, "\uc5bb\uc9c0\ub294": 0, "\ubabb": 0, "\ud6c8\ub828": [0, 4, 5, 9, 10, 17, 18, 21, 31, 38, 39, 42, 47, 52], "\uc911\uac04\uc911\uac04\uc5d0": 0, "\uc77c\uc815": [0, 2, 22, 29, 35], "step\ub9c8\ub2e4": [0, 28], "\uc0dd\uc131\ud558\uac8c\ud574\uc11c": 0, "\ud6c8\ub828\uc5d0": [0, 9, 
41], "\ubaa8\ub2c8\ud130\ub9c1\uc774": 0, "\uc788\uc73c\uba74": [0, 8], "\uc88b\uaca0\ub2e4\ub294": 0, "\uc0dd\uac01\uc744": 0, "\ud6c8\ub828\uc2dc": [0, 18], "hyperparamet": [0, 13, 17, 24, 29, 32, 40, 46, 49, 58, 59], "tuning\uc744": [0, 4, 10, 24, 26, 30, 42, 43, 54], "\uc880": [0, 10, 12, 28, 32, 40, 52], "\ucca0\uc800\ud558\uac8c": 0, "\ud574\uc57c\uaca0\ub2e4\ub294": 0, "\uc2e4\uc81c\ub85c": [0, 3, 4, 6, 7, 22, 25, 28, 31, 32, 35, 38, 39, 41, 42, 44, 47, 51, 60], "\uc870\uac74\uc744": [0, 9, 21, 24, 26, 35, 39, 43, 46], "\ub9cc\uc871\ud558\ub294\uc9c0": 0, "\uac80\uc218\ud560": 0, "\ubc29\uc548\uc774": 0, "\ud544\uc694\ud569\ub2c8\ub2e4": [0, 46], "\ub0b4\uc5d0\uc11c\ub3c4": 0, "\uce74\ud14c\uace0\ub9ac\ub97c": [0, 9, 35], "\ub098\ub20c": [0, 26, 27, 29], "\uc788\ub2e4\uba74": [0, 12, 14, 31, 45, 47, 57], "\ub098\ub220\uc11c": [0, 16, 52], "\uac01\uac01\uc5d0": [0, 12, 18, 41, 42], "tuning\ud560": [0, 11, 27, 30], "\uc218\ub3c4": [0, 4, 9, 12, 16, 26, 29, 32, 39, 41, 45, 46, 47, 54, 59], "\ud65c\uc6a9\ud574\ubcfc": 0, "submiss": 0, "score\uc5d0\uc11c": [0, 42], "baseline\uc744": [0, 39], "\uc774\uae30\uc9c0": 0, "\ud588\uc9c0\ub9cc": [0, 27], "text": [0, 1, 3, 5, 6, 8, 9, 10, 12, 14, 15, 16, 17, 18, 21, 24, 28, 29, 32, 34, 35, 38, 39, 40, 41, 42, 45, 48, 52, 53, 55, 57, 58, 59], "\uc774\uc6a9\ud55c": [0, 20, 31, 36, 39, 40, 42], "data\uc758": [0, 25, 40], "\uac00\ub2a5\uc131\uc744": [0, 13, 18, 26, 39], "\ubcfc": [0, 1, 6, 9, 12, 14, 16, 18, 19, 21, 24, 27, 28, 29, 36, 38, 39, 41, 42, 43, 47, 48, 49, 51, 53, 57, 58], "\uc788\uc5c8\ub2e4\uace0": [0, 28, 30, 39, 46, 47, 53, 55, 56, 59], "\uc0dd\uac01\ud569\ub2c8\ub2e4": [0, 41, 58], "\uc55e\uc5d0\uc11c": 0, "\uc5b8\uae09\ud55c": [0, 4, 10, 26, 36, 38, 53], "prompt\uc5d0": [0, 11, 19, 23, 26, 28, 36], "\uc608\uc2dc\uc785\ub2c8\ub2e4": [0, 1], "nsfw\ub85c": 0, "\ud310\ub2e8\ub418\uc5b4": 0, "\uac80\uc740\uc0c9\uc73c\ub85c": 0, "\ub098\uc654\uc2b5\ub2c8\ub2e4": [0, 41], "pirat": 0, "wallpap": 0, "sangwoo": [1, 46, 
50, 51, 53, 55, 56, 58, 59, 60, 61, 62], "jo": [1, 46, 50, 51, 53, 55, 56, 58, 59, 60, 61, 62], "09": [1, 45, 51], "\uc774\ubc88": [1, 39, 53, 59], "\ud3ec\uc2a4\ud305\uc5d0\uc11c\ub294": [1, 12], "\uc9c1\uc811": [1, 4, 6, 9, 15, 16, 18, 20, 21, 25, 26, 27, 28, 31, 32, 34, 35, 36, 37, 39, 45, 51, 56, 57, 60], "\ud559\uc2b5\ud574\ubcf4\uace0": 1, "\uc2e4\ud5d8\ud55c": [1, 24, 36, 46], "\uacb0\uacfc\ub4e4\uc744": [1, 12, 18, 50, 59], "\uacf5\uc720\ud560\ub824\uace0": 1, "\ud569\ub2c8\ub2e4": [1, 9, 10, 12, 20, 24, 39, 42, 46, 47, 50, 51, 53, 55, 56, 58, 59, 60, 61], "\uc6b0\uc120\uc801\uc73c\ub85c": [1, 46, 47, 48, 59, 60, 61], "\ud559\uc2b5\ub370\uc774\ud130\ub294": 1, "bryandle": 1, "data": [1, 5, 19, 20, 26, 29, 30, 31, 32, 34, 36, 39, 41, 45, 46, 47, 51, 54, 56, 57, 58, 62], "\uacf5\uac1c\ub41c": [1, 4, 18, 30, 53], "yolov5": 1, "\ubaa8\ub378": [1, 4, 5, 6, 7, 9, 11, 12, 13, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 37, 38, 39, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 58, 59, 60, 61], "\ubc0f": [1, 7, 8, 9, 12, 16, 20, 21, 24, 27, 28, 30, 31, 34, 35, 36, 38, 42, 43, 47, 49, 51, 52, 53, 54, 58, 59, 61], "waifu2x": 1, "\ud6c4\ucc98\ub9ac": [1, 35, 38], "\uae30\ubc95\uc744": [1, 3, 9, 11, 20, 23, 25, 28, 34, 46, 53, 55, 58, 61], "\ud65c\uc6a9\ud558\uc5ec": [1, 4, 8, 9, 18, 19, 20, 21, 23, 24, 28, 31, 35, 38, 39, 46, 48, 49, 50, 51, 55, 56, 60, 61], "\ud504\ub9ac\ub4dc\ub85c\uc6b0\uc5d0": 1, "\ub4f1\uc7a5\ud558\ub294": 1, "\uc778\ubb3c": [1, 5, 17, 47], "\uc0ac\uc9c4\ub4e4\uc744": [1, 34, 50], "\uc218\uc9d1\ud588\uc2b5\ub2c8\ub2e4": 1, "\ub17c\ubb38\uc5d0\uc11c\ub294": [1, 2, 3, 4, 6, 9, 10, 12, 14, 16, 19, 21, 23, 24, 26, 28, 29, 30, 34, 35, 36, 39, 41, 42, 43, 45, 46, 47, 48, 49, 50, 53, 54, 55, 56, 58, 59, 60], "3": [1, 6, 12, 22, 26, 28, 32, 43, 45, 47, 50, 51, 52, 60], "5": [1, 2, 6, 14, 19, 23, 24, 26, 27, 28, 32, 37, 45, 51, 54, 59, 60], "\uc7a5\uc73c\ub85c": 1, "fine": [1, 4, 5, 10, 12, 16, 17, 20, 24, 26, 28, 30, 32, 33, 34, 
36, 39, 40, 41, 43, 48, 52, 56, 61, 62], "tune": [1, 4, 12, 17, 20, 24, 26, 30, 36, 39, 46, 48, 52, 55, 56, 61, 62], "\uac00\ub2a5\ud558\ub2e4\uace0": [1, 41], "\uc81c\uc2dc\ub418\uc5b4\uc788\uc9c0\ub9cc": 1, "\uc0ac\uc9c4": [1, 6, 11, 26, 43, 47, 52], "\ub9ce\uc740": [1, 4, 5, 9, 10, 12, 15, 16, 18, 19, 21, 23, 26, 29, 31, 32, 33, 34, 35, 38, 39, 42, 43, 44, 46, 47, 48, 52, 56, 57, 58], "\ud559\uc2b5\ud558\uba74": [1, 4, 14, 21, 50], "\uc131\ub2a5\uc774": [1, 6, 14, 15, 16, 22, 25, 28, 30, 31, 34, 35, 36, 38, 39, 42, 45, 46, 47, 49, 53, 54, 55, 56, 58, 59], "\uc88b\uc544\uc838\uc11c": 1, "15": [1, 3, 5, 7, 15, 19, 24, 34, 39, 54, 55], "20": [1, 3, 4, 5, 7, 9, 15, 23, 25, 29, 39, 50, 54], "\uc7a5\uc758": [1, 12, 18, 21, 45], "\ud559\uc2b5\ud558\uc600\uc2b5\ub2c8\ub2e4": [1, 61], "\ud559\uc2b5\ud55c": [1, 4, 11, 12, 23, 25, 27, 30, 40, 42, 45, 46, 50, 52, 53, 55, 58, 61], "\uc774\ubbf8\uc9c0\ub4e4": [1, 38], "\uc2e4\ud5d8\ud558\uba74\uc11c": 1, "\ub300\ud45c\uc801\uc73c\ub85c": [1, 46, 50, 51, 56, 59, 60], "\uadf8\ub9ac\uace0": [1, 6, 16, 20, 22, 24, 27, 28, 32, 35, 39, 42, 43, 46, 47, 50, 51, 53, 55, 56, 58, 59, 60, 61], "\ub9c8\uc9c0\ub9c9\uc73c\ub85c": [1, 4, 21, 24, 26, 41, 46, 50, 53, 55, 56, 58, 59], "\ubc18\uc601\ud558\ub294": [1, 4, 26], "\uc815\ub3c4\ub97c": [1, 13, 25], "\uc870\uc808\ud558\ub294": [1, 10, 13, 21, 24, 36, 42], "prior_loss_weight": [1, 50], "\ubc14\uafd4\uac00\uba74\uc11c": 1, "\ud559\uc2b5\ud574\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 1, "\uc0ac\uc804\ud559\uc2b5\ub41c": [1, 4, 5, 20, 26, 29, 36, 42, 46, 50, 56, 58], "\ubaa8\ub378\ub85c": [1, 4, 5, 11, 18, 21, 22, 24, 26, 35, 36, 41, 42, 48, 51, 53, 55, 58, 59, 61], "\ucc98\uc74c\uc5d0\ub294": [1, 7, 21, 30, 42], "hakurei": 1, "waifu": 1, "diffus": [1, 3, 6, 9, 10, 12, 13, 17, 20, 22, 24, 30, 31, 32, 33, 34, 38, 51, 53, 56, 61, 62], "\ubaa8\ub378\uc744": [1, 3, 4, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 42, 43, 44, 45, 46, 47, 50, 
51, 52, 53, 55, 56, 57, 58, 59, 60, 61], "\uc2dc\ub3c4\ud574\ubd24\uc9c0\ub9cc": 1, "\uacb0\uacfc\uac00": [1, 6, 14, 15, 23, 26, 27, 28, 31, 33, 39, 42, 44, 47, 49, 51], "\ub9cc\uc871\uc2a4\ub7fd\uc9c0": [1, 18], "\ubabb\ud574": 1, "runwayml": 1, "stabl": [1, 9, 10, 15, 18, 20, 22, 24, 25, 26, 28, 30, 38, 39, 42, 45, 50, 53, 54, 55, 56, 58, 61], "v1": [1, 17, 24, 26, 27], "\uc791\uc5c5\uc744": [1, 34, 43, 46, 55, 59, 61], "\uc9c4\ud589\ud588\uc2b5\ub2c8\ub2e4": [1, 9, 24, 51, 53, 58, 59], "\uc81c\uc678\ud55c": [1, 3, 4, 5, 46], "\ub3d9\uc77c\ud55c": [1, 2, 5, 7, 9, 18, 21, 24, 25, 26, 31, 33, 34, 35, 36, 38, 39, 42, 45, 46, 47, 50, 53, 55, 56, 58, 59, 61], "configur": [1, 49, 51], "\uc73c\ub85c": [1, 2, 4, 6, 12, 15, 16, 18, 20, 21, 24, 30, 31, 32, 33, 34, 35, 36, 39, 43, 44, 45, 46, 47, 48, 50, 52, 53, 54, 55, 56, 57, 58, 59], "\uacb0\uacfc\uc785\ub2c8\ub2e4": [1, 42, 46, 55, 58, 59, 61], "model_nam": 1, "instance_prompt": 1, "A": [1, 4, 6, 7, 9, 10, 11, 12, 17, 19, 20, 21, 24, 26, 28, 30, 39, 41, 43, 44, 45, 47, 50, 52, 59, 60], "sk": [1, 40, 43], "girl": [1, 4], "class_prompt": 1, "python3": 1, "train_dreambooth": [1, 50], "py": [1, 45, 50], "pretrained_model_name_or_path": [1, 50], "pretrained_vae_name_or_path": 1, "stabilityai": [1, 56], "sd": [1, 4, 5, 17, 18, 19, 20, 26, 27, 28, 32, 38, 45, 54, 55, 56, 59], "vae": [1, 3, 5, 6, 11, 15, 25, 29, 35, 46, 50, 51, 56, 57, 58], "ft": [1, 21, 27, 39], "mse": [1, 4, 14, 20, 57], "output_dir": 1, "revis": [1, 50], "fp16": 1, "with_prior_preserv": [1, 50], "1": [1, 6, 9, 10, 12, 22, 24, 26, 28, 38, 41, 43, 45, 47, 50, 51, 52, 57, 60], "0": [1, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 26, 27, 29, 31, 32, 33, 35, 36, 37, 38, 41, 42, 44, 45, 46, 47, 48, 50, 51, 54, 56, 57, 58, 59, 60], "seed": [1, 58], "1337": 1, "resolut": [1, 6, 17, 20, 21, 22, 23, 25, 26, 27, 31, 32, 36, 38, 42, 45, 46, 47, 49, 53, 54, 58], "512": [1, 20, 22, 33, 34, 36, 38, 44, 51, 54, 56], "train_batch_s": 1, "train_text_encod": [1, 
50], "mixed_precis": 1, "use_8bit_adam": 1, "gradient_accumulation_step": [1, 50], "gradient_checkpoint": 1, "learning_r": 1, "1e": [1, 5, 17, 21, 32, 40, 56], "6": [1, 2, 3, 5, 6, 7, 11, 19, 20, 26, 33, 37, 38, 40, 45, 47, 54, 61], "lr_schedul": [1, 50], "constant": [1, 22, 25, 34, 49, 55, 61], "lr_warmup_step": 1, "num_class_imag": 1, "200": [1, 6, 29, 34, 38, 52], "sample_batch_s": 1, "4": [1, 2, 6, 12, 22, 26, 32, 41, 45, 51], "max_train_step": 1, "800": [1, 34], "save_interv": 1, "100": [1, 5, 19, 25, 29, 34, 35, 42, 45, 47, 57], "save_sample_prompt": 1, "concepts_list": 1, "json": 1, "w": [1, 3, 4, 5, 6, 8, 10, 11, 14, 19, 20, 22, 26, 27, 29, 30, 31, 34, 41, 44, 48, 52, 56, 58], "o": [1, 3, 22, 31, 34, 36, 40, 53], "\uc544\ub798": [1, 4, 6, 9, 10, 12, 16, 19, 21, 25, 26, 29, 31, 35, 41, 42, 46, 47, 48, 50, 51, 52, 55, 56, 58, 59, 60, 61], "\uadf8\ub9bc\ucc98\ub7fc": [1, 12, 27, 28, 30, 51, 52, 60], "infer": [1, 5, 6, 8, 14, 20, 22, 26, 27, 29, 31, 32, 38, 44, 45, 54, 56, 59, 60], "\uc785\ub825\ud588\uc744": 1, "\ub54c": [1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 31, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 54, 56, 58, 60, 61], "\uc81c\uc678\ud568\uc73c\ub85c\uc368": 1, "input": [1, 5, 6, 7, 8, 10, 11, 12, 15, 17, 18, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 41, 43, 45, 47, 48, 50, 51, 53, 58, 59, 61], "\uac00\uae4c\uc6b4": [1, 2, 7, 9, 20, 22, 26, 35, 39, 43, 48], "\uc6f9\ud230": 1, "\uc788\uc5c8\uc2b5\ub2c8\ub2e4": [1, 9, 10, 12, 24, 42, 47, 53, 55, 56], "\ub610\ud55c": [1, 2, 4, 5, 6, 7, 9, 10, 11, 14, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 35, 36, 38, 39, 42, 44, 46, 47, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61], "\ud551\ud06c\uc0c9": 1, "\uba38\ub9ac\ub97c": 1, "\ud55c": [1, 3, 5, 6, 8, 12, 14, 16, 18, 20, 21, 23, 24, 25, 26, 28, 30, 31, 32, 33, 34, 36, 38, 39, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\uc774\ubbfc\uc9c0": 1, 
"\uce90\ub9ad\ud130\ub97c": 1, "\uc5b4\ub290": [1, 41, 42, 43], "\uc815\ub3c4": [1, 7, 13, 20, 25, 30, 41, 42, 56], "\uc0dd\uc131\ud558\ub294": [1, 4, 7, 8, 9, 10, 12, 14, 18, 21, 22, 23, 24, 26, 27, 28, 29, 31, 33, 35, 36, 37, 39, 41, 42, 44, 47, 50, 51, 52, 53, 55, 56, 57, 58, 60, 61], "\ubd80\ubd84\ub3c4": [1, 53], "\ud655\uc778\ud560": [1, 4, 6, 16, 25, 28, 37, 38, 39, 42, 46, 47, 50, 53, 54, 55, 56, 58, 59, 60, 61], "pink": 1, "hair": [1, 4, 32, 40, 41], "With": 1, "without": [1, 4, 30, 31, 40, 41, 58], "\ub3c4": [1, 6, 7, 12, 14, 22, 24, 26, 32, 33, 36, 40, 44, 45, 46, 47, 50, 55, 56, 58, 59, 60], "\uce90\ub9ad\ud130\uc758": [1, 4, 5, 50], "\ubd80\uc790\uc5f0\uc2a4\ub7ec\uc6b4": 1, "\ubd80\ubd84\uc774\ub098": [1, 35], "\uc800\ud574\uc0c1\ub3c4": [1, 5, 29, 31, 33, 35, 44, 61], "\uacbd\uc6b0\ub4e4\uc774": 1, "\uc885\uc885": [1, 21, 35, 47], "\ubc1c\uc0dd\ud588\ub294\ub370": 1, "\ud1b5\ud574": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 54, 55, 57, 58, 59, 60, 61], "\ud004\ub9ac\ud2f0\uc758": [1, 25, 27, 37, 40, 42], "ugli": 1, "disfigur": 1, "deform": [1, 32, 55], "low": [1, 5, 12, 22, 24, 25, 29, 33, 36, 37, 44, 48, 55, 59, 61], "\ub17c\ubb38\uc5d0\uc11c": [1, 4, 5, 6, 10, 20, 21, 23, 26, 28, 29, 32, 35, 39, 41, 42, 45, 46, 48, 50, 51, 55, 56, 58, 60, 61], "\uc81c\uc2dc\ud55c": [1, 8, 11, 12, 16, 23, 28, 37, 45, 48, 51, 52], "\uc678\uc5d0": [1, 19, 58], "style": [1, 6, 12, 22, 24, 26, 27, 40, 43, 47, 50, 57, 58, 62], "\ub77c\ub294": [1, 10, 12, 20, 24, 42, 43, 47, 49, 52, 54, 58], "\ub85c": [1, 2, 4, 6, 7, 8, 9, 10, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], "\ud559\uc2b5\uc744": [1, 4, 11, 15, 16, 20, 21, 23, 29, 31, 35, 38, 41, 42, 44, 47, 49, 53, 54, 57, 61], 
"\uc2dc\ub3c4\ud574\ubcf4\uae30\ub3c4": 1, "\ud2b9\uc815": [1, 3, 4, 9, 11, 12, 13, 14, 18, 19, 20, 21, 23, 24, 25, 26, 27, 30, 34, 35, 38, 39, 40, 42, 43, 47, 52, 54, 59], "\uc5ec\uc790": 1, "\uce90\ub9ad\ud130\uc5d0": [1, 5], "\uc815\ubcf4\ubfd0\ub9cc": 1, "\uc544\ub2c8\ub77c": [1, 2, 6, 9, 11, 12, 14, 20, 21, 23, 26, 28, 30, 33, 35, 36, 38, 41, 44, 47, 50, 54, 56], "\ud504\ub9ac\ub4dc\ub85c\uc6b0": 1, "\uadf8\ub9bc\uccb4": [1, 4], "\uc790\uccb4\ub97c": [1, 6, 12, 25, 26, 34], "\ub2f4\uc544\ub0b4\uae30": [1, 56], "\uc704\ud55c": [1, 2, 3, 4, 5, 6, 7, 9, 12, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 42, 43, 47, 49, 54, 55, 57, 60], "\ubaa9\uc801\uc774\uc600\uc2b5\ub2c8\ub2e4": 1, "differ": [1, 5, 12, 28, 29, 30, 41, 54, 56, 58], "\uc2dc": [1, 8, 22, 27, 35, 39, 46, 47, 48, 49, 50, 51, 52, 53, 54, 58, 59, 60, 61], "\ud504\ub9ac\ub4dc\ub85c\uc6b0\uc758": 1, "\uadf8\ub9bc\uccb4\uac00": [1, 4, 12], "\ubc18\uc601\ub41c": [1, 12], "\ub0a8\uc790\uac00": 1, "\uc0dd\uc131\ub418\ub3c4\ub85d": [1, 28], "boi": 1, "\uc785\ub825\ud588\uc744\ub54c\uc758": 1, "\ud639\uc740": [1, 2, 3, 6, 11, 12, 20, 21, 22, 26, 28, 34, 38, 50, 54, 56, 60, 61], "\uc791\uac00\ub2d8\uc758": 1, "\uc7a5\uba74\ub4e4\ub85c": 1, "\uc804\uccb4\uc801\uc73c\ub85c": [1, 16], "\ud559\uc2b5\ud558\uac8c": [1, 5, 46, 53, 59, 61], "\ub41c\ub2e4\uba74": [1, 38], "\ub2e4\uc591\ud55c": [1, 2, 3, 4, 7, 8, 9, 11, 12, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 42, 43, 46, 47, 48, 49, 50, 52, 54, 56, 58, 59], "\uac83": [1, 5, 6, 12, 14, 15, 21, 27, 28, 29, 31, 32, 33, 34, 35, 39, 42, 43, 45, 47, 52, 57], "num_inference_step": [1, 59], "24": [1, 5, 28, 32, 33, 42, 55, 56], "step": [1, 4, 5, 6, 11, 12, 13, 14, 16, 17, 18, 20, 21, 23, 25, 26, 28, 29, 33, 34, 40, 42, 45, 49, 50, 51, 55, 58, 59, 61], "\uc744": [1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38, 39, 41, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61], "\ub298\ub824\uac00\uba74\uc11c": 1, "\ucd94\ub860\ub41c": 1, "\ud004\ub9ac\ud2f0\uac00": [1, 3, 4, 7, 38, 39, 59], "\uc0c1\uc2b9\ud558\ub294": 1, "\uc2e4\ud5d8\ub3c4": 1, "\uc9c4\ud589\ud588\ub294\ub370": 1, "\uc791\uc744\uc218\ub85d": [1, 15, 42, 46, 54], "\uc640": [1, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 25, 26, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40, 41, 43, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\ubb34\uad00\ud55c": [1, 53], "random": [1, 3, 5, 6, 11, 13, 14, 16, 17, 19, 20, 22, 24, 27, 30, 32, 33, 34, 36, 37, 38, 43, 46, 50, 51, 53, 58, 59, 61], "\uc0dd\uc131\ud558\uac8c": [1, 28, 42, 50, 52, 59, 60, 61], "\ub429\ub2c8\ub2e4": [1, 9, 10, 12, 24, 33, 41, 42, 46, 47, 50, 51, 53, 55, 56, 58, 59, 60, 61], "\ucd5c\uc885\uc801\uc73c\ub85c": [1, 28, 35, 36, 41, 42, 46, 58, 59], "num_infer": 1, "\uac12\uc740": [1, 6, 15, 25, 27, 42, 46, 49, 50], "\uac01\uac01": [1, 3, 4, 5, 6, 7, 10, 11, 12, 16, 20, 23, 24, 26, 28, 43, 45, 47, 50, 51, 54, 55, 56, 59, 60, 61], "\uacfc": [1, 3, 4, 5, 6, 7, 9, 12, 13, 14, 16, 18, 19, 20, 21, 22, 25, 26, 27, 28, 29, 31, 32, 33, 35, 36, 38, 40, 43, 44, 45, 46, 47, 48, 50, 52, 54, 55, 56, 57, 58, 59, 61], "\uc124\uc815\ud558\uc600\uc2b5\ub2c8\ub2e4": [1, 46, 58, 61], "increas": [1, 12, 20], "number": [1, 20, 32, 46, 49, 59], "guidance_scal": [1, 59], "\uc81c\uc678\ud574\ubcf8": 1, "\uc0dd\uc131\ub41c": [1, 3, 4, 5, 6, 9, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 28, 31, 32, 35, 37, 38, 39, 41, 42, 43, 45, 46, 47, 49, 50, 51, 52, 53, 59, 60, 61], "\ub0a8\uc790\uc758": 1, "\uba38\ub9ac\uce74\ub77d\uc774": 1, "\uae38\uc5b4\uc9c0\uace0": 1, "\uc5ec\uc131\uc2a4\ub7ec\uc6b4": 1, "\uc0dd\uae40\uc0c8\ub97c": [1, 43], "\ub180\ub77c\uc6b4": [1, 3, 12, 35, 37, 42], "\uc0ac\uc2e4\ub3c4": 1, "\uadf8": [1, 4, 6, 7, 9, 12, 14, 20, 21, 22, 24, 26, 28, 29, 32, 33, 35, 37, 38, 41, 42, 43, 44, 46, 47, 57, 58, 59], "\uc678": [1, 22, 37, 46, 
47, 58], "\ub530\ub978": [1, 4, 5, 10, 12, 15, 25, 28, 29, 34, 36, 39, 42, 45, 48, 50, 53, 54, 60], "\uc7ac\ubbf8\uc788\ub294": 1, "\uc2e4\ud5d8\uacb0\uacfc\ub4e4\uc744": 1, "\uacf5\uc720\ud569\ub2c8\ub2e4": [1, 46, 50, 58, 59], "\uc544\uc9c1": [1, 3, 12, 36, 43, 49, 54], "\uc190\uc758": [1, 5], "\ubaa8\uc591\uc744": [1, 21, 35], "\uc0dd\uc131\ud558\uc9c0": [1, 26, 44], "\ubabb\ud558\ub294": [1, 8, 24, 28, 44, 51, 56, 57], "\uc7ac\ucc28": [1, 46, 56], "climb": 1, "up": [1, 4, 7, 14, 58], "mountain": 1, "paint": [1, 5, 50, 53], "2": [1, 6, 9, 22, 24, 26, 28, 41, 43, 45, 47, 50, 51, 52, 57, 60], "hand": 1, "draw": [1, 40], "\ud558\ub2e8\uc758": [1, 46], "\uc88c\uce21\uacfc": 1, "\uc6b0\uce21": [1, 55], "\uc0ac\uc9c4\uc740": [1, 21, 29, 46, 58, 61], "\uc774\ub77c\ub294": [1, 6, 28, 32, 47, 49, 52, 54, 61], "\ub098\ube44\ub97c": 1, "\uc0dd\uc131\ud558\ub77c\ub294": 1, "\ucd94\ub860\ud574\ubcf8": 1, "\uc218\uc2dd\ud558\ub294": 1, "\uba85\uc0ac\uac00": 1, "\uc774\ub3c4\ub85d": 1, "\uc218\uc815\ud568\uc73c\ub85c\uc368": [1, 25, 46], "butterfli": 1, "\uc0ac\uc9c4\uc744": [1, 4, 6, 21, 47, 49, 56], "\uc0dd\uc131\ud560\ub54c": 1, "\uc870\uae08\uc774\ub098\ub9c8": 1, "\uc6f9\ud230\uc758": 1, "\uadf8\ub9bc\uccb4\ub97c": [1, 4], "\ubc18\uc601\ud560": [1, 36], "\uc788\uc5c8\ub358": [1, 47], "refer": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "paper": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], "http": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 
59, 60, 61], "arxiv": [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "org": [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "ab": [2, 4, 5, 6, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47, 48, 49, 50, 51, 52, 57, 58, 59, 60, 61], "2308": [2, 26], "04079": 2, "project": [2, 4, 5, 8, 12, 15, 17, 18, 19, 21, 26, 28, 29, 30, 32, 33, 34, 35, 39, 49, 54, 59], "repo": 2, "sam": [2, 5, 32], "inria": 2, "fr": 2, "fungraph": 2, "hyunsoo": 2, "june": [2, 4, 48], "12": [2, 6, 39, 41, 54], "2024": [2, 4, 5, 8, 15, 17, 18, 19, 20, 22, 26, 27, 28, 32, 33, 34, 35, 36, 39, 46, 47, 51, 54, 55, 56, 57, 60, 61, 62], "nerf": [2, 8, 19, 20, 21, 36, 55], "\uae30\ubc18": [2, 5, 6, 7, 15, 17, 18, 19, 21, 22, 24, 28, 34, 35, 37, 38, 39, 42, 43, 48, 56], "\ubc29\uc2dd\ub4e4\uc740": [2, 4, 21, 35], "\ub192\uc740": [2, 3, 4, 5, 6, 8, 9, 12, 14, 18, 19, 21, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 52], "\ud488\uc9c8\uc758": [2, 4, 8, 14, 21, 23, 35, 36, 47, 54], "\uc0c8\ub85c\uc6b4": [2, 4, 6, 7, 9, 11, 15, 16, 17, 18, 19, 21, 24, 26, 27, 31, 32, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 47, 49, 50, 51, 52, 53, 54, 58, 60, 61], "\uc7a5\uba74": [2, 34], "\ud569\uc131\uc774": 2, "\uac00\ub2a5\ud558\uc9c0\ub9cc": 2, "\uc2dc\uac04\uc774": [2, 21, 35, 42, 49, 55], "\ub9ce\uc774": [2, 4, 5, 6, 12, 16, 19, 21, 28, 29, 34, 35, 36, 39, 42, 43, 44, 45, 49, 51], "\uac78\ub824": 2, "\uc2e4\uc2dc\uac04": [2, 35, 54], "\ub80c\ub354\ub9c1\uc5d0\ub294": 2, "\ud55c\uacc4\uac00": [2, 4, 10, 27, 40, 43, 44, 47, 52, 59, 61], 
"\uc788\ub2e4": [2, 3, 4, 5, 6, 7, 13, 16, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 43, 44, 45, 49, 52, 54], "\uc2dc\uac04\uc740": 2, "\ube60\ub974\uc9c0\ub9cc": [2, 24], "\ud488\uc9c8\uc774": [2, 4, 8, 26, 29, 31, 35, 36, 41, 47], "\ub5a8\uc5b4\uc9c4\ub2e4": [2, 5, 6, 21, 26, 29, 54], "\ub17c\ubb38\uc740": [2, 6, 24, 28, 32, 39, 41, 42, 47, 50, 56, 57, 61], "splatting\uc744": [2, 20], "\uc81c\uc548\ud569\ub2c8\ub2e4": [2, 9, 10, 12, 24, 46, 47, 56], "\ubc29\ubc95\uc740": [2, 4, 5, 6, 7, 9, 16, 17, 19, 23, 24, 26, 28, 30, 35, 43, 47, 50, 54], "mip": [2, 18, 19, 20, 55], "nerf360\uacfc": 2, "\uc720\uc0ac\ud55c": [2, 5, 6, 11, 14, 15, 16, 18, 24, 26, 29, 35, 36, 39, 43, 48, 50, 54, 55, 57, 58, 60], "\uc81c\uacf5\ud558\uba74\uc11c\ub3c4": 2, "instantngp\ub9cc\ud07c": 2, "\ube60\ub974\uac8c": [2, 4, 10, 24, 25, 49, 52, 54, 60], "\ud559\uc2b5\ud560": [2, 4, 9, 15, 22, 23, 27, 31, 32, 36, 38, 42, 46, 47, 51, 55, 61], "splatting\uc740": 2, "\ub2e4\uc74c": [2, 6, 7, 9, 18, 20, 21, 31, 32, 35, 41, 43, 46, 47, 52, 54, 59], "\uc138": [2, 4, 6, 7, 9, 12, 19, 34, 35, 38, 41, 44, 47, 50, 58], "\uac00\uc9c0\ub85c": 2, "\uad6c\uc131\ub41c\ub2e4\ub2e4": 2, "structur": [2, 4, 10, 16, 19, 26, 29, 30, 41, 46, 56, 59], "from": [2, 3, 7, 8, 12, 14, 19, 20, 28, 29, 33, 41, 51, 58, 61], "motion": [2, 5, 6, 21, 46], "sfm": [2, 20], "\uc5bb\uc740": [2, 4, 28, 39, 47, 57], "\ud76c\uc18c": 2, "\uc810": [2, 22, 35, 36, 45], "\uad6c\ub984\uc744": 2, "\ucd08\uae30\uac12\uc73c\ub85c": 2, "\uac00\uc6b0\uc2dc\uc548\uc744": 2, "\ub3c4\uc785": [2, 5, 18, 19, 22, 25, 33, 34], "\uc704\uce58": [2, 7, 34, 36, 39, 44], "\ubd88\ud22c\uba85\ub3c4": 2, "\uc774\ubc29\uc131": 2, "\uacf5\ubd84\uc0b0": [2, 49], "\uad6c\ud615": 2, "\uace0\uc870\ud30c": 2, "sh": [2, 20], "\uacc4\uc218\uc758": [2, 20], "\ucd5c\uc801\ud654": [2, 4, 9, 18, 19, 20, 24, 34, 35, 36, 38, 39, 43, 55], "\ud0c0\uc77c": 2, "\ub798\uc2a4\ud130\ud654\uc5d0\uc11c": 2, "\uc601\uac10\uc744": [2, 17, 27, 31, 35, 43, 
57], "\ube60\ub978": [2, 8, 11, 20, 24, 33, 39, 46, 54], "gpu": [2, 5, 17, 20, 21, 22, 26, 30, 34, 35, 54, 55, 59], "\uc815\ub82c": [2, 8, 21, 36], "\uc54c\uace0\ub9ac\uc998\uc744": [2, 19, 20, 35, 36, 46, 48, 55], "\ud1b5\ud55c": [2, 3, 5, 11, 17, 26, 34, 35, 37, 43, 46, 49, 56, 57, 58, 61], "\ub80c\ub354\ub9c1": [2, 18, 19, 20, 21, 34, 35, 36, 39], "\uc774\uc804\uc758": [2, 15, 27, 28], "implicit": [2, 3, 8, 13, 21, 34, 39, 51, 57, 59], "\uc811\uadfc\ubc95\ub4e4\uacfc": 2, "\ub3d9\ub4f1\ud558\uac70\ub098": 2, "\ub098\uc740": [2, 4, 8, 24, 26, 35, 36, 39, 60], "\ud488\uc9c8\uc744": [2, 4, 12, 19, 20, 21, 26, 29, 35, 38, 42, 43], "\uc81c\uacf5\ud558\uba70": [2, 8], "\uac00\uc7a5": [2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 17, 18, 23, 24, 28, 31, 32, 33, 35, 36, 39, 41, 42, 43, 45, 46, 47, 50, 58, 61], "\ubc29\uc2dd\uacfc": [2, 19, 27, 36, 46, 54], "\ud559\uc2b5": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 21, 22, 23, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 58, 59, 60, 61], "\uc18d\ub3c4\uc640": [2, 24], "\ubcf4\uc5ec\uc8fc\uc5b4": 2, "\uace0\uc218\uc900\uc758": 2, "\ubdf0": [2, 18, 19, 34, 35], "\ud569\uc131\uc5d0": [2, 43, 52], "\ub300\ud574": [2, 3, 5, 6, 7, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 27, 28, 29, 31, 33, 34, 35, 36, 38, 42, 43, 44, 45, 46, 47, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61], "\ucc98\uc74c\uc73c\ub85c": [2, 26, 36, 61], "\ub80c\ub354\ub9c1\uc744": [2, 18, 35, 36], "\uc81c\uacf5\ud55c\ub2e4": [2, 31, 54], "main": [2, 8, 28, 44, 50], "process": [2, 4, 6, 12, 18, 19, 25, 26, 27, 32, 36, 37, 39, 43, 45, 48, 50, 51, 55, 58, 59], "peusdo": 2, "algorithm": [2, 6, 14, 34, 45, 46], "normal": [2, 3, 7, 13, 14, 15, 18, 19, 22, 34, 36, 39, 41, 46, 47, 51, 52, 55], "\ud45c\uba74": [2, 20], "\ubc95\uc120": 2, "\uc5c6\ub294": [2, 5, 7, 16, 17, 18, 21, 29, 35, 38, 39, 40, 47, 49, 50, 52], "\ud3ec\uc778\ud2b8\ub4e4\uc758": 2, "sparse\ud55c": 2, "\uc14b\uc744": [2, 
34, 39, 40, 43, 44], "initi": [2, 7, 8, 15, 18, 19, 20, 27, 30, 36, 46, 55, 56, 57], "point\ub85c": [2, 39], "\ud558\uc5ec": [2, 3, 6, 9, 26, 35, 36, 39, 44, 47, 52, 55, 61], "\uace0\ud488\uc9c8\uc758": [2, 4, 18, 20, 26, 35, 36, 54], "novel": [2, 20, 28, 32, 34, 39, 50, 61], "view": [2, 4, 8, 19, 20, 36, 39, 41, 45, 50, 51, 60], "synthesis\ub97c": [2, 23], "\uac00\ub2a5\ud558\uac8c": [2, 8, 10, 16, 19, 26, 30, 33, 34, 39, 41, 44, 54], "\ud558\ub294": [2, 3, 4, 6, 7, 9, 10, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61], "scene": [2, 7, 12, 19, 20, 28, 36, 39, 44, 53, 56], "representation\uc744": [2, 19, 20, 23, 36, 39], "\ucd5c\uc801\ud654\ud558\ub294": [2, 18, 20, 30, 34, 35, 36, 51, 55], "\uac83\uc744": [2, 3, 4, 6, 7, 9, 10, 12, 14, 15, 16, 18, 19, 21, 24, 25, 26, 28, 29, 31, 32, 33, 35, 37, 38, 39, 41, 42, 43, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61], "\ubaa9\ud45c\ub85c": [2, 4, 9, 21, 24, 35, 39, 43, 47, 50], "\ud55c\ub2e4": [2, 4, 6, 7, 16, 20, 21, 23, 26, 27, 28, 29, 31, 32, 35, 36, 39, 43, 44, 45, 49, 52, 54, 57], "unstructured\ud558\uace0": 2, "explicit\ud55c": 2, "primitive\ub97c": 2, "\ud544\uc694\ub85c": [2, 16, 27, 35, 39, 43], "\ubbf8\ubd84": [2, 27, 34, 35, 46, 54, 57], "\uac00\ub2a5\ud558\uace0": [2, 3, 11, 19, 36, 55], "2d": [2, 4, 5, 8, 14, 18, 19, 20, 31, 34, 35, 36], "splats\ub85c": 2, "\uc27d\uac8c": [2, 4, 6, 8, 15, 19, 27, 34, 35, 38, 41, 43, 57], "project\ub418\ub294": 2, "gaussian\uc744": [2, 20], "\uc120\ud0dd\ud588\ub2e4": 2, "equat": [2, 11, 12, 13, 17, 19, 22, 23, 25, 36, 37, 40, 46, 49, 54], "gaussian\uc740": 2, "\ud3ec\uc778\ud2b8": [2, 34, 35], "mean": [2, 13, 14, 16, 20, 23, 34, 45, 50, 58], "\u03bc\ub97c": 2, "\uc911\uc2ec\uc73c\ub85c": [2, 23], "\ud558\uace0": [2, 8, 12, 16, 28, 29, 32, 34, 36, 38, 39, 45, 46, 55, 59, 61], "\ud589\ub82c": [2, 4], "\u03c3\ub85c": 2, "\uc815\uc758\ud55c\ub2e4": [2, 16, 
21, 26, 27, 37, 44], "2d\ub85c": [2, 20], "project\ud574\uc57c": 2, "transform": [2, 3, 4, 7, 14, 23, 25, 26, 32, 33, 35, 38, 39, 42, 45], "w\uc5d0": 2, "\uce74\uba54\ub77c": [2, 4, 18, 19, 20, 34, 35, 36], "\uc88c\ud45c\uacc4\uc5d0\uc11c\uc758": 2, "\u03c3": [2, 13, 34, 35], "\ub098\ud0c0\ub0bc": [2, 6, 27, 28, 34, 47, 54], "\ucd5c\uc801\ud654\ub97c": [2, 18, 19, 20, 34, 39], "\u03c3\ub294": [2, 13], "posit": [2, 4, 7, 14, 15, 19, 20, 22, 23, 31, 33, 35, 40], "semi": 2, "definit": [2, 46], "\ud589\ub82c\uc774\uc5b4\uc57c": 2, "\ub54c\ubb38\uc5d0": [2, 4, 5, 8, 10, 12, 14, 15, 17, 18, 20, 21, 22, 24, 26, 27, 28, 30, 32, 33, 34, 35, 36, 39, 41, 42, 46, 47, 50, 51, 54, 56, 58, 59, 60, 61], "\ucd5c\uc801\ud654\uac00": [2, 18, 19, 34, 43], "\uc5b4\ub835\ub2e4\uace0": [2, 55], "\uc9c1\uad00\uc801\uc774\uace0": 2, "\ucd5c\uc801\ud654\uc5d0": [2, 18, 19, 34, 47], "\uc801\ud569\ud55c": [2, 4, 26, 32], "\uc120\ud0dd\ud55c\ub2e4": [2, 7], "gaussian\uc758": [2, 20], "\ud0c0\uc6d0\uccb4\uc758": 2, "\uad6c\uc131\uc744": 2, "\uc124\uba85\ud558\ub294": [2, 41, 42, 43, 47], "\uac83\uacfc": [2, 4, 6, 10, 19, 21, 23, 36, 39, 43, 53, 58], "\uc720\uc0ac\ud558\uba70": 2, "scale": [2, 6, 7, 11, 12, 16, 17, 20, 23, 26, 27, 30, 32, 33, 34, 41, 49, 52, 57, 59], "matrix": [2, 20, 27, 30, 47], "s\uc640": 2, "rotat": [2, 8, 20, 32, 34], "r\uc744": [2, 30], "\uc0ac\uc6a9\ud55c\ub2e4": [2, 4, 7, 16, 20, 21, 23, 26, 28, 31, 35, 36, 38, 39, 54], "scaling\uc740": 2, "vector": [2, 4, 7, 12, 22, 27, 29, 33, 34, 39, 41, 43, 48, 50], "s\ub85c": [2, 52], "rotation\uc740": 2, "quaternion": [2, 20], "q\ub85c": 2, "\ud45c\ud604\ud558\uba70": [2, 20], "\uc774\ub4e4\uc740": [2, 56], "\uac01\uac01\uc758": [2, 3, 6, 11, 16, 20, 22, 28, 31, 34, 40], "\ud589\ub82c\ub85c": [2, 24], "\ubcc0\ud658\ub420": 2, "\ub3d9\uc548": [2, 9, 12, 16, 21, 31, 38, 51], "auto": [2, 4, 20, 26, 39, 60], "grad": 2, "\uc790\ub3d9": 2, "\uc624\ubc84\ud5e4\ub4dc\ub97c": 2, "\ud53c\ud558\uae30": [2, 4, 7, 18, 19, 26], "\ubaa8\ub4e0": 
[2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 15, 16, 18, 20, 21, 22, 24, 26, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 49, 54, 57, 58, 60], "\ud30c\ub77c\ubbf8\ud130\uc5d0": 2, "gradient\ub97c": [2, 23, 26, 48, 49], "\uba85\uc2dc\uc801\uc73c\ub85c": [2, 3, 9, 35], "\uc720\ub3c4\ud55c\ub2e4": [2, 28], "anisotrop": [2, 20], "covari": [2, 15, 20], "representation\uc740": [2, 29], "\uc7a5\uba74\uc758": [2, 19, 34], "geometry\uc5d0": 2, "\uc801\uc751\ud558\ub3c4\ub85d": [2, 17], "\ucd5c\uc801\ud654\ud55c\ub2e4": [2, 36], "\ub17c\ubb38\uc758": [2, 4, 12, 20, 24, 26, 27, 28, 31, 34, 35, 37, 39, 41, 42, 49, 54, 60], "\ud575\uc2ec": [2, 7, 8, 18, 21, 24, 26, 31, 35, 37, 42, 45, 49], "\uc811\uadfc\ubc95\uc740": [2, 9], "free": [2, 26, 27, 28, 35, 36, 39, 42, 44, 46, 53, 54, 56, 59], "\uc7a5\uba74\uc744": [2, 9, 20, 34, 35], "\uc815\ud655\ud558\uac8c": [2, 8, 9, 14, 18, 21, 43], "\ud45c\ud604\ud558\ub294": [2, 9, 14, 29, 36, 39, 43, 44, 50, 55], "\ubc00\uc9d1\ub41c": [2, 35], "\uc138\ud2b8\ub97c": [2, 9, 31], "\ub9cc\ub4dc\ub294": [2, 4, 9, 14, 19, 21, 23, 24, 28, 31, 34, 39, 47, 60], "\ub2e8\uacc4\ub2e4": 2, "\uc5ec\uae30\uc5d0\ub294": [2, 43], "\ud835\udc5d": [2, 48], "\ud22c\uba85\ub3c4": [2, 34], "\ud835\udefc": 2, "\u03c3\ubfd0\ub9cc": 2, "scene\uc758": [2, 36], "depend": [2, 19, 20, 34, 36, 48, 50], "appearance\ub97c": 2, "\uac01": [2, 3, 4, 5, 6, 8, 9, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 48, 49, 50, 53, 54, 55, 56, 58, 59], "\uc0c9\uc0c1": [2, 12, 18, 19, 20, 33, 34, 35], "c\ub97c": 2, "coefficients\uae4c\uc9c0": 2, "\ud3ec\ud568\ub41c\ub2e4": [2, 21, 43], "3d\ub97c": [2, 20, 39], "project\ud560": 2, "\ubc1c\uc0dd\ud560": [2, 3, 4, 7, 16, 35], "\uc788\ub294": [2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 41, 42, 43, 44, 45, 46, 47, 49, 50, 53, 54, 56, 58, 59], "\ubaa8\ud638\ud568\uc744": 2, "\uacfc\uc815\uc5d0\uc11c": [2, 9, 13, 14, 16, 
20, 22, 23, 24, 28, 29, 30, 32, 34, 37, 40, 42, 43, 47, 51, 54, 55, 56], "geometry\uac00": [2, 8], "\uc0dd\uc131\ub418\uac70\ub098": 2, "\uc0ad\uc81c\ub418\uac70\ub098": 2, "\uc774\ub3d9\ud560": [2, 43], "\uc788\uc5b4\uc57c": [2, 8, 32, 44, 54, 57], "\ud568": [2, 5, 6, 8, 14, 15, 16, 17, 18, 19, 20, 29, 30, 31, 32, 33, 36, 39, 42, 43, 45, 48, 49, 52], "\ud30c\ub77c\ubbf8\ud130\uc758": [2, 33, 51], "\ud004\ub9ac\ud2f0\ub294": [2, 46], "\ud070": [2, 3, 4, 5, 6, 9, 12, 14, 16, 19, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 38, 39, 41, 42, 43, 44, 45, 54, 57, 58, 59], "homogen": [2, 16], "area\ub4e4\uc744": 2, "\uc218\uc758": [2, 4, 7, 26, 29, 34, 36, 45], "gaussian\ub4e4\ub85c": 2, "\ucea1\ucc98\ub420": 2, "\uc788\uae30": [2, 8, 10, 18, 32, 35, 39, 46, 47, 54], "representation\uc758": [2, 36, 39], "compactness\uc5d0": 2, "\uc911\uc694": [2, 5], "sgd\ub97c": 2, "\uc0ac\uc6a9\ud558\uace0": [2, 9, 10, 11, 16, 22, 24, 26, 30, 33, 35, 40, 42, 43, 50, 53, 54, 56], "\uc5f0\uc0b0\uc740": [2, 22], "cuda": [2, 56, 60], "\ucee4\ub110\uc744": 2, "\uc0ac\uc6a9\ud569\ub2c8\ub2e4": [2, 20, 41, 47, 50, 55, 58, 60], "\ud2b9\ud788": [2, 3, 4, 5, 8, 12, 18, 26, 27, 36, 38, 39, 43, 46, 52, 54, 55, 56, 59], "rasterization\uc740": 2, "\ucd5c\uc801\ud654\uc758": [2, 18, 34], "\ud6a8\uc728\uc131\uc5d0": 2, "\uc911\uc694\ud569\ub2c8\ub2e4": 2, "\uc8fc\uc694": [2, 9, 17, 18, 20, 31, 38], "comput": [2, 12, 13, 19, 22, 27, 29, 30, 44, 46, 47, 49, 50, 52, 53, 54, 55, 57, 58, 59, 62], "bottleneck\uc774\uae30": 2, "\ub54c\ubb38": [2, 6, 18, 22, 34, 45, 54], "\ud835\udefc\uc5d0": 2, "\ub300\ud574\uc11c\ub294": [2, 5, 11, 16, 17, 23, 25, 28, 31, 42, 58], "sigmoid": [2, 51, 60], "function\uc744": [2, 4, 15, 19, 27, 28, 36, 39, 48], "\uacf5\ubd84\uc0b0\uc758": 2, "scale\uc5d0": [2, 49], "exponenti": [2, 13, 15, 27, 34, 38, 46, 54], "activ": [2, 41, 56], "\ud568\uc218\ub97c": [2, 9, 20, 22, 34, 36], "\uc0ac\uc6a9": [2, 3, 4, 5, 6, 10, 11, 14, 15, 17, 18, 19, 21, 22, 23, 25, 27, 29, 33, 34, 35, 39, 
40, 48, 49, 52, 54, 57], "\ud589\ub82c\uc740": [2, 24], "\uc810\uae4c\uc9c0\uc758": 2, "\uac70\ub9ac\uc758": 2, "\ud3c9\uade0\uc744": [2, 6, 7, 19], "\ucd95\uc73c\ub85c": [2, 4, 38], "isotrop": 2, "gaussian\uc73c\ub85c": [2, 20], "\ucd94\uc815": [2, 31, 34, 39, 43, 54], "position\uc5d0": 2, "\ub300\ud574\uc11c\ub9cc": [2, 4, 31, 42, 45, 47, 56], "decai": [2, 17, 26, 34, 46], "\uc2a4\ucf00\uc904\ub9c1\uc744": 2, "loss": [2, 4, 6, 10, 13, 15, 18, 19, 20, 22, 24, 25, 28, 29, 33, 34, 37, 40, 45, 46, 48, 49, 50, 51, 54, 55, 56, 60], "function\uc740": [2, 4, 28, 54], "d": [2, 4, 6, 14, 15, 16, 18, 20, 21, 22, 26, 27, 29, 30, 31, 34, 35, 36, 39, 41, 46, 51, 54, 56, 57, 58, 61], "ssim\uacfc": 2, "l1": [2, 45, 46, 47], "\uc0ac\uc6a9\ud558\uba70": [2, 7, 16, 35, 56], "ssim": [2, 5, 32], "loss\ub294": [2, 10, 14, 19, 22, 39], "\uc65c\uace1": [2, 5, 8, 20], "distort": [2, 5, 16, 29, 34, 43], "\ubc18\uc601\ud558\uace0": 2, "\uac00\ub2a5\ud558\uc5ec": 2, "metric\ubfd0\ub9cc": 2, "loss\ub85c\ub3c4": 2, "\uac00\ub2a5": [2, 5, 6, 8, 11, 14, 17, 18, 19, 20, 22, 23, 27, 29, 33, 34, 49, 58, 59], "\ucc38\uace0\ub85c": [2, 58], "ssim\uc740": 2, "\ubc1d\uae30": [2, 34], "\ub300\uc870": 2, "\uad6c\uc870\ub97c": [2, 4, 5, 7, 10, 15, 18, 20, 21, 26, 29, 32, 34, 35, 39, 41, 47, 48, 50, 52, 59], "\uace0\ub824\ud558\uc5ec": [2, 4, 27, 34, 39, 43], "\ub450": [2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 18, 19, 21, 22, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 38, 39, 41, 42, 44, 45, 46, 47, 50, 54, 55, 56, 58], "\uac04\uc758": [2, 6, 9, 16, 19, 21, 23, 28, 31, 32, 34, 35, 40, 46, 47, 56], "\uc720\uc0ac\uc131\uc744": [2, 9, 18, 43], "\uce21\uc815\ud558\ub294": [2, 3, 9, 12, 16, 27, 45, 50, 54], "\uba54\ud2b8\ub9ad\uc774\ub2e4": 2, "\ucd08\uae30": [2, 5, 18, 19, 20, 24, 26, 28, 35, 36, 39, 46, 61], "\uc810\ub4e4\uc744": [2, 20, 34], "\uc2dc\uc791\uc73c\ub85c": [2, 5, 20], "scene\uc744": [2, 20, 34, 36, 39], "\ud45c\ud604\ud558\uae30": [2, 36, 39, 59], "unit": [2, 20, 22, 34], "volum": [2, 18, 55], 
"\ub0b4": [2, 5, 12, 26, 28, 35], "gaussian\ub4e4\uc758": [2, 20], "\uc218\uc640": [2, 48], "\ubc00\ub3c4\ub97c": [2, 36], "\uc810\uc9c4\uc801\uc73c\ub85c": [2, 9, 35, 46, 54, 57], "\ubc29\uc2dd\uc744": [2, 4, 11, 16, 18, 19, 20, 21, 23, 31, 32, 35, 36, 37, 39, 46, 55, 56, 59, 61], "\uc81c\uc548\ud55c\ub2e4": [2, 3, 4, 5, 16, 20, 26, 27, 28, 31, 36, 38, 40, 43, 49, 54], "\ub9e4": [2, 6, 14, 19, 55, 58, 59], "100\ubc88\uc758": 2, "\ubc18\ubcf5": [2, 18, 24, 28, 35, 58], "iter": [2, 19, 20, 24, 34, 35, 46, 55, 58], "\ub9c8\ub2e4": [2, 4, 6, 22, 26, 32, 33, 39, 45, 47, 55, 56, 58, 59], "\ucd94\uac00\ud558\uace0": [2, 12, 14, 22, 24, 31, 33], "\ud835\udefc\uac00": 2, "\uac12\ubcf4\ub2e4": 2, "\uc791\uc740": [2, 4, 6, 7, 11, 14, 15, 16, 19, 20, 24, 26, 27, 30, 31, 35, 38, 42, 43, 45, 46, 47, 54, 57], "\uc81c\uac70": [2, 14, 15, 24, 33, 38], "gaussians\ub294": 2, "\ube48": [2, 19, 20, 32], "\uacf5\uac04\uc744": [2, 19, 20, 21, 24], "\ucc44\uc6b0\uace0": 2, "miss": [2, 25], "geometr": 2, "feature\uc774": 2, "region\uacfc": 2, "\ud558\ub098\uc758": [2, 3, 6, 8, 11, 14, 20, 21, 22, 26, 30, 31, 33, 34, 35, 36, 40, 41, 43, 47, 54, 55, 56, 60, 61], "gaussian\uc774": 2, "\ub108\ubb34": [2, 6, 16, 19, 23, 25, 27, 29, 31, 32, 38, 40, 48, 54], "\ub113\uc740": [2, 7, 18], "region\uc744": 2, "\ucee4\ubc84\ud558\ub294": [2, 28], "region\uc5d0": 2, "\uc9d1\uc911\ud569\ub2c8\ub2e4": 2, "\uc774\ub7ec\ud55c": [2, 3, 4, 6, 9, 11, 12, 16, 18, 20, 21, 22, 24, 25, 28, 31, 33, 34, 35, 36, 38, 39, 40, 42, 43, 44, 46, 47, 50, 55, 56, 58, 60, 61], "\uc9c0\uc5ed\uc740": 2, "\ubaa8\ub450": [2, 4, 5, 7, 11, 12, 13, 16, 21, 22, 24, 25, 26, 28, 31, 33, 35, 37, 38, 39, 40, 41, 42, 43, 46, 47, 49, 50, 53, 54, 55, 56, 59], "space": [2, 4, 6, 9, 12, 21, 22, 24, 26, 29, 31, 33, 38, 39, 43, 44, 46, 48, 50, 55, 56, 57, 58, 59, 60], "\uac00\uc9c0\uba70": [2, 23, 27, 54, 59], "\uc774\ub3d9\uc2dc\ud0b5\ub2c8\ub2e4": 2, "under": [2, 36], "reconstruct": [2, 14, 18, 20, 34, 38, 39, 43, 50, 56, 58, 60], 
"\ud06c\uae30\uc758": [2, 4, 30, 38], "\ubcf5\uc81c\ud558\uace0": 2, "\uae30\uc6b8\uae30": [2, 27, 37], "\ubc29\ud5a5\uc73c\ub85c": [2, 14, 16, 19, 26, 36, 37, 39, 40, 51, 57, 58, 59], "\uc774\ub3d9": [2, 34], "over": [2, 3, 6, 18, 20, 28, 30, 36, 47, 52], "\uac1c\ub85c": 2, "\ub098\ub204\uace0": [2, 20, 32, 35, 48], "\uc704\uce58\ub294": 2, "pdf\ub97c": 2, "\uc0d8\ud50c\ub9c1\ud574\uc11c": [2, 50], "\ucd08\uae30\ud654": [2, 4, 5, 7, 15, 17, 19, 20, 21, 31, 36, 39], "\uc785\ub825": [2, 5, 7, 8, 9, 15, 17, 18, 19, 21, 26, 31, 33, 34, 35, 39, 42, 43, 47, 50, 53, 56, 60], "\uce74\uba54\ub77c\uc5d0": 2, "density\uc758": 2, "\ubd80\uc801\uc808\ud55c": 2, "\uc99d\uac00\ub97c": [2, 5], "\ubc29\uc9c0\ud558\uae30": [2, 19, 21, 46, 47, 55], "3000\ubc88\uc758": 2, "\ubc18\ubcf5\ub9c8\ub2e4": 2, "\u03b1\ub97c": 2, "0\uc5d0": [2, 7, 16], "\uac00\uae5d\uac8c": [2, 9, 18, 22, 47, 57], "\uc124\uc815\ud55c\ub2e4": [2, 16, 23], "\ubcf8": [2, 4, 5, 6, 9, 12, 14, 16, 18, 19, 20, 21, 24, 26, 27, 28, 30, 31, 32, 34, 35, 36, 39, 42, 45, 48, 49, 52, 54], "\ubaa9\ud45c\ub294": [2, 24, 43], "\ub300\ub7b5\uc801\uc778": [2, 35], "\u03b1": 2, "blending\uc744": 2, "\uc774\uc804": [2, 8, 14, 15, 16, 21, 22, 23, 30, 35, 36, 41, 51], "\uc5f0\uad6c\uc5d0": [2, 5, 36], "\uc874\uc7ac\ud558\ub358": [2, 26], "splat\ub4e4\uc758": 2, "\uc218\uc5d0": 2, "\uac15\ud55c": [2, 24, 32, 40], "\uc81c\uc57d\uc744": [2, 9, 47], "\ud53c\ud558\ub294": 2, "\ub80c\ub354\ub9c1\uacfc": [2, 34, 36], "sorting\uc744": 2, "\uac83\uc774\ub2e4": [2, 4, 7, 16, 20, 23, 26, 27, 28, 31, 36, 37, 39, 45, 54], "\ubaa9\ud45c\ub97c": [2, 7, 21], "\ub2ec\uc131\ud558\uae30": [2, 21, 26], "tile": [2, 61], "base": [2, 3, 4, 8, 16, 17, 19, 22, 23, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 38, 39, 40, 41, 43, 45, 49, 51, 52, 53, 54, 55, 56, 57, 59, 61, 62], "splats\ub97c": 2, "\ud750\ub984\uc73c\ub85c": 2, "\uc9c4\ud589\ub41c\ub2e4": [2, 20, 29, 44, 45], "screen\uc744": 2, "16": [2, 5, 6, 7, 14, 17, 18, 20, 21, 22, 26, 29, 30, 31, 38, 39, 41, 
45, 47, 48, 49, 50, 54, 57, 59, 61], "16\uc73c\ub85c": 2, "frustum\uacfc": 2, "\ud0c0\uc77c\uc5d0": 2, "gaussian\ub4e4\uc744": [2, 20], "cull\ud568": 2, "frustum\uc678\uc5d0": 2, "\uac83\ub4e4\uc744": 2, "\uc81c\uac70\ud558\ub294": 2, "\uc808\ucc28": 2, "\ub098\ub208": 2, "\uc774\uc720\ub294": [2, 10, 14, 22, 38], "\ud0c0\uc77c\ub9c8\ub2e4": 2, "thread\uc5d0\uc11c": 2, "\uacc4\uc0b0\ud574\uc11c": [2, 21], "\uacc4\uc0b0\ud558\uae30": [2, 20, 55, 60], "\uc704\ud568": [2, 14, 32, 47], "guard": 2, "band\ub97c": 2, "\uc774\uc6a9\ud574": [2, 4, 13, 16, 20, 21, 23, 26, 32, 35, 36, 37, 42, 43, 60], "\uadf9\ub2e8\uc801\uc778": 2, "\uac00\uae5d\uac70\ub098": 2, "\uba3c": [2, 35, 47], "gaussian\ub4e4": 2, "\uc774\ub4e4\uc758": [2, 56], "covariance\ub97c": 2, "\uacc4\uc0b0\ud558\ub294": [2, 4, 14, 27, 34, 35, 51, 55, 60], "\uac83\uc740": [2, 4, 5, 9, 12, 14, 15, 16, 18, 21, 26, 28, 31, 33, 34, 35, 36, 39, 42, 43, 45, 47, 51, 55, 56, 57, 59], "\ubd88\uc548\uc815\uc801": 2, "\ub4a4": [2, 26, 29, 41, 47], "\uc18d\ud55c": 2, "tile\uacfc": 2, "depth\uc5d0": 2, "key\ub97c": [2, 11], "\ubd80\uc5ec": [2, 6, 18], "radix": 2, "sort\ub97c": 2, "front": [2, 19], "back\uc73c\ub85c": 2, "color\uc640": 2, "\u03b1\uac12\uc744": 2, "accumulate\ud574\uc11c": 2, "\ud53d\uc140": [2, 6, 15, 18, 21, 22, 33, 34, 38, 46, 52], "\uac12\uc744": [2, 3, 6, 11, 13, 15, 16, 18, 19, 20, 22, 25, 27, 31, 33, 34, 36, 37, 40, 46, 48, 49, 52, 54, 55, 61], "\uad6c\ud568": [2, 4, 6, 57], "\ub370\uc774\ud130\uc14b\uc5d0": [2, 4, 6, 21, 27, 42, 46, 54, 58, 60], "\uacb0\uacfc\ub294": [2, 9, 10, 16, 18, 21, 23, 25, 26, 28, 35, 36, 42, 47], "\ub2e4\ub974\uc9c0\ub9cc": 2, "sota\uc774\uc0c1\uc758": 2, "\ud004\ub9ac\ud2f0\ub97c": [2, 3, 4, 12, 33, 54, 55], "\ub0b4\uba74\uc11c": 2, "time\uacfc": [2, 3], "fps\ub97c": 2, "\ubcf4\uc778\ub2e4": [2, 3, 16, 21, 23, 25, 26, 28, 31, 36, 39, 40, 57], "experi": [2, 6, 62], "tabl": [2, 5, 6, 11, 19, 24, 25, 30, 33, 38, 40, 42, 44, 45, 47, 49, 56, 59], "ablat": [2, 6, 11, 19, 24, 28, 
40, 49, 55, 58, 61], "\ubc29\uc2dd\ub4e4\uacfc": [2, 37], "\uc720\uc0ac\ud558\uac8c": [2, 4, 9, 18, 21, 29, 33, 36, 38, 42, 46, 53, 59], "\uad00\uce21\ub418\uc9c0": 2, "\uc54a\uc740": [2, 3, 4, 5, 9, 11, 12, 16, 21, 24, 25, 28, 31, 38, 41, 42, 45, 46, 47, 55, 60, 61], "\uc7a5\uba74\uc740": 2, "artifact\ub4e4\uc774": 2, "\uc874\uc7ac": [2, 5, 17, 18, 34], "\ub298\uc5b4\uc9c0\uace0": 2, "\uc5bc\ub8e9\uc9c4": 2, "artifact\ub97c": 2, "\uc788\uc74c": [2, 3, 4, 6, 8, 11, 14, 15, 18, 19, 22, 25, 29, 30, 31, 32, 33, 34, 42, 43, 48, 49, 52, 54, 57], "\ucd5c\uc801\ud654\uc5d0\uc11c": 2, "\uac70\ub300\ud55c": [2, 3, 26], "\ub9cc\ub4e4\uc5b4\uc9c0\uba74": 2, "pop": [2, 14], "artifact": [2, 4, 18, 34], "\uac00\ub054": [2, 5, 54], "\ubc1c\uc0dd": [2, 5, 18, 29, 30, 39, 44, 50], "regularization\uc744": 2, "\uc801\uc6a9\ud558\uc9c0": 2, "\uc54a\uc74c": [2, 3, 5, 8, 11, 14, 15, 18, 19, 31, 32, 34, 39, 43, 52, 54, 57], "\uae30\ubc95\ub4e4\ubcf4\ub2e4": 2, "memori": [2, 7, 30, 48, 52, 55], "consumption\uc774": 2, "\uc0c1\ub2f9\ud788": [2, 4, 18, 23, 24, 30, 34, 39, 43], "\ub192\uc74c": [2, 3, 42, 52], "pdf": [3, 7, 8, 9, 12, 24, 27, 28, 32, 33, 34, 37, 43, 45, 46, 48, 53, 54, 55, 56, 62], "2206": 3, "10935": 3, "gan": [3, 6, 11, 13, 19, 21, 23, 29, 33, 35, 36, 37, 39, 40, 46, 47, 48, 56, 57, 59], "diffusion\ub4f1": [3, 22], "\ubaa8\ub378\uc758": [3, 4, 5, 6, 7, 10, 11, 12, 15, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 36, 38, 39, 40, 41, 42, 43, 46, 49, 54, 55, 56, 58, 59], "\ubc1c\uc804\uc774": 3, "\uc774\uc5b4\uc9c0\uace0\uc788\ub2e4": 3, "\ub2e4\ub9cc": [3, 19, 42, 45, 54], "\uc0dd\uc131\ubaa8\ub378\uc744": [3, 28, 36], "\ud3c9\uac00\ud558\ub294": [3, 12, 35, 42], "\ucc99\ub3c4": 3, "\uc120\uc815\uc740": 3, "\uc5b4\ub824\uc6b4": [3, 11, 18, 19, 29, 35, 43, 56], "\ubb38\uc81c\ub85c": [3, 58], "\ub0a8\uc544\uc788\ub2e4": 3, "\uadf8\ub098\ub9c8": 3, "\ub098": [3, 4, 8, 9, 27, 32, 38, 39, 46, 50, 52, 54, 60], "score\ub97c": [3, 4, 16, 23, 42], 
"\ud3c9\uac00\ud558\uace0\uc788\uc9c0\ub9cc": 3, "metric\ub4e4\ub3c4": 3, "\uc644\uc804\ud558\uc9c0": 3, "\ub17c\ubb38\uc744": [3, 23, 24, 42, 49], "\ud3c9\uac00\uc758": [3, 19, 35], "\uc9c0\ud45c\uc5d0": [3, 4, 42], "\ud55c\ubc88\ub354": 3, "\uace0\ucc30\ud558\uace0": 3, "\ud604\uc874\ud558\ub294": 3, "metric\uc5d0": 3, "\ubc29\ud5a5\uc744": [3, 19, 20, 34], "\uc81c\uc2dc": [3, 6, 19, 34, 43, 45, 49], "\ucd5c\uadfc": [3, 4, 5, 9, 11, 18, 21, 22, 25, 26, 35, 37, 40], "\ub4f1": [3, 4, 8, 11, 13, 18, 21, 22, 23, 24, 26, 27, 29, 30, 32, 33, 36, 40, 43, 50, 54], "model\ub4e4\uc774": [3, 11, 13], "\ub6f0\uc5b4\ub09c": [3, 6, 11, 13, 25, 28, 31, 42, 48], "\ubcf4\uc5ec\uc90c": [3, 5, 8, 19, 20, 29, 34, 43, 52], "\ud558\uc9c0\ub9cc": [3, 4, 6, 9, 10, 11, 12, 13, 15, 16, 19, 20, 21, 22, 23, 24, 25, 26, 28, 30, 31, 32, 34, 35, 36, 37, 39, 40, 42, 43, 45, 46, 47, 49, 50, 51, 56, 57, 60, 61], "task": [3, 6, 12, 20, 26, 28, 30, 32, 34, 44, 46, 47, 48, 51, 52, 56, 58, 61], "classif": [3, 23, 45, 49, 51, 54], "segment": [3, 47, 59], "\uc640\ub294": [3, 27, 32, 46], "\ub2e4\ub974\uac8c": [3, 13, 16, 20, 24, 27, 38, 42, 50, 56], "metric\uc744": [3, 4], "\uc815\ud558\ub294\uac83\uc740": 3, "challeng": [3, 32], "p": [3, 6, 8, 9, 10, 12, 15, 16, 18, 19, 20, 21, 25, 31, 34, 35, 36, 39, 49, 50, 56, 57, 58, 60], "r": [3, 4, 5, 7, 8, 14, 15, 18, 19, 20, 21, 22, 25, 27, 29, 30, 34, 35, 36, 39, 40, 44, 53, 55, 56], "f1": 3, "iou": [3, 47], "intersect": [3, 47], "union": [3, 47], "featu": 3, "map\uc774\ub098": [3, 21], "classfier": [3, 21], "\uc0ac\uc6a9\ud558\ub294": [3, 4, 5, 7, 10, 12, 14, 15, 20, 21, 22, 26, 27, 29, 34, 35, 36, 39, 41, 49, 50, 51, 52, 53, 55], "score\uac00": [3, 39, 54], "\ucd94\uc138": [3, 11], "\uc704": [3, 5, 6, 9, 10, 12, 14, 16, 18, 22, 24, 25, 27, 30, 31, 40, 42, 43, 45, 46, 55, 57, 58], "metric\uc758": 3, "\ub2e8\uc810": [3, 35, 37], "real": [3, 4, 11, 16, 20, 22, 26, 31, 34, 37, 40, 44, 51, 57], "\ubd84\ud3ec\uc758": [3, 14, 20, 36, 42, 51], "space\uc5d0\uc11c": 
[3, 4, 5, 9, 16, 21, 26, 28, 29, 36, 39, 43], "\uc218\uce58\uac00": [3, 25], "\uc815\ub9d0": [3, 12, 22], "\uc720\uc758\ubbf8\ud55c": 3, "\uc5f0\uad00\uc774": [3, 41], "\uc788\ub294\uc9c0": [3, 12, 26, 34], "\uc99d\uba85\ub418\uc9c0": [3, 25], "pretrain": [3, 4, 5, 6, 9, 10, 11, 16, 17, 19, 20, 22, 23, 30, 31, 33, 36, 37, 38, 39, 40, 42, 43, 45, 48, 56, 57], "set\uc774": [3, 43], "specif": [3, 4, 11, 21, 26, 28, 39, 41, 50], "feature\uc5d0": [3, 5, 16, 26, 33], "\uc5bc\ub9c8\ub098": [3, 11, 12, 14, 16, 18, 22, 28, 32, 34, 35, 37, 39, 42, 43, 49, 58], "\ubbf8\uce58\ub294\uc9c0": 3, "\uc54c\uc218": [3, 39], "\uc5c6\uc74c": [3, 6, 8, 18, 19, 30, 36, 57], "net": [3, 5, 6, 7, 8, 14, 15, 16, 17, 19, 29, 32, 45, 50, 52, 56, 58], "imagenet": [3, 15, 23, 25, 29, 38, 43, 45, 46, 54, 58], "ddpm": [3, 5, 6, 15, 20, 23, 42, 52, 54, 57], "face": [3, 4, 6, 12, 17, 18, 20, 24, 26, 32, 34, 41, 51, 60], "human": [3, 11, 12, 17, 23, 39, 41, 43, 44, 53], "study\uc758": [3, 28], "\uc9c1\uad00\uc801\uc778": [3, 18, 26, 43], "\ubc29\uc2dd\ub3c4": [3, 23, 26, 51, 55, 56], "\uc788\uc9c0\ub9cc": [3, 4, 13, 15, 18, 21, 22, 23, 26, 27, 35, 36, 39, 47, 48, 49, 55, 56, 59], "cost\ub97c": [3, 29, 30, 54], "\ub9e4\uc6b0": [3, 4, 6, 11, 12, 13, 14, 16, 18, 20, 21, 22, 26, 27, 30, 35, 36, 43, 45, 46, 47, 52, 54, 57], "\ud544\uc694\ub85c\ud55c\ub2e4\ub294": 3, "\uc810\uacfc": 3, "diversity\ub294": 3, "\uce21\uc815\ud558\uae30": [3, 12], "\uc5b4\ub835\ub2e4\ub294": 3, "e": [3, 4, 6, 7, 8, 9, 11, 13, 14, 15, 16, 18, 19, 20, 21, 25, 26, 27, 28, 29, 33, 37, 40, 41, 46, 50, 51, 52, 54, 56, 57, 58, 59, 60], "g": [3, 4, 11, 13, 19, 20, 22, 23, 25, 26, 28, 34, 35, 36, 40, 41, 47, 50, 51, 54, 55, 56, 57, 58, 59], "\uc774\ubbf8\uc9c0\ub9cc": [3, 4, 7, 26, 49], "\uc0dd\uc131\ud574\ub3c4": 3, "\ubc1b\uc744": [3, 52], "gpt": [3, 28, 30], "high": [3, 4, 5, 10, 12, 16, 19, 20, 24, 25, 27, 29, 32, 33, 34, 37, 38, 41, 42, 46, 48, 51, 52, 54, 56, 57, 59, 61], "quality\uc758": [3, 13, 23, 25, 28, 37, 40, 54], "new": 
[3, 11, 13, 24, 26, 41, 43], "dataset\uc744": [3, 4, 28, 39, 40, 52, 54], "\uc704\uc758": [3, 4, 6, 10, 12, 13, 16, 18, 20, 21, 23, 25, 26, 27, 28, 29, 33, 34, 36, 39, 42, 45, 46, 47], "\ud559\uc2b5\ud558\uace0": [3, 4, 14, 29, 32, 42, 48, 51, 55, 56, 57, 61], "is\ub4f1": 3, "\uce21\uc815": [3, 9, 12, 15, 19, 43, 56], "\uc2e4\uc81c": [3, 6, 9, 12, 14, 16, 21, 24, 28, 29, 30, 31, 33, 34, 35, 37, 42, 43, 47, 50, 51, 54, 56, 57], "revers": [3, 4, 6, 13, 18, 19, 23, 25, 27, 36, 37, 45, 54, 58, 59], "\uac12\uacfc": [3, 13, 15, 22], "\ube44\uad50\ud574\uc11c": [3, 41, 48], "\uc720\ud6a8\uc131\uc744": 3, "\uac80\uc99d": [3, 6, 54], "v3\uacfc": 3, "clip": [3, 4, 5, 7, 8, 9, 11, 12, 17, 18, 19, 20, 22, 24, 26, 31, 32, 33, 35, 38, 39, 43, 44, 45, 48, 50, 52, 53, 54, 56, 59, 61], "\ube44\uad50\ub97c": [3, 21, 23, 26, 28, 39], "v3": 3, "\uc801\ud569\uc131\uc744": 3, "\ud655\ub960\ubd84\ud3ec\uc758": 3, "\uc720\uc0ac\ub3c4\ub97c": [3, 4, 9, 23, 35, 46], "\uc9c0\ud45c": [3, 21, 35, 39, 42, 49], "q": [3, 4, 6, 7, 15, 16, 19, 20, 23, 26, 29, 35, 39, 48, 49, 58], "sum_": [3, 14, 16, 22, 23, 27, 28, 30, 34, 60], "x": [3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 16, 18, 19, 20, 21, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 39, 41, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61], "log": [3, 7, 14, 19, 27, 30, 36, 42, 46, 48, 49, 51, 54, 57, 58, 60], "left": [3, 7, 8, 9, 14, 18, 24, 28, 29, 30, 34, 38, 49, 52, 57, 60], "frac": [3, 10, 14, 16, 18, 20, 26, 27, 29, 34, 36, 39, 41, 46, 49, 51, 54, 57, 58, 60, 61], "right": [3, 7, 8, 9, 14, 18, 24, 28, 29, 30, 34, 49, 52, 57, 60], "\ud2b9\uc9d5": [3, 9, 20, 24, 37], "lower": [3, 14, 37, 38, 39, 48, 52, 60], "better": [3, 37, 52], "symmetr": 3, "rkl": 3, "\ub300\ubd80\ubd84": [3, 11, 20, 25, 26, 31, 40, 42], "p\uac00": 3, "true": [3, 12, 13, 14, 30, 31, 51, 56, 59], "distribut": [3, 4, 6, 7, 12, 13, 14, 19, 22, 39, 45, 46, 47, 48, 49, 59, 60, 61], "q\uac00": 3, "estim": [3, 5, 14, 36, 46, 51, 60], "fidelity\uc640": [3, 21, 
24], "diversity\ub97c": [3, 19, 24, 26, 40], "fidel": [3, 6, 17, 19, 24, 26, 33, 49, 50, 52], "label\uc758": 3, "\uc608\uce21\ud558\ub294\uc9c0": [3, 15], "divers": [3, 6, 7, 12, 19, 24, 25, 26, 32, 33, 39, 49, 50, 56], "\uace0\ub974\uac8c": [3, 8, 35], "\uc0dd\uc131\ud574\ub0b4\ub294\uc9c0": 3, "exp": [3, 14, 20, 34, 39, 58], "mathbb": [3, 6, 8, 14, 18, 19, 21, 27, 29, 30, 34, 46, 50, 51, 54, 55, 56, 57, 59, 60], "_x": 3, "d_": [3, 6, 14, 22, 24, 30, 36, 51, 57, 60], "y": [3, 4, 6, 8, 9, 10, 12, 13, 14, 16, 19, 20, 27, 30, 31, 34, 35, 36, 39, 41, 47, 48, 49, 56, 57, 59], "higher": [3, 39, 52, 55], "\uc774\ubbf8\uc9c0\uc640": [3, 4, 7, 9, 12, 13, 16, 18, 21, 23, 24, 33, 34, 35, 39, 40, 43, 50, 53, 57, 60, 61], "featur": [3, 4, 5, 6, 8, 10, 11, 12, 14, 16, 17, 18, 20, 21, 22, 26, 32, 33, 34, 38, 40, 41, 45, 47, 50, 56, 59, 61], "vector\ub97c": [3, 27, 33, 39, 43], "\ucd94\ucd9c": [3, 17, 20, 21, 33, 34, 59], "\ud3c9\uade0\uacfc": [3, 6, 42, 52], "\uacf5\ubd84\uc0b0\uc744": 3, "\uacc4\uc0b0": [3, 13, 14, 17, 19, 21, 29, 31, 34, 35, 43, 45], "frechet": [3, 48], "\ud3c9\uac00\uc9c0\ud45c": 3, "lvert": [3, 19, 57], "mu_x": 3, "mu_g": 3, "rvert": [3, 19], "tr": 3, "sigma_x": 3, "sigma_g": 3, "v3\uc758": 3, "pool": [3, 38], "layer\uc758": [3, 22, 26, 34], "map\uc744": [3, 4, 16, 22, 28, 29, 36, 40], "quality\ub97c": [3, 4, 8, 16, 19, 25, 27, 29, 43], "fid\uc5d0\uc11c": [3, 14], "distance\ub97c": [3, 16], "\ub300\uc2e0": [3, 11, 14, 15, 16, 19, 21, 24, 25, 35, 38, 39, 40, 43, 48, 57], "trick\uc744": [3, 40], "\uc0ac\uc6a9\ud574": [3, 4, 12, 16, 20, 22, 26, 30, 33, 35, 36, 38, 40, 42, 43, 44, 52, 54], "\ud655\ub960": [3, 6, 14, 34, 35, 37, 42, 51, 60], "\ub370\uc774\ud130\uc14b\uc758": [3, 7, 12, 18, 23, 31, 34, 35, 42, 45, 53], "\ud3c9\uac00\uc5d0": [3, 4, 18, 23], "\ud6a8\uacfc\uc801\uc784": 3, "metric\ubcf4\ub2e4": 3, "\uc18d\ub3c4\uac00": [3, 33, 35, 36, 37, 46, 58], "\uc624\ub798\uac78\ub9bc": 3, "n": [3, 4, 6, 8, 12, 14, 16, 18, 19, 21, 23, 26, 27, 29, 30, 32, 34, 36, 
39, 46, 48, 54, 56, 57, 58, 59, 60], "kid": [3, 11, 37], "\ub17c\ubb38": [3, 12, 14, 30, 32, 33, 34, 42, 47, 48, 50, 61], "fid\uc640": [3, 22, 23, 25, 31, 42, 49], "bias\uac00": [3, 10, 41], "\uc788\uc74c\uc744": [3, 4, 9, 18, 19, 24, 25, 27, 28, 35, 36, 42, 43, 56], "\uc99d\uba85\ud558\uace0": 3, "dataset\uc758": [3, 4, 17, 54], "sampl": [3, 4, 5, 6, 12, 16, 20, 22, 26, 27, 32, 33, 39, 40, 44, 45, 46, 49, 50, 51, 52, 54, 55, 57, 59, 60], "\ubcc0\uacbd": [3, 5, 6, 9, 47], "gaussian": [3, 5, 6, 8, 14, 19, 22, 26, 30, 34, 35, 36, 39, 41, 46, 49, 50, 54, 56, 57, 59, 60], "sobol": 3, "sequenc": [3, 4, 5, 7, 12, 17, 30, 39], "unbias": [3, 12, 46], "\uc81c\uc548": [3, 5, 11, 13, 15, 17, 18, 19, 20, 25, 27, 32, 34, 35, 42, 43, 44, 48], "v3\uc5d0": 3, "\ud1b5\uacfc\ud558\uae30\uc704\ud574": 3, "resiz": [3, 5, 11, 17, 26, 32, 44], "\uacfc\uc815\uc774": [3, 9, 14, 20, 36, 55], "\ud3ec\ud568\ub418\ub294\ub370": 3, "score\uac12\uc5d0": 3, "\uc904\uc218": 3, "\uc788\uc5b4": [3, 5, 16, 23, 27, 35, 47, 51, 54, 55, 57], "best": [3, 8, 16, 25, 40, 48, 49], "percformance\uc758": 3, "one": [3, 6, 12, 36, 40, 46, 47, 54, 56, 57, 58], "process\ub97c": [3, 4, 13, 14, 16, 20, 26, 27, 49], "imagenet\uc758": 3, "\ub370\uc774\ud130\ub97c": [3, 4, 8, 9, 11, 12, 14, 19, 21, 23, 25, 28, 31, 34, 35, 38, 40, 42, 44, 46, 47, 51, 54, 60, 61], "imagegpt\ub97c": 3, "\uc7ac\uc0dd\uc131": 3, "k": [3, 4, 11, 14, 18, 20, 22, 26, 27, 29, 30, 32, 35, 37, 48, 51, 54, 59], "notimagenet": 3, "imagegpt": 3, "vision": [3, 12, 22, 26, 37, 39, 42, 47, 53, 62], "\ubd84\uc57c\uc5d0": [3, 18, 46], "label": [3, 6, 14, 15, 39, 47, 49, 52], "dataset\uc774": [3, 37], "\ud544\uc694\uc5c6\ub294": 3, "\uc790\uae30\uc9c0\ub3c4": 3, "\ubc29\uc2dd": [3, 4, 11, 15, 18, 20, 21, 26, 27, 32, 33, 35, 46, 54], "challenge\uc5d0\uc11c\ub3c4": 3, "\uc0c1\ub2f9\ud55c": [3, 7, 15, 28], "\ubcf4\uc784": [3, 5, 8, 18, 19, 25, 29, 48], "\uc0dd\uc131\ubaa8\ub378\uc5d0": [3, 4], "\ud1b5\uacfc\ud55c": [3, 4, 15], "p_": [3, 6, 9, 14, 16, 19, 
20, 21, 23, 27, 30, 46, 47, 48, 49, 51, 54, 57, 60], "hat": [3, 8, 18, 19, 22, 24, 26, 27, 28, 29, 31, 34, 39, 46, 47, 50, 54, 58], "\ubd84\ud3ec\ub97c": [3, 14, 18, 21, 22, 29, 31, 36, 38, 42, 47, 51, 60], "\ube44\uad50": [3, 5, 6, 12, 13, 18, 19, 21, 22, 25, 26, 31, 36, 39, 42, 45, 48, 49, 52], "\ud55c\uacc4": [3, 5, 6, 19, 27, 34, 35, 43, 54], "explicit": [3, 19, 34, 51], "model\uc5d0\ub9cc": 3, "\uc801\uc6a9": [3, 6, 16, 19, 20, 21, 22, 25, 32, 35, 37, 38, 48, 54, 56, 59], "model\uc5d0\ub294": 3, "\uc801\uc6a9\ud560": [3, 4, 15, 19, 27, 43, 46, 56], "\uc0dd\uc131\ub418\ub294": [3, 9, 10, 37, 40, 41, 42, 49, 56, 59], "\ub370\uc774\ud130\uc758": [3, 4, 29, 31, 42, 47, 51, 57], "\ubaa8\ub378\ub9c1\ud558\uc5ec": [3, 48, 49], "\uc8fc\ub85c": [3, 4, 20, 21, 23, 24, 26, 27, 35, 39, 54], "noise\ub85c\ubd80\ud130": [3, 37], "\uacfc\uc815\uc5d0": [3, 9, 40], "\uc8fc\uc5b4\uc9c4": [3, 4, 9, 12, 16, 20, 21, 23, 24, 28, 31, 33, 34, 36, 39, 43, 45, 46, 48, 55, 57, 61], "\ubd84\ud3ec\ub85c\ubd80\ud130": 3, "\uc0d8\ud50c\ub9c1\ud558\uc5ec": 3, "volatil": 3, "rkl\uc740": 3, "\uc591\uc758": [3, 6, 10, 18, 21, 26], "epoch": [3, 21, 47, 50, 51, 59], "\ud6c4\uc5d0": [3, 5, 11, 22, 28, 44, 49], "\ubc14\ub85c": [3, 4, 11, 12, 14, 27, 33, 38, 39, 41, 45, 55, 57], "\uc218\ub834\ud558\ub294": [3, 58], "\ubc29\uba74": 3, "is\ub294": 3, "\ubcc0\ub3d9\uc131\uc744": [3, 42], "capacity\uac00": 3, "\uc99d\uac00\ud560\uc218\ub85d": [3, 18, 48, 58], "kl\uacfc": 3, "rkl\uc758": 3, "\uac1c\uc120\ub418\ub294": [3, 28, 53], "\ud655\uc778": [3, 15, 16, 18, 25, 36, 40, 45, 48, 52], "fid\ub098": 3, "is\uac00": [3, 14, 42], "\uadf8\ub798\ud504\uc640": [3, 35], "\ud615\ud0dc\ub97c": [3, 8, 14, 35], "\ub744\ub294\uac83\uc744": 3, "neg": [3, 14, 16, 19, 22, 23, 33, 40, 56], "kl\uacfc\ub294": 3, "colleration\uc744": 3, "\ubcf4\uc774\uc9c0\ub9cc": [3, 16, 22, 40], "rkl\uacfc\ub294": 3, "\ub192\uc9c0": 3, "capacity\uc5d0": 3, "\uc218\uce58": [3, 25], "\ubcc0\ud654\ub294": [3, 38], "\ud06c\uc9c0": [3, 22, 36, 
39], "\ub370": [3, 9, 18, 19, 21, 23, 24, 28, 34, 35, 42], "\ubc18\ud574": [3, 25, 27, 37, 54], "\uad49\uc7a5\ud788": [3, 22, 32], "\uc218\uce58\uc758": 3, "\ubcc0\ud654\ub97c": [3, 4, 16, 18, 36, 54], "\ubcf4\uc5ec\uc900\ub2e4": [3, 4, 7, 13, 16, 22, 23, 27, 28, 38, 39, 40, 43, 52, 54], "rank": [3, 7, 36, 56], "coller": 3, "\ubaa8\ub378\uc5d0": [3, 4, 5, 7, 9, 11, 12, 14, 15, 18, 22, 23, 25, 31, 33, 35, 38, 40, 42, 43, 45, 50, 53, 54, 56, 58, 59, 61], "\ubcc4\ub85c": [3, 4, 18, 43], "\uc21c\uc704\ub97c": [3, 43], "\ub9e4\uaca8": 3, "\uc21c\uc704\uc758": 3, "kendal": 3, "s": [3, 5, 6, 8, 9, 12, 13, 16, 20, 21, 22, 28, 32, 34, 36, 39, 40, 41, 43, 44, 46, 50, 51, 52, 53, 54, 58, 61], "\u03c4": 3, "ranking\uc774": 3, "\ub9e4\uaca8\uc9c4": 3, "\uc218\uc5f4": 3, "\uc0ac\uc774\uc758": [3, 6, 9, 12, 14, 21, 22, 23, 27, 31, 32, 34, 54], "scipi": 3, "import": [3, 13, 16, 25, 26, 30], "stat": 3, "h": [3, 4, 5, 10, 14, 20, 26, 27, 29, 30, 31, 34, 44, 45, 48, 49, 56, 59], "z": [3, 4, 5, 9, 10, 14, 15, 16, 17, 20, 21, 26, 27, 28, 29, 30, 34, 35, 40, 46, 48, 49, 51, 54, 55, 56, 57, 58, 60], "kendalltau": 3, "significanceresult": 3, "statist": [3, 47], "9999999999999999": 3, "pvalu": 3, "016666666666666666": 3, "19999999999999998": 3, "8166666666666667": 3, "result": [3, 12, 13, 16, 17, 24, 25, 27, 28, 30, 32, 38, 46, 58, 60], "\uc720\uc0ac\ub3c4\ub294": 3, "889": 3, "kl\uacfc\uc758": 3, "\ube44\uad50\ud574\ubcf4\uba74": [3, 4, 25], "infin": 3, "cleanfid": 3, "96": [3, 42], "\ub098\uba38\uc9c0": [3, 4, 9, 22, 35, 42, 43, 53, 55], "metric\uac04": 3, "\ub0ae\uc74c": [3, 30], "network": [3, 4, 8, 9, 10, 13, 14, 22, 24, 26, 27, 32, 34, 36, 37, 44, 46, 51, 55, 56], "\uae30\ubc18\uc758": [3, 5, 7, 18, 19, 22, 24, 29, 31, 33, 35, 45, 53, 56], "\uc911\uc5d0\uc11c\ub294": [3, 6], "infinity\uc774": 3, "\ub192\uace0": [3, 7, 8, 26, 57], "is\uc640": 3, "\ub300\ubd80\ubd84\uc758": [3, 6, 9, 18, 21, 26, 28, 29, 30, 39, 54, 56], "metric\uc774": [3, 28], "\uce21\uc815\uc744": 3, "v3\ub97c": 3, 
"\uc0ac\uc6a9\ud558\ub294\ub370": [3, 9, 15, 22, 24, 46, 55], "\uacfc\uc5f0": [3, 12], "\uc801\uc808\ud55c\uac00": 3, "\uac00\uc815": [3, 14, 33], "infinity\ub294": 3, "space\uac00": [3, 16, 26], "distribution\uc744": [3, 11, 29, 36, 40, 41, 48], "\ub530\ub978\ub2e4\ub294": 3, "\uac00\uc815\ud558\uc5d0": 3, "\uce21\uc815\ub418\ub294": 3, "\uc2e4\ud5d8": [3, 4, 6, 12, 14, 18, 20, 24, 26, 29, 36, 42, 43, 45, 46, 49, 51], "10k\uc758": 3, "\uc0dd\uc131\ud558\uace0": [3, 4, 6, 7, 9, 18, 28, 31, 33, 35, 43, 48, 50], "\uc6d0\ubcf8\uc758": 3, "20k\uc758": 3, "network\uc640": [3, 31], "network\ub97c": [3, 4, 5, 14, 26, 41], "model\uc5d0": [3, 4, 9, 16, 22, 23, 26, 28, 29, 30, 36, 37, 43, 54], "fit": [3, 18], "\uc774\ub54c": [3, 4, 5, 6, 9, 22, 26, 27, 33, 36, 37, 44, 46, 47, 48, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61], "\uae30\ubc18\uc73c\ub85c": [3, 7, 9, 12, 15, 17, 18, 19, 20, 21, 24, 27, 28, 29, 33, 34, 35, 36, 37, 40, 46, 47, 50, 54, 55, 56, 58, 60, 61], "\uc0d8\ud50c\uc758": [3, 38, 39, 42], "\ud655\ub960\uac12\uc744": 3, "\uacc4\uc0b0\ud55c\ub2e4": [3, 16, 26, 35, 39, 49], "8": [3, 6, 11, 14, 15, 16, 17, 19, 20, 22, 25, 26, 29, 30, 31, 36, 37, 41, 47, 48, 51, 54, 55, 56, 58, 59], "\uac12\uc774": [3, 6, 7, 11, 13, 25, 29, 32, 33, 36, 40, 47, 48, 49, 52, 54, 58, 60], "\ub0ae\uc740": [3, 4, 5, 13, 14, 18, 19, 22, 24, 25, 29, 35, 36, 38, 39, 44, 45, 48, 50, 56, 57, 61], "tail": 3, "vector\uc758": [3, 39, 41], "\uc6d0\ubcf8": [3, 4, 9, 12, 21, 22, 27, 29, 31, 37, 39, 42, 43, 50, 54, 56, 58, 59, 60], "\ub0ae\uc544\uc57c\ud568": 3, "\ud655\ub960\uc744": [3, 7, 12, 35, 51], "\uac16\ub294": [3, 6, 11, 12, 23, 34], "\ud655\uc778\ud574\ubcf4\uba74": 3, "clip\uc744": [3, 4, 5, 22], "\ud655\uc2e4\ud788": [3, 6, 28], "\ub5a8\uc5b4\uc9c0\ub294": [3, 6, 12, 25, 39, 42, 56], "\ubc18\uba74": [3, 4, 5, 6, 8, 9, 15, 16, 21, 23, 28, 30, 35, 36, 49, 54, 58], "inception\uc758": 3, "\uc774\ubbf8\uc9c0\ub4e4\uc740": [3, 22, 37], "\ubcf4\uc774\uace0": [3, 16, 21, 22, 27, 35, 47, 48, 55], 
"\uac00\uc815\uc5d0": 3, "\uc704\ubc30": 3, "latent": [3, 4, 5, 6, 8, 9, 11, 12, 18, 19, 21, 22, 24, 26, 28, 32, 33, 35, 36, 37, 38, 40, 41, 45, 46, 50, 53, 56, 58, 59, 60, 61], "represent": [3, 6, 12, 15, 21, 29, 35, 39, 48, 55], "vector\ub4e4\uc744": 3, "dimension\uc5d0": [3, 30], "\ud22c\uc601\uc2dc\ucf1c": 3, "\ub530\ub974\ub294": [3, 19, 31, 37, 59], "\uc9c0": 3, "\ud655\uc778\ud55c\ub2e4": 3, "\ucd94\ucd9c\ud55c\ub2e4": [3, 20], "\uc5f0\uc0b0\uc744": [3, 7, 8, 35, 44], "d\ub85c": 3, "\ud22c\uc601\uc2dc\ud0a8\ub2e4": 3, "value\ub97c": 3, "\uad6c\ud55c\ub2e4": [3, 45, 49], "valu": [3, 11, 13, 20, 26, 30, 34, 58, 61], "\uc5b4\ub5a0\ud55c": [3, 5, 16, 35, 38, 41, 50, 54], "\uc0ac\uac74\uc774": 3, "\uc6b0\uc5f0\ud788": 3, "\uc77c\uc5b4\ub0a0": 3, "\ud655\ub960\uc774": [3, 31, 34], "\uac70\uc758": [3, 4, 12, 16, 23, 25, 28, 29, 35, 36, 47, 54], "\uc5c6\ub2e4": [3, 21, 22, 30, 37, 44, 49], "\uc778\uacfc\uad00\uacc4\uac00": 3, "\ud06c\ub2e4": [3, 39], "distribution\uc740": 3, "random\uc744": 3, "\uae30\ubc18\uc73c\ub85c\ud558\uae30\ub54c\ubb38\uc5d0": 3, "\uc791\uc544\uc57c\ud55c\ub2e4": 3, "\uc989": [3, 4, 6, 9, 11, 12, 14, 16, 18, 19, 20, 21, 22, 23, 27, 28, 36, 37, 43, 45, 46, 47, 48, 49, 52, 54, 59], "value\uac00": 3, "\ucee4\uc57c\ud55c\ub2e4": 3, "dataset\uc5d0": [3, 14, 23, 27, 48, 54], "clip\uc758": [3, 4, 5, 9, 19, 43], "value\uac12\uc740": 3, "05\ub97c": 3, "\ub118\uc5b4": 3, "random\uc131\uc744": 3, "\uc720\uc9c0\ud558\uc9c0\ub9cc": 3, "inception\uc740": 3, "05\ubcf4\ub2e4": 3, "\ubcf4\uc5ec": 3, "\uc720\uc9c0\ud558\uc9c0": [3, 8, 18], "\ubabb\ud55c\ub2e4": [3, 16, 21, 25, 26, 35], "net\uc744": 3, "\uce21\uc815\ubcf4\ub2e4": 3, "your": [4, 40, 44], "iclr": [4, 13, 16, 20, 30, 58, 60], "2307": [4, 9, 24, 38], "04725": 4, "code": [4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 36, 38, 39, 41, 43, 45, 46, 47, 48, 49, 50, 51, 54, 56, 57, 58, 59, 60, 61, 62], "guoyww": 4, "page": [4, 5, 8, 15, 17, 18, 19, 20, 26, 28, 43, 54], 
"github": [4, 5, 8, 17, 18, 19, 20, 25, 26, 28, 30, 31, 39, 43, 45, 47, 48, 50, 54, 62], "io": [4, 5, 8, 13, 14, 17, 18, 19, 20, 25, 26, 28, 31, 43, 45, 47, 48, 54], "kyeongmin": [4, 20, 26, 36, 39, 62], "yu": [4, 16, 20, 26, 36, 39, 62], "11": [4, 6, 35, 38, 39, 52, 54], "\uc758\uc758": 4, "In": [4, 11, 15, 28, 45], "thi": [4, 12, 13, 14, 17, 20, 30, 34, 36, 43, 50, 62], "we": [4, 12, 28, 34, 43, 50, 55, 59], "present": 4, "framework": [4, 17, 20, 32, 55], "requir": [4, 6, 30], "public": 4, "plug": [4, 5, 17, 27, 56, 59], "plai": [4, 5, 56, 59], "model\uacfc": [4, 9, 10, 15, 16, 19, 23, 25, 26, 29, 54], "dreambooth\ub098": 4, "lora\uc640": [4, 27], "\uac1c\uc778\ud654": [4, 18, 24], "\uae30\uc220\uc774": [4, 9, 23, 42], "\ubc1c\uc804\ud568\uc5d0": [4, 9], "\uc0ac\ub78c\ub4e4\uc740": 4, "\uc801\uc808\ud55c": [4, 37, 39, 45], "\ube44\uc6a9\uc744": [4, 26, 34], "\uc9c0\ubd88\ud558\uc5ec": 4, "\uace0\ud654\uc9c8\uc758": [4, 54], "\uc6d0\ud558\ub294": [4, 8, 9, 10, 11, 12, 16, 21, 26, 28, 29, 32, 34, 37, 40, 41, 42, 47], "\uc788\uac8c": [4, 8, 9, 10, 12, 20, 21, 26, 28, 31, 32, 35, 36, 38, 42, 44, 46, 52], "\ub418\uc5c8\ub2e4": [4, 16, 31, 35, 39, 43], "\uace0\ud654\uc9c8": [4, 26, 31, 36, 48, 49], "\uc6c0\uc9c1\uc784\uc744": [4, 5, 21], "\ucd94\uac00\ud558\uac70\ub098": 4, "\uc560\ub2c8\uba54\uc774\uc158\uc744": [4, 21], "\uc0dd\uc131\ud558\ub3c4\ub85d": [4, 23, 26, 35, 42, 43], "\uc5b4\ub835\ub2e4": [4, 5, 17, 19, 21, 26, 29, 31, 35], "\ucd94\uac00\uc801\uc778": [4, 5, 10, 11, 14, 18, 19, 21, 22, 23, 26, 27, 28, 29, 30, 34, 35, 36, 37, 39, 41, 43, 46, 54, 59], "\uc5c6\uc774": [4, 5, 7, 11, 13, 14, 15, 18, 19, 23, 24, 27, 31, 33, 35, 38, 39, 40, 42, 43, 45, 46, 47, 48, 49, 52, 54, 56, 59], "\ucd94\uac00\ud558\ub294": [4, 9, 16, 18, 19, 21, 30, 31, 35, 42, 56, 59], "\uc2e4\uc6a9\uc801\uc778": 4, "\ud504\ub808\uc784": [4, 21, 31, 44], "\uc6cc\ud06c\ub97c": 4, "\uc81c\uc548\ud558\ub294": [4, 6, 20, 26], "\uc6cc\ud06c\uc758": 4, "\ud575\uc2ec\uc740": [4, 26, 27, 28, 36, 
54], "module\uc744": [4, 5], "\ud65c\uc6a9\ud558\ub294": [4, 5, 18, 32, 43, 44], "\ud55c\ubc88": [4, 12, 14, 26, 46, 50, 60], "\uc5b4\ub5a4": [4, 9, 10, 12, 14, 16, 18, 20, 21, 23, 26, 27, 28, 29, 30, 31, 33, 36, 38, 42, 43, 45, 47, 54, 60], "\ubaa8\ub378\uacfc\ub3c4": 4, "\uc735\ud569\ud560": 4, "\uc774\uc6a9\ud558\uba74": [4, 20, 26, 30, 36, 52], "module\uc740": [4, 30], "world": [4, 11, 22, 31, 44], "\ube44\ub514\uc624\ub85c": [4, 21], "\ubd80\ud130": [4, 6, 7, 16, 20, 23, 26, 28, 33, 34, 36, 39, 48, 58], "\ud6a8\uacfc\uc801\uc73c\ub85c": [4, 5, 9, 15, 18, 19, 20, 21, 46, 54], "prior\ub97c": [4, 9], "\ud559\uc2b5\ub41c": [4, 5, 9, 11, 14, 15, 18, 19, 21, 26, 27, 29, 31, 32, 34, 35, 39, 42, 43, 44, 45, 46, 47, 49, 52, 54, 57], "\ub367\ubd99\uc5ec": [4, 20, 26, 36], "\uc560\ub2c8\uba54\uc774\uc158": [4, 21], "\uc0ac\uc6a9\ud560": [4, 7, 10, 17, 20, 21, 22, 23, 26, 27, 29, 30, 31, 33, 35, 36, 42, 44, 54, 55, 56, 60], "animatediff\ub97c": 4, "\uac04\ub2e8\ud55c": [4, 10, 15, 16, 17, 29, 34, 35, 38, 43], "\ud30c\uc778\ud29c\ub2dd": [4, 18, 21, 33, 35, 38, 42], "\ubc29\uc2dd\uc778": [4, 21, 54], "motionlora\ub97c": 4, "\uc0ac\uc804": [4, 5, 7, 9, 12, 14, 18, 21, 26, 31, 35, 39, 43, 46, 50, 54], "module\uc774": 4, "\uc800\ube44\uc6a9\uc73c\ub85c": 4, "\uc6c0\uc9c1\uc784": 4, "\ud328\ud134\uc744": 4, "\ud574\uc900\ub2e4": [4, 7, 49], "ex": [4, 6, 12, 14, 18, 21, 27, 32, 34, 35, 47], "\ucd2c\uc601": [4, 18, 24], "\uae30\ubc95": [4, 11, 27, 30, 45, 46], "animatediff\uc640": 4, "\ubd80\ucc29\ud558\uc5ec": 4, "\uc2e4\ud5d8\ud588\uc73c\uba70": 4, "\ubc29\uc2dd\uc774": [4, 18, 35, 36, 51, 54, 56], "\ud004\ub9ac\ud2f0\uc640": [4, 33, 39], "\ubcf4\uc804\ud558\uba74\uc11c\ub3c4": 4, "\uc790\uc5f0\uc2a4\ub7ec\uc6b4": [4, 8, 28, 34, 42], "\ud074\ub9bd\uc744": 4, "\ubcf4\uc600\ub2e4": [4, 5, 21, 25, 31, 36, 38, 39, 40, 54], "pipelin": [4, 5, 28, 49, 52, 56], "core": [4, 33], "same": [4, 12, 28, 30], "sd1": 4, "can": [4, 20, 36, 50, 55], "download": 4, "finetun": [4, 5, 7, 9, 11, 
17, 18, 27], "civitai": 4, "hug": [4, 26], "lora\uae30\ubc18": 4, "adapter\ub97c": [4, 21, 26, 28], "\ub354\ud574": [4, 14], "video": [4, 6, 12, 17, 32, 46, 62], "\ud559\uc2b5\ud560\ub54c": 4, "\ubc1c\uc0dd\ud560\uc218": 4, "gap\uc744": 4, "\uc904\uc600\ub2e4": [4, 7], "\uc5ec\uae30\uc11c": [4, 6, 10, 14, 19, 20, 24, 26, 29, 32, 36, 39, 41, 42, 43, 46, 54, 56, 57, 58, 60], "\ub9d0\ud558\ub294": [4, 26, 41], "gap\uc774\ub780": 4, "video\uc758": [4, 17, 31], "\ud504\ub808\uc784\uc744": [4, 21, 31, 44], "\ub098\ub204\uc5b4": [4, 24, 39], "\ubd24\uc744\ub54c": 4, "blur": [4, 5, 38], "compress": [4, 59], "watermarks\ub4f1\uc744": 4, "\ub9d0\ud55c\ub2e4": [4, 20, 26, 39], "strategi": [4, 18, 34], "transfer": [4, 6, 30, 32, 40, 43, 47, 50], "videothrough": 4, "propos": [4, 8, 28], "\ub098\uba74": [4, 14, 24, 26, 28], "t2i\ubaa8\ub378\uacfc": 4, "\uacb0\ud569\ud574": [4, 7], "generator\ub85c": 4, "pre": [4, 6, 24, 27, 30, 37, 40, 43, 44, 47, 50, 54, 55, 56, 59], "\ud504\ub86c\ud504\ud2b8\ub97c": [4, 7, 9, 18, 24, 28, 31, 33, 35, 38, 42], "\uc785\ub825\ud558\uc5ec": [4, 50], "\ub514\ud4e8\uc804": [4, 20, 21, 26, 31, 35, 42], "\ubc1c\uc804\uc73c\ub85c": 4, "\uc608\uc220\uac00\uc640": 4, "\uc544\ub9c8\ucd94\uc5b4\ub4e4\uc774": 4, "\uc2dc\uac01": 4, "\ucee8\ud150\uce20\ub97c": 4, "\ubcf4\ub2e4": [4, 6, 13, 14, 15, 18, 25, 26, 27, 30, 32, 33, 35, 36, 39, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 58, 59], "\uc0dd\uc131\ub2a5\ub825": [4, 26], "creativ": [4, 44], "\uc790\uadf9\ud558\uae30": 4, "dreambooth\uc640": [4, 24, 26], "\uac00\ubcbc\uc6b4": [4, 24, 26], "\ubc29\uc2dd\ub4e4\uc774": 4, "\uc81c\uc548\ub418\uc5c8\ub2e4": [4, 54], "\ub370\uc774\ud130\uc14b\uacfc": [4, 35, 54], "\uc801\ub2f9\ud55c": [4, 36, 39], "\ud558\ub4dc\uc6e8\uc5b4\uc5d0\uc11c\ub3c4": 4, "custom": [4, 17, 21, 26, 27], "finetuning\uc744": [4, 11, 24], "\ud560": [4, 6, 7, 8, 9, 10, 12, 14, 16, 20, 21, 24, 26, 27, 28, 29, 31, 32, 33, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 52, 54, 55, 56, 58, 59], 
"\uadf8\ub85c\uc778\ud574": 4, "\uc0ac\uc6a9\uc790\ub4e4\uc774": 4, "\ube44\uc6a9\uc73c\ub85c\ub3c4": 4, "domain\uc5d0": [4, 19], "\uc801\uc6a9\ud558\uac70\ub098": 4, "\uc2dc\uac01\uc801": [4, 5, 12, 15, 21, 33, 36, 38, 43, 56], "\ub192\uc77c": [4, 21, 24, 27, 29, 43], "ai": [4, 12, 24, 38, 47, 61], "\uc544\ud2f0\uc2a4\ud2b8\uc640": 4, "\uc544\ub9c8\ucd94\uc5b4": 4, "\ucee4\ubba4\ub2c8\ud2f0": 4, "\uc0c1\ub2f9\ub7c9\uc758": 4, "models\uc744": [4, 26], "civitai\ub098": 4, "face\uc640": 4, "\ud50c\ub7ab\ud3fc\uc5d0": 4, "\uac1c\uc2dc\ud588\ub2e4": 4, "\ubaa8\ub378\ub4e4\uc774": [4, 5, 11, 12, 22, 41, 42, 50, 56, 59], "\uc218\uc900\uc758": [4, 8, 21, 28, 57], "\uc815\uc801\uc778": 4, "\uc601\ud654\ub098": 4, "\uce74\ud230\uacfc": [4, 21], "\uc2e4\uc0b0\uc5c5\uc5d0\uc11c": 4, "\uc694\uad6c\ub41c\ub2e4": 4, "\uc5f0\uad6c\uc5d0\uc11c\ub294": [4, 9, 18, 23, 24, 26, 27, 35], "\uace7\ubc14\ub85c": [4, 26, 39], "\ubcc0\ud658\ud558\ub294": [4, 21, 24, 29, 32, 34, 35, 47], "\ud30c\uc778": [4, 21, 31, 42, 44], "\ud29c\ub2dd\uc744": [4, 21, 49], "\uc218\uc9d1\uacfc": 4, "\ucef4\ud4e8\ud305": [4, 21, 31], "\uc790\uc6d0\uc758": [4, 29], "\ud544\uc694\ub294": [4, 57], "\uc0ac\uc6a9\uc790\uc5d0\uac8c": [4, 8, 43], "\uac78\ub9bc\ub3cc\uc774": 4, "\ub41c\ub2e4": [4, 6, 13, 16, 20, 21, 26, 27, 28, 31, 33, 34, 35, 36, 39, 45, 52, 54], "\uc81c\uc548\ud558\ub294\ub370": 4, "\ub2a5\ub825\uc744": [4, 6, 7, 9, 10, 12, 24, 28, 35, 42, 43, 44, 45], "\ubcf4\uc804\ud558\uba74\uc11c": 4, "\ubb38\uc81c\ub97c": [4, 9, 11, 16, 18, 21, 28, 30, 32, 34, 35, 38, 39, 43, 46, 52, 57, 61], "\ud574\uacb0\ud560": [4, 35], "\ud6a8\uacfc\uc801\uc778": [4, 5, 18, 19, 20, 21, 26, 31, 33, 38], "\ud30c\uc774\ud504\ub77c\uc778\uc774\ub2e4": 4, "animatediff\uc758": 4, "\ube44\ub514\uc624": [4, 5, 17, 18, 21, 31, 39, 44], "\ub370\uc774\ud130\uc14b": [4, 6, 12, 18, 19, 21, 30, 31, 35, 39, 44, 45, 46, 48, 52, 53, 56, 58, 59], "webvid": [4, 17, 31, 44], "10m": [4, 17, 31, 44], "\uc73c\ub85c\ubd80\ud130": [4, 12, 32, 
47], "\ud0c0\ub2f9\ud55c": 4, "\ud559\uc2b5\ud558\ub294": [4, 10, 14, 15, 21, 25, 26, 27, 29, 31, 34, 36, 37, 39, 41, 46, 47, 51, 54, 55, 56, 58, 60], "module\uc758": [4, 17, 30], "\ud559\uc2b5\uc740": [4, 19, 20, 29, 32, 33], "\uc138\uac00\uc9c0": [4, 58], "\ub2e8\uacc4\ub85c": [4, 7, 9, 21, 29, 35, 39, 55], "\uad6c\uc131\ub41c\ub2e4": [4, 21, 44], "visual": [4, 5, 16, 18, 32, 33, 43, 52, 58, 59], "target": [4, 5, 8, 11, 12, 14, 16, 17, 36, 39, 40, 43, 46, 47, 50, 56, 58], "dataset": [4, 5, 6, 16, 18, 19, 22, 25, 29, 32, 33, 43, 44, 45, 48, 49, 51, 52, 56, 57, 58], "\ud488\uc9c8\ucc28\uc774": 4, "\ub3d9\uc601\uc0c1": [4, 21], "\uc6cc\ud130\ub9c8\ud06c": [4, 31], "\uc555\ucd95\uc73c\ub85c": 4, "\uc778\ud55c": [4, 35], "\ubd80\ubd84\uc740": [4, 10, 22, 23, 24, 28, 29, 41, 42, 59], "\ubaa8\ub4c8\uc774": 4, "\ud559\uc2b5\ud568\uc73c\ub85c\uc368": [4, 31, 40, 43, 46, 51], "\uc774\ud6c4": [4, 5, 6, 9, 15, 17, 20, 21, 23, 24, 26, 28, 29, 30, 32, 33, 39, 43], "motion\uad00\ub828": 4, "\ubaa8\ub4c8\ub4e4\uc774": 4, "motion\uc5d0\ub9cc": 4, "\uc9d1\uc911\ud560": [4, 29], "\uc788\ub3c4\ub85d": [4, 7, 16, 24, 26, 28, 29, 32, 33, 35, 36, 38, 39, 43, 44, 46, 47, 53, 54, 55, 60, 61], "\ube44\ub514\uc624\ub97c": [4, 5, 17, 18, 21, 31, 44], "\uc785\ub825\ubc1b\uc744\uc218": 4, "inflate\uc2dc\ud0a8": 4, "\ub354\ud55c": [4, 23, 28, 39], "\ubaa8\uc158": [4, 21, 31], "\ubaa8\ub378\ub9c1\uc744": [4, 9, 31, 48], "\ubaa8\ub4c8\uc744": [4, 21, 61], "\ucd94\uac00\ud55c\ub2e4": [4, 21, 31, 35, 38, 39, 44], "\ud559\uc2b5\ud560\ub54c\ub294": [4, 39], "adapter\uc640": [4, 26], "freeze\ud55c\ub2e4": 4, "\uc774\ub807\uac8c": [4, 6, 10, 20, 24, 32, 42, 43, 45, 51], "\ud558\uba74": [4, 6, 10, 12, 20, 22, 27, 29, 36, 42, 45, 47], "\uc6c0\uc9c1\uc784\uc5d0": 4, "\ubd80\ubd84\uc744": [4, 10, 16, 22, 23, 28, 29, 32, 34, 38, 42, 46, 47, 50, 53, 55, 56, 58, 59, 60, 61], "\uc804\ubc18\uc801\uc73c\ub85c": [4, 29, 42], "\ubaa8\ub4c8\ubcc4": 4, "\ud559\uc2b5\uc774": [4, 5, 6, 11, 14, 22, 33, 40, 44, 47, 51, 
57, 59], "\uac00\ub2a5\ud574\uc9c4\ub2e4": [4, 26], "\uc6d0\ud560\uacbd\uc6b0": 4, "\ubc14\uafb8\uba74": 4, "\ub428": [4, 6, 8, 19, 27, 29, 32, 42, 45, 48], "option": [4, 32, 36, 44], "motionlora\uc758": 4, "motion\uc744": [4, 5], "videos\uc640": 4, "\ud559\uc2b5\ud69f\uc218\ub85c": 4, "\ubaa9\ud45c\ub85c\ud558\ub294": 4, "\ubaa8\ub4c8\uc774\ub2e4": 4, "\uc774\ub984\uacfc": 4, "hu": 4, "et": [4, 8, 9, 16, 23, 25, 35, 39, 46, 54, 58], "al": [4, 8, 9, 16, 23, 25, 35, 39, 46, 54, 58], "2021": [4, 13, 16, 25, 30, 35, 46, 48, 49, 58], "\uc774\uc6a9\ud558\ub294\ub370": 4, "pattern\uc744": 4, "\uc801\uc740\uc218": 4, "50\uac1c": 4, "video\ub9cc\uc73c\ub85c": 4, "\ud559\uc2b5\uc2dc\ud0ac\uc218": 4, "\ucc28\uc9c0\ud558\ub294": [4, 32], "\uba54\ubaa8\ub9ac\ub3c4": 4, "\uc801\uc5b4": [4, 8, 27, 33], "\ucd94\uac00\ud559\uc2b5\uc774\ub098": 4, "\uacf5\uc720": [4, 19, 21, 22], "\ubc30\ud3ec\ud558\ub294\ub370\uc5d0\ub3c4": 4, "\uc720\ub9ac\ud558\ub2e4": 4, "glide": [4, 12, 53, 54], "nichol": [4, 16, 35, 39, 58], "\ub294": [4, 5, 6, 7, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61], "condition\uc744": [4, 8, 10, 11, 20, 26, 27, 38, 54], "\uc18c\uac1c\ud558\uace0": [4, 55, 59], "guidance\ub97c": [4, 7, 17, 20, 21, 23, 26, 28, 39, 49, 52], "\uc870\uc808\ud558\uc5ec": [4, 42], "\uacb0\uacfc\ubb3c\uc744": [4, 8, 18, 39, 40], "\uc5bb\ub294": [4, 20, 28, 34, 47, 52], "\uc124\uba85\ud588\ub2e4": 4, "uid": 4, "l": [4, 5, 9, 11, 16, 18, 19, 20, 21, 22, 23, 24, 27, 29, 34, 35, 45, 47, 54, 57, 59, 60, 61], "anguag": 4, "i": [4, 6, 8, 11, 14, 16, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 34, 36, 37, 38, 39, 41, 44, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61], "mage": 4, "iffus": 4, "dite": 4, "dall": [4, 7, 18, 22, 23, 24, 26, 33, 35, 50, 52, 53, 54], "e2": [4, 24, 26, 50, 53, 54], "ramesh": [4, 9], "2022": [4, 12, 16, 19, 23, 29, 33, 46, 
47, 52, 58], "\uc774\uc6a9\ud558\uc5ec": [4, 8, 16, 20, 23, 26, 27, 31, 32, 35, 39, 42, 48], "\uc77c\uad00\uc131\uc744": [4, 5, 18, 19, 21, 31, 39, 47, 54, 61], "\ud5a5\uc0c1\uc2dc\ucf30\ub2e4": [4, 26], "imagen": [4, 12, 18, 19, 24, 31, 33, 45, 50, 54, 55], "saharia": 4, "\uc740": [4, 6, 7, 11, 12, 14, 17, 18, 19, 20, 21, 22, 26, 27, 32, 33, 35, 38, 39, 41, 43, 44, 45, 46, 47, 49, 50, 51, 52, 54, 57, 58, 59, 60, 61], "llm\uacfc": [4, 28], "cascad": [4, 26, 44, 53], "photorealistic\ud55c": [4, 23, 36], "\uc5bb\uace0\uc790": 4, "\ud588\ub2e4": [4, 7, 26, 35, 36, 38, 39, 44, 45, 54], "rombach": 4, "encoder\uc758": [4, 21, 26, 36, 38, 39, 42, 43], "\uacfc\uc815\uc744": [4, 9, 13, 14, 18, 21, 23, 24, 33, 35, 37, 38, 43, 44, 45, 58, 60, 61], "\uc218\ud589\ud568\uc73c\ub85c\uc368": [4, 6], "\ud6a8\uc728\uc744": 4, "\ub192\uc600\ub2e4": [4, 35], "\ucc38\uace0": [4, 6, 7, 12, 15, 18, 26, 27, 31, 39, 42, 45], "ediff": [4, 26, 38, 55], "balaji": 4, "\ubaa8\ub378\ub4e4\uc744": [4, 11, 18, 53], "\uc559\uc0c1\ube14": 4, "\ud559\uc2b5\uc2dc\ucf1c": [4, 14, 29, 30, 39], "denois": [4, 5, 8, 9, 13, 19, 20, 24, 26, 28, 30, 36, 37, 38, 42, 44, 46, 54, 57, 58, 59], "\ub2e8\uacc4\ubcc4\ub85c": 4, "denoise\ub97c": [4, 38], "\uc218\ud589\ud558\uace0\uc790": 4, "contrast": [4, 22, 23, 26, 39, 40, 48, 52], "\ud65c\uc6a9\ud558\uae30": [4, 24], "\ud6a8\uc728\uc801\uc778": [4, 18, 20, 24, 29, 54], "\ubc29\ubc95\uc5d0": [4, 16, 19, 23, 29, 39, 44], "\uc5f0\uad6c\uac00": [4, 5, 7, 23, 24, 39, 46, 57], "\ub728\uac81\ub2e4": 4, "\uac1c\uc778\ud654\ub780": 4, "images\ub97c": [4, 8, 16], "concepts\ub098": 4, "style\uc744": [4, 22, 26, 40, 43, 54], "\uc190\uc744": 4, "\uc798\uadf8\ub9ac\ub294": 4, "\ub208\uc744": 4, "\uc2dc\ud0a4\ub294": [4, 21, 22, 31, 32, 35, 57], "\ub192\uc544\uc9c8": 4, "\ud559\uc2b5\ub370\uc774\ud130\ub97c": 4, "\uc78a\ub294": 4, "catastroph": 4, "forgetting\uc774": 4, "\ubb38\uc81c\uac00": [4, 5, 16, 18, 24, 36, 42, 44, 47, 51, 57], "\ubb38\uc81c\ub294": [4, 12, 18, 36], 
"\ub370\uc774\ud130\uac00": [4, 10, 14, 21, 31, 35, 38, 41, 42, 43, 51, 60], "\uc801\uc744\ub54c": 4, "\ubc1c\uc0dd\ud55c\ub2e4": [4, 16, 21, 28, 35, 38], "ruiz": 4, "\uc0ac\uc6a9\ud558\uba74\uc11c\ub3c4": [4, 15], "\ucd94\uac00\ud558\uc5ec": [4, 16, 19, 23, 26, 35, 39, 55, 61], "\uc2dc\ucf30\ub2e4": [4, 7, 38], "textur": [4, 8, 21, 26, 32, 36, 55, 59, 61], "invers": [4, 11, 12, 18, 19, 26, 37, 50], "gal": 4, "concept": [4, 32, 33, 43, 52], "token": [4, 12, 15, 22, 26, 28, 29, 30, 31, 39, 43, 48, 50, 56, 59, 61], "embedding\uc744": [4, 16, 20, 26, 31, 38, 43, 49], "\ud558\uc600\ub2e4": [4, 16, 21, 28, 31, 35, 42], "\uc218\ud589\ud588\ub2e4": [4, 20], "sec": [4, 12, 36], "\uadf8\uc678\uc758": 4, "encod": [4, 5, 6, 7, 12, 15, 17, 18, 20, 22, 23, 26, 31, 48, 50, 51, 52, 53, 55, 56, 59, 60, 61], "approach": [4, 16, 31, 36, 60, 62], "jia": 4, "t2i\ub97c": 4, "\uc560\ub2c8\uba54\uc774\uc158\ud654": [4, 31], "\ub9ce\uc9c0\ub294": 4, "\uc54a\uc9c0\ub9cc": [4, 32, 35, 50, 54], "\uc544\ub798\uc758": [4, 6, 8, 10, 12, 18, 21, 38, 39, 47, 52], "\uc5f0\uad6c\ub4e4\uacfc": [4, 18], "\uad00\ub828\uc788\ub2e4": 4, "text2cinemagraph": 4, "mahapatra": 4, "flow": [4, 21, 26, 27, 29, 36, 46, 54], "prediction\uc744": [4, 26, 27, 39, 54], "cinematography\ub97c": 4, "\uc0dd\uc131\ud558\uace0\uc790": [4, 9, 14, 26, 35, 37, 41, 51, 60], "align": [4, 7, 11, 14, 21, 26, 29, 33, 36, 42, 43, 44, 52, 53, 56, 57, 59], "blattmann": 4, "generator\ub0b4\uc758": 4, "frozen": [4, 16, 19, 22, 26, 42, 52], "layers\uac00": 4, "personalizing\uc774": 4, "\uac00\ub2a5\ud568\uc744": [4, 5, 27], "\ud655\uc778\ud588\ub2e4": [4, 39], "\ube44\uad50\uc2dc": 4, "wu": 4, "\ub2e8\uc77c": [4, 9, 18, 21, 23, 28, 34, 35, 36, 39, 43, 46, 56, 61], "\ud30c\ub77c\ubbf8\ud130\ub9cc": [4, 59], "\ud30c\uc778\ud29c\ub2dd\ud558\ub294": [4, 21], "\uc81c\uc548\ud588\ub2e4": [4, 5, 39, 40, 54], "tempor": [4, 5, 17, 31], "attn": [4, 14, 22, 40], "\uac00\uc9c0\uace0": [4, 6, 7, 12, 16, 18, 20, 23, 24, 28, 29, 32, 34, 35, 38, 41, 
44, 46, 48, 51, 54, 57, 58, 59, 60, 61], "text2video": 4, "zero": [4, 5, 7, 12, 15, 19, 20, 23, 30, 31, 33, 34, 36, 42, 48, 52, 55, 56, 61], "khachatryan": 4, "\uc0ac\uc804\ud559\uc2b5\ud55c": [4, 26, 39], "t2i\ubaa8\ub378\uc744": 4, "\ud559\uc2b5\uacfc\uc815": 4, "\uc0ac\uc804\uc5d0": [4, 24, 29, 46, 50, 51, 55, 61], "\uc815\uc758\ub41c": [4, 9, 34, 43, 56, 58], "affin": [4, 22], "matrix\ub97c": 4, "wrapping\uc744": 4, "\ubc29\uc2dd\uc774\ub2e4": [4, 26], "\uad00\ub828": [4, 19, 24, 31, 35, 41], "\uc5f0\uad6c\ub4e4\uc774": [4, 5, 11, 25, 26, 32, 35, 40, 46], "\ub9ce\ub2e4": [4, 13, 21, 39], "esser": 4, "zhou": 4, "2022a": 4, "singer": 4, "ho": [4, 23, 25, 35, 46], "2022b": 4, "ruan": 4, "luo": 4, "yin": 4, "2023b": 4, "wang": [4, 39], "hong": 4, "us": [4, 5, 9, 11, 12, 13, 19, 34, 43, 44, 47, 48, 52, 55, 56], "our": [4, 8, 21, 34, 42, 43, 48, 62], "open": [4, 17, 22, 26, 28, 53], "sourc": [4, 5, 6, 14, 16, 17, 18, 19, 26, 28, 32, 34, 39, 40, 41, 43, 47, 48, 53, 56], "well": [4, 26], "develop": 4, "commun": 4, "mani": 4, "qualiti": [4, 5, 8, 10, 13, 20, 29, 32, 33, 40, 44, 46, 48, 49, 51, 52, 54, 55, 57, 60, 61], "eval": [4, 12, 13, 31], "mathcal": [4, 5, 7, 8, 10, 14, 16, 18, 19, 20, 21, 23, 26, 29, 30, 34, 36, 39, 47, 54, 57, 60], "decod": [4, 5, 12, 29, 31, 44, 48, 50, 56, 59, 60], "space\uc0c1\uc5d0\uc11c": [4, 26], "\uc218\ud589": [4, 5, 6, 12, 17, 18, 21, 29, 32, 33, 34, 36, 43, 48, 51, 54], "\uc778\ucf54\ub529\ub41c": [4, 21, 33, 44], "z_0": [4, 28, 54, 59], "x_0": [4, 6, 14, 16, 21, 23, 26, 27, 35, 36, 39, 54, 57], "forward": [4, 6, 10, 16, 18, 19, 22, 23, 25, 26, 27, 30, 31, 34, 36, 37, 41, 47, 50, 51, 56, 58, 59, 60], "z_t": [4, 5, 12, 16, 19, 28, 29, 54, 58, 59], "\ubcc0\ud658\ub428": 4, "t": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 39, 44, 45, 46, 48, 49, 50, 54, 55, 57, 58, 59, 61], "sqrt": [4, 6, 14, 16, 23, 26, 29, 39, 46, 54, 59], "bar": [4, 39, 54, 59], "alpha_t": [4, 16, 18, 19, 23, 
26, 36, 39, 54, 57, 58], "alpha": [4, 15, 19, 20, 24, 27, 30, 34, 35, 39, 54, 59], "epsilon": [4, 6, 8, 9, 14, 16, 18, 20, 21, 23, 24, 26, 27, 29, 35, 36, 39, 45, 46, 49, 50, 54, 58, 59, 61], "sim": [4, 6, 8, 9, 14, 18, 19, 26, 29, 32, 34, 36, 39, 44, 46, 51, 54, 57, 58, 59], "tag": [4, 8, 20, 26, 27, 36, 39, 54], "defin": [4, 16, 30, 36], "bar\u03b1_t": 4, "determin": [4, 14], "nois": [4, 5, 6, 8, 11, 13, 14, 15, 18, 19, 20, 24, 25, 26, 27, 34, 36, 37, 38, 39, 41, 42, 44, 45, 46, 49, 50, 51, 54, 56, 57, 58, 59, 60], "strength": [4, 16, 17, 36], "The": [4, 12, 17, 32, 41, 57], "\u03f5_\u03b8": 4, "predict": [4, 8, 13, 14, 16, 24, 27, 34, 36, 50, 52, 54, 58, 59, 61], "ad": [4, 10, 34, 36, 39, 59], "encourag": [4, 50], "an": [4, 8, 10, 12, 18, 19, 20, 26, 28, 34, 39, 41, 43, 55, 60], "bbb": [4, 20, 26, 36], "e_": [4, 9, 20, 26, 36, 39], "big": [4, 20, 25, 36, 39], "epsilon_": [4, 5, 6, 8, 9, 14, 19, 20, 21, 26, 29, 36, 39, 49, 54, 59], "theta": [4, 5, 6, 8, 9, 10, 14, 16, 18, 19, 20, 21, 23, 24, 26, 27, 29, 30, 34, 36, 39, 46, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60], "tau_": [4, 27, 29], "_2": [4, 18, 34, 36, 59], "correspond": [4, 18, 20, 32, 61], "\u03c4_\u03b8": 4, "map": [4, 5, 6, 10, 12, 14, 18, 20, 21, 22, 26, 27, 29, 32, 33, 34, 36, 39, 40, 46, 47, 55, 56, 58, 59, 60, 61], "implement": [4, 14, 18, 30, 40, 45, 50, 51, 54, 60], "unet": [4, 5, 13, 14, 26, 38, 45, 50, 56, 59, 61], "down": [4, 6, 14, 22, 59], "middl": [4, 5, 6, 32, 41, 53, 59], "block": [4, 5, 20, 22, 47, 49, 51, 56, 59], "resnet": [4, 14, 15, 42, 45, 49, 59], "spatial": [4, 5, 15, 16, 21, 26, 28, 29, 31, 33, 39, 44, 61], "self": [4, 5, 7, 10, 13, 14, 15, 22, 24, 27, 30, 31, 32, 33, 41, 46, 51, 52, 54, 56, 59, 60, 61], "cross": [4, 5, 11, 15, 17, 21, 22, 24, 26, 28, 32, 33, 38, 39, 40, 50, 52, 59, 61], "which": [4, 12, 43, 52], "help": 4, "understand": [4, 28, 31, 42, 48, 52], "model\uc5d0\uc11c": [4, 16, 19, 26, 28, 29, 54], "\ucc98\uc74c": [4, 23, 41], "\ub4f1\uc7a5\ud55c": 4, 
"\uac1c\ub150\uc73c\ub85c": 4, "\uac70\ub300": [4, 26, 45], "\uc218\ud589\ud558\uae30": [4, 21, 45, 56], "\uc81c\uc548\ub41c": [4, 19, 21, 38, 39], "\uac1c\ub150\uc774\ub2e4": 4, "lora\ub294": [4, 24, 27, 30], "\ud30c\ub77c\ubbf8\ud130\ub97c": [4, 7, 15, 24, 27, 30, 34, 36, 38, 42], "tuning\ud558\uc9c0": [4, 16, 26], "\uc54a\uace0": [4, 5, 14, 15, 16, 18, 21, 26, 28, 30, 31, 32, 34, 39, 46, 51, 53, 56, 58, 60], "decomposit": [4, 27, 30, 31], "\uc30d\uc744": [4, 9, 21, 31, 35, 46, 47, 57], "\uc0c8\ub86d\uac8c": [4, 12, 16, 43, 56], "\ucd94\uac00\ub41c": [4, 8, 9, 10, 14, 16, 26, 35, 39, 54, 56], "weight\ub9cc": [4, 17, 27, 30], "\ud30c\ub77c\ubbf8\ud130\ub294": [4, 7, 27, 59], "\uace0\uc815\ud568\uc73c\ub85c\uc368": 4, "finetuning\uc2dc": [4, 39], "forget": 4, "kirkpatrick": 4, "2017": [4, 20, 47], "\uc608\ubc29\ud560": 4, "weight": [4, 5, 7, 8, 11, 13, 15, 16, 19, 20, 22, 24, 26, 27, 30, 33, 34, 36, 39, 42, 53, 56, 58, 59, 61], "vartriangl": [4, 39], "m": [4, 7, 8, 12, 13, 14, 18, 20, 21, 22, 28, 29, 40], "b": [4, 5, 6, 7, 8, 10, 14, 16, 18, 20, 23, 24, 27, 28, 30, 31, 34, 35, 41, 45, 47, 48, 52, 60, 61], "ar": [4, 12, 13, 14, 30, 36, 38, 59], "pair": [4, 12, 26, 30, 33, 35, 37, 39, 44, 52, 56, 57, 59], "matric": [4, 59], "hyper": [4, 13, 22, 25, 30, 40], "paramet": [4, 6, 10, 13, 14, 18, 20, 24, 29, 30, 32, 34, 36, 39, 40, 46, 48, 50, 52, 54, 55, 57, 58, 59], "layer": [4, 5, 7, 8, 10, 13, 14, 15, 16, 22, 26, 30, 32, 33, 34, 35, 41, 44, 45, 47, 51, 52, 55, 56, 59, 61], "attent": [4, 5, 6, 11, 14, 15, 17, 21, 22, 23, 24, 26, 28, 29, 30, 32, 33, 38, 40, 44, 48, 49, 52, 59], "layer\uc5d0\uc11c\ub9cc": 4, "\uc0ac\uc6a9\ud560\uc218": 4, "\uc544\ub2c8\uc9c0\ub9cc": [4, 35], "\uc2e4\uc81c\ub85c\ub294": [4, 11, 13, 21], "layer\uc5d0\uc11c": [4, 5, 22, 26], "\uc0ac\uc6a9\ub41c\ub2e4": [4, 7, 14, 20, 21, 27, 38, 39], "lora\ub97c": [4, 27, 30, 36], "tuning\uc2dc": 4, "cost": [4, 44, 46, 54, 55, 58], "storag": [4, 30, 59], "\uc808\uc57d\ud560": [4, 11], "architectur": [4, 8, 
12, 13, 22, 24, 26, 31, 32, 34, 36, 41, 43, 46, 51, 52, 53, 56, 57, 60], "overal": [4, 10, 12, 22, 28, 52, 56, 57], "\uc67c\ucabd": [4, 10, 18, 21, 24, 31, 38, 39, 42], "\uadf8\ub9bc\uc758": [4, 9, 10, 12, 16, 55], "\ud558\ub298\uc0c9": 4, "\uc774\uace0": [4, 6, 12, 16, 20, 26, 51, 54, 57, 59, 60, 61], "\ucd08\ub85d\uc0c9": 4, "\uc601\uc5ed\uc774": [4, 34], "\uc774\ub2e4": [4, 6, 14, 16, 19, 20, 30, 33, 36, 39, 45, 49, 54], "t2i\ubaa8\ub378\uc5d0": 4, "\uc0bd\uc785\ud558\uc5ec": 4, "animatediff\uc5d0\ub294": 4, "\ud559\uc2b5\ud574\uc57c": [4, 56], "3\uac1c\uc758": [4, 20, 31], "\ubaa8\ub4c8": [4, 17, 21, 31], "data\uc640": 4, "data\uac04\uc758": 4, "\uac04\uadf9\uc744": 4, "\uc904\uc5ec\uc8fc\uae30": 4, "\ud559\uc2b5\uacfc\uc815\uc5d0\ub9cc": 4, "\ud559\uc2b5\ud558\uae30": [4, 6, 9, 12, 15, 21, 27, 36, 39], "\ud328\ud134": [4, 21], "\uc6cc\ud06c": 4, "\uc870\uc815\ud558\uae30": 4, "\uc704\ud55c\uac83": 4, "\ubaa8\ub4c8\uc740": 4, "\ub530\ub85c\ub530\ub85c": 4, "\ud559\uc2b5\uc2dc\ud0a4\uba70": [4, 26], "\uac01\uac01\uc744": 4, "\ud559\uc2b5\uc2dc\ud0ac\ub54c": 4, "\uc601\uc5ed\uc740": [4, 20, 57], "freez": [4, 11, 17, 24, 27, 30, 32, 33, 44, 50, 52, 57], "\uc2dc\ud0a8\ub2e4": [4, 16], "\ud559\uc2b5\uc2dc": [4, 18, 26], "object": [4, 5, 6, 8, 12, 13, 19, 28, 32, 33, 34, 35, 36, 39, 43, 45, 46, 48, 50, 53, 55, 56, 57], "sd\uacfc": 4, "\uac19\ub2e4": [4, 6, 16, 23, 26, 27, 28, 29, 32, 35, 36, 39, 43, 49, 52, 54], "\ub370\uc774\ud130\uc14b\uc740": [4, 23, 45, 52, 60, 61], "\uc218\uc9d1\ud558\uae30": 4, "bain": 4, "laion": [4, 17, 26, 29, 31, 40, 45, 54, 59], "aestet": 4, "schuhmann": 4, "\ud488\uc9c8\ucc28\uc774\ub3c4": 4, "\ud07c\uc744": 4, "\uc54c": [4, 6, 9, 16, 23, 28, 36, 39, 42, 52, 54, 57], "\uac1c\ubcc4": [4, 20, 21, 28, 39, 44, 47], "\ub2e4\ub8e8\uac8c": 4, "\ub418\uba74": [4, 6, 26, 32, 44, 57, 61], "watermark\ub4f1\uc744": 4, "\ud3ec\ud568\ud558\uace0": [4, 7, 39, 44, 45], "\ud6c8\ub828\ud560": 4, "\uc14b\uc758": [4, 38], "\ud488\uc9c8\uc740": [4, 26, 35], 
"\ubb34\uc2dc\ud560": [4, 6], "\uc5c6\uc744": [4, 8, 41, 52, 53, 61], "\ub9cc\ud07c\uc758": [4, 6, 42], "\ucc28\uc774\uac00": [4, 14, 16, 25, 27, 31, 36, 39, 42, 43, 54], "\uc9c1\uc811\uc801\uc73c\ub85c": [4, 28, 39, 48], "\ub370\uc774\ud130\uc14b\uc744": [4, 21, 23, 24, 31, 35, 39, 41, 45, 57, 61], "\uc560\ub2c8\uba54\uc774\uc158\uc758": [4, 21], "\uc81c\ud55c": [4, 19, 34, 35], "\ub420": [4, 6, 21, 33, 41, 42, 43, 44, 57, 58], "\ud488\uc9c8\ub85c": 4, "\uc778\ud574": [4, 6, 17, 18, 24, 26, 29, 33, 35, 38, 44, 47, 52], "\ud53c\ud558\uace0": 4, "t2i\uc758": 4, "\uc9c0\uc2dd\uc744": [4, 31], "\ubcf4\uc804\ud558\uae30": 4, "\ub124\ud2b8\uc6cc\ud06c\ub97c": [4, 9, 18, 21, 24, 26, 31, 34, 55], "\ubd84\ub9ac\ud558\uc5ec": [4, 20, 26, 36], "\ub3c4\uba54\uc778": [4, 6, 24], "\uc601\uc0c1": [4, 5, 12, 21, 34, 44], "\uc815\ubcf4\uc5d0": [4, 34, 43, 44, 56], "\ub9de\uac8c": [4, 7, 12, 16, 18, 21, 23, 26, 28, 30, 33, 38, 42], "\ud53c\ud305\ud558\ub294": 4, "\uc2dc\uc5d0\ub294": [4, 11, 14, 20, 21, 25, 36, 39, 48, 59], "\uc81c\uac70\ud558\uc600\uc73c\uba70": 4, "\uc55e\uc11c": [4, 6, 26, 32, 36, 39, 46, 47, 55], "gap\uc5d0": 4, "\uc758\ud55c": [4, 13, 43], "\ubd80\uc815\uc801": [4, 9], "\uc81c\uac70\ud558\ub294\ub370": 4, "\ud6a8\uacfc\uc801\uc774\ub77c\ub294": [4, 15], "layer\ub294": [4, 22, 23, 33], "\ud65c\uc6a9\ud588\uc73c\uba70": [4, 16], "layer\ub4e4\uc744": 4, "fig": [4, 11, 13, 19, 25, 34], "3\uacfc": 4, "\ucd94\uac00\ud558\uc600\ub2e4": [4, 16], "queri": [4, 7, 11, 17, 26, 27, 29, 30, 34, 43], "projection\uc744": 4, "\uc608\ub85c": [4, 42], "\uc0b4\ud3b4\ubcf4\uba74": [4, 6, 24, 37, 54], "OF": 4, "larg": [4, 7, 11, 15, 18, 19, 20, 27, 28, 32, 42, 45, 59], "qz": 4, "adapterlay": 4, "cdot": [4, 7, 16, 20, 21, 26, 27, 28, 29, 34, 36, 39, 46, 51, 54, 56, 59], "tz": 4, "intern": [4, 22, 59], "\uc0c1\uc218\ub85c": [4, 14], "time\uc5d0": [4, 20], "adapter\uc758": [4, 26], "\uc601\ud5a5\ub825\uc744": [4, 26], "\uc870\uc808\ud55c\ub2e4": [4, 21, 36], 
"\uae30\ubcf8\uac12\uc740": 4, "\ud6a8\uacfc\ub97c": [4, 8, 9, 16, 32, 52, 53], "\uc644\uc804\ud788": [4, 16, 18, 37, 47, 58], "\uc81c\uac70\ud558\uace0": [4, 8], "\uc2f6\ub2e4\uba74": [4, 45], "0\uc73c\ub85c": [4, 7, 10, 15, 16, 21, 26, 42, 49, 53], "freeze\ud558\uace0": 4, "\ud30c\ub77c\ubbf8\ud130\ub4e4\ub9cc": 4, "\ub370\uc774\ud130\uc14b\uc73c\ub85c": [4, 26, 39, 45, 56], "\ub79c\ub364\ud558\uac8c": [4, 19, 26, 32, 33, 39, 50], "\uc0d8\ud50c\ud55c": 4, "static": [4, 20, 32], "frame\ub4e4\uc744": 4, "\ucd5c\uc801\ud654\ud588\ub2e4": [4, 36], "eq": [4, 5, 17, 20, 36, 54], "\uc0ac\uc6a9\ud588\ub2e4": [4, 7, 21, 26, 27, 35, 36, 38, 39, 44, 54], "\uc544\uc9c1\uae4c\uc9c0\ub294": [4, 27], "dynamics\ub97c": 4, "\ubaa8\ub378\uacfc": [4, 9, 18, 21, 22, 26, 28, 31, 35, 39, 42, 47, 56, 61], "\uacf5\uc720\ud558\ub294": 4, "dimension\uc0c1\uc758": 4, "\uc2dc\uac04\ucd95\uc73c\ub85c": 4, "\ubaa8\ub378\ub9c1": [4, 20, 35, 36], "\ud558\uae30": [4, 6, 9, 14, 18, 20, 21, 24, 27, 28, 32, 35, 44, 46, 48, 50, 54, 57, 58, 59, 60, 61], "2\uac00\uc9c0": [4, 5, 9, 19, 39, 46, 48, 52, 56], "\ub2e8\uacc4\uac00": [4, 18, 28, 35], "\ud544\uc694\ud558\ub2e4": [4, 16, 20, 21, 35, 45], "3d": [4, 9, 14, 21, 34, 44, 61, 62], "\ub370\uc774\ud130\uc5d0": [4, 7, 29, 31, 38, 41, 42, 54, 61], "\ud655\uc7a5\uc2dc\ucf1c\uc57c": 4, "inflat": 4, "\uc2dc\uac04\ucd95\uc0c1\uc73c\ub85c": 4, "\uc815\ubcf4\uc758": [4, 5, 21, 40], "\ud750\ub984\uc744": 4, "\ub9cc\ub4e4\uae30": [4, 19, 28, 31, 35], "sub": 4, "\ub808\uc774\uc5b4\ub294": [4, 21, 24, 34], "\uadf8\ub9bc": [4, 38, 42, 43, 55], "\uc0ac\uc804\uc9c0\uc2dd": 4, "content": [4, 5, 8, 20, 26, 32, 33, 36, 40, 44, 47, 53, 55, 61], "\ud3ec\ucc29\ud560\uc218": 4, "\ud65c\uc6a9": [4, 5, 6, 9, 15, 18, 19, 32, 35, 36, 39, 44, 47, 48, 49], "\uc720\uc9c0": [4, 5, 8, 12, 18, 56, 59], "\uc704\ud574\uc11c": [4, 5, 6, 12, 17, 19, 23, 28, 32, 36, 39, 41, 45, 46, 56], "\ub3d9\uc77c": [4, 5, 19, 34, 39, 40], "video\ub97c": [4, 31, 44], "\ub2e4\ub8e8\uace0\uc790": 4, 
"\ub3c5\ub9bd\uc801\uc73c\ub85c": [4, 5, 18, 21, 31, 46, 61], "\ub0b4\ubc84\ub824\ub450\uace0": 4, "\ud655\uc7a5\uc2dc\ud0a4\ub294": [4, 7], "\ubc29\ud5a5\uc774": [4, 18, 20, 21], "\uc120\ud638\ub41c\ub2e4": 4, "\uc5f0\uad6c": [4, 11, 18, 21, 25, 31, 33, 37, 56], "\ucc38\uace0\ud558\uc5ec": 4, "5d": [4, 34], "tensor": [4, 30, 32, 45, 51, 59], "time": [4, 5, 6, 8, 10, 11, 13, 14, 16, 18, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 38, 40, 45, 46, 48, 49, 52, 55, 56, 59, 61], "c": [4, 5, 8, 9, 10, 11, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 31, 34, 36, 39, 40, 41, 45, 50, 54, 56, 59], "f": [4, 8, 10, 13, 14, 16, 18, 23, 27, 29, 30, 31, 39, 45, 46, 47, 50, 51, 54, 58], "\uc785\ub825\uc73c\ub85c": [4, 5, 19, 20, 21, 22, 24, 26, 27, 31, 33, 34, 39, 42, 54], "\ubc1b\ub3c4\ub85d": 4, "\uc218\uc815\ud588\ub2e4": 4, "batch": [4, 5, 7, 11, 15, 17, 26, 30, 31, 33, 34, 47, 50, 51, 55, 58, 59], "frame\uc744": [4, 31], "\ub73b\ud55c\ub2e4": [4, 20, 39], "\ub0b4\ubd80": [4, 28, 35, 36, 38], "map\uc774": 4, "\ub808\uc774\uc5b4\ub97c": [4, 21, 31, 35, 38], "\uc9c0\ub098\uac08\ub54c\ub294": 4, "\uc2dc\uac04": [4, 6, 9, 12, 14, 17, 18, 21, 30, 31, 33, 35, 44], "\ucd95\uc744": [4, 38], "\uc758\ubbf8\ud558\ub294": [4, 11, 42, 60], "reshaping\uc744": 4, "\ubb34\uc2dc\ud55c\ub2e4": 4, "4d": 4, "bf": [4, 7, 49], "\ub808\uc774\uba38": 4, "\ucc98\ub7fc": [4, 6, 12, 18, 27, 33, 41, 46, 47, 48, 53, 59], "\ucc98\ub9ac\ud560": [4, 19, 35, 39], "\ubc18\uba74\uc5d0": [4, 22, 28, 46, 47, 51, 56, 58, 60, 61], "\uacf5\uac04\ucd95": 4, "reshaping\ud558\uc5ec": 4, "bhw": 4, "\uc5f0\uad6c\ub4e4\uc740": [4, 18, 26, 35, 40], "modeling\uc758": 4, "\ud0d0\uad6c\ud558\uace0": [4, 21], "animatediff\uc5d0\uc11c\ub294": 4, "\ucc28\uc6a9\ud558\uc5ec": 4, "\uc2dc\uac04\ucd95\uc0c1\uc5d0\uc11c": 4, "\ub3d9\uc791\ud558\ub3c4\ub85d": 4, "\uc218\uc815\uc744": [4, 18, 24, 31], "\uac70\uccd0": [4, 32, 33, 34, 46, 47, 51, 56, 58], "design\ud588\ub2e4": 4, "\uc774\ud558": [4, 6, 38], 
"\uc2e4\ud5d8\uc744": [4, 6, 9, 12, 16, 18, 23, 26, 28, 34, 56, 58], "\uad6c\uc870\uac00": [4, 15, 41], "\ubaa8\ub378\ub9c1\ud558\ub294\ub370": 4, "\uc801\ud569\ud558\ub2e4\ub294": 4, "\ubc1c\uacac\ud588\ub2e4": [4, 7, 27, 35], "3\uc744": [4, 30], "transformer\uac00": 4, "\uc2dc\uac04\ucd95\uc5d0\uc11c": 4, "\ub3d9\uc791\ud558\ub294": [4, 11, 29], "block\uc73c\ub85c": [4, 17, 20], "\uc774\ub8e8\uc5b4\uc9c4\uac83\uc744": 4, "\ubcfc\uc218": [4, 26, 39], "sinusoid": [4, 14, 44], "encoding\uc744": [4, 20, 23, 35], "\uc560\ub2c8\uba54\uc774\uc158\uc0c1\uc758": 4, "\ud504\ub808\uc784\uc758": [4, 21, 44], "\uc2dc\uac04\uc801": [4, 21, 27, 31, 44], "\uc704\uce58\uc815\ubcf4\ub97c": 4, "\ub098\ud0c0\ub0b4\uace0\uc790": 4, "\ub300\ub85c": 4, "\uc785\ub825\ud06c\uae30\ub294": 4, "\uc870\uc808\ud558\uc600\ub2e4": 4, "\ud3bc\uce58\uace0\uc790": 4, "\ud560\ub54c\ub294": 4, "\ub2e4\uc74c\uacfc": [4, 6, 9, 10, 12, 16, 21, 23, 24, 26, 27, 28, 29, 35, 39, 41, 43, 46, 47, 50, 51, 54, 55, 56, 58, 59, 60, 61], "\uae38\uc774": [4, 5, 17, 35, 44], "\ud06c\uae30": [4, 16, 20, 29, 33, 38, 39, 54], "z_1": [4, 58], "z_f": 4, "z_i": [4, 12, 29], "sequence\ub85c": [4, 39], "\ub2e4\ub8f0\uc218": [4, 38], "\ubca1\ud130\uac00": 4, "block\uc744": [4, 15, 17, 23, 49], "\ud1b5\uacfc\ud558\uba74": [4, 9], "z_": [4, 9, 19, 21, 27, 48, 54, 58, 59], "out": [4, 10, 14, 22, 27, 45, 46, 54, 59, 60], "v": [4, 6, 7, 8, 11, 12, 14, 18, 24, 26, 27, 28, 29, 30, 32, 40, 43, 48, 50, 51, 54, 55, 58, 59, 61], "softmax": [4, 14, 26, 28, 29, 48], "qk": [4, 26, 29], "kz": 4, "vz": 4, "\ubd84\ub9ac\ub41c": [4, 40], "\uc758\ubbf8\ud55c\ub2e4": [4, 7, 16, 20, 27, 28, 35, 36, 37, 39, 54], "mechanism\uc744": [4, 26, 40], "\ud604": 4, "\uc0dd\uc131\uc5d0": [4, 7, 9, 17, 22, 23, 24, 26, 35, 39, 42, 43, 59], "\ud504\ub808\uc784\uc73c\ub85c": [4, 21, 44, 61], "\ucd94\ucd9c\ub41c": [4, 21, 24, 33, 45], "\uac83\uc774": [4, 5, 6, 9, 10, 12, 14, 15, 16, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 41, 42, 43, 44, 
46, 47, 49, 50, 52, 53, 54, 57, 58, 59, 60], "\uac00\ub2a5\ud558\ub2e4": [4, 7, 13, 14, 17, 22, 26, 27, 35, 37, 39, 40, 44, 49], "\uacb0\uacfc\uc801\uc73c\ub85c": [4, 9, 18, 24, 26, 47], "\uac1c\ubcc4\uc801\uc73c\ub85c": [4, 34], "\ud655\uc7a5\ud558\uc5ec": [4, 18, 27, 35], "\ucd94\uac00\ud55c": [4, 18, 54], "animatediff\uac00": 4, "\uc2dc\uac04\uc5d0": [4, 44], "content\uc758": 4, "\ud3ec\ucc29\ud558\uae30": [4, 21], "\uc81c\uc791\ud558\ub3c4\ub85d": 4, "block\uc804\uc5d0": 4, "\uc78a\uc5b4\uc11c\ub294": 4, "\uc548\ub41c\ub2e4": 4, "\uc790\uccb4\uac00": [4, 5, 9, 25, 32, 43], "frame\uc758": 4, "\uc21c\uc11c\ub97c": 4, "\uc54c\uace0": [4, 6, 18], "\uc544\ub2c8\ub2e4": [4, 33, 45], "\ub123\uc74c\uc73c\ub85c": 4, "\ubb38\uc81c\ub4e4\uc744": [4, 32], "transformer\uc758": [4, 27, 30, 39, 42, 48], "\ub808\uc774\uc5b4\uc758": [4, 24, 35], "\ud558\uc600\uc73c\uba70": [4, 35], "residu": [4, 5, 14, 15, 23, 24, 33, 47, 49, 50, 52, 59], "connection\uc744": [4, 34, 49], "\uc2dc\uc791\uc2dc\uc5d0": 4, "ident": [4, 15, 17, 21, 31, 41, 50, 56], "mapping\uc73c\ub85c": [4, 39], "\uc804\ubc18\uc801\uc778": [4, 21, 28, 42, 43, 50], "\uc0ac\uc804\ud559\uc2b5\ud558\ub354\ub77c\ub3c4": 4, "\ub3d9\uc791": [4, 25, 26, 31], "\ud328\ud134\uc5d0": 4, "\uc801\uc6a9\uc5d0": [4, 34], "zoom": [4, 11, 32], "pan": [4, 8], "roll": 4, "\uc0ac\uc804\ud559\uc2b5\uc744": 4, "\uac10\ub2f9\ud560": 4, "\uc5c6\uc5b4": [4, 5, 14, 19, 52, 55, 60], "\uc561\uc158\uc5d0": 4, "\ub9de\ucdb0": [4, 7, 23, 26, 48], "\ud29c\ub2dd\ud558\uace0\uc790": 4, "\uc0ac\uc6a9\uc790\ub97c": 4, "\ud69f\uc218\ub85c\ub3c4": 4, "\ud6a8\uc728\uc801\uc73c\ub85c": [4, 19, 20, 27, 29, 35, 36, 44, 54, 58, 61], "\uc911\uc694\ud558\ub2e4": [4, 16, 21], "animatediff\uc5d0": 4, "\uc801\uc6a9\ud588\ub2e4": [4, 7, 20, 36, 44], "\uad6c\uc870\uc640": [4, 21], "\uc81c\ud55c\ub41c": [4, 18, 21, 24, 29], "layers\uc5d0": [4, 41], "layers\ub97c": 4, "personalization\uc744": [4, 24], "\uba87": [4, 8, 30, 35, 36, 43, 46, 47, 51, 54], "\uc885\uc758": 
4, "\ubc29\uc2dd\uc73c\ub85c": [4, 6, 9, 12, 14, 16, 20, 24, 26, 27, 30, 33, 34, 35, 39, 40, 44, 46, 49, 51, 54, 57], "\uc9c4\ud589\ud558\uc600\uc73c\uba70": 4, "rule": [4, 12, 58], "augmentation\uc744": [4, 42], "videos\ub97c": 4, "\uc5bb\uc5c8\ub2e4": [4, 7, 26, 35, 38], "\uc608\ub97c": [4, 9, 16, 26, 28, 29, 39, 41, 45, 46, 47, 53], "\ub4e4\uc5b4": [4, 9, 16, 26, 28, 29, 39, 41, 45, 46, 47], "\uc5bb\uae30": [4, 7, 35, 36, 39, 57], "\uc810\ucc28": [4, 26, 58], "\uc904\uc774\uac70\ub098": 4, "\ub298\ub824\uac00\uba70": 4, "\uc9c4\ud589\ud588\ub2e4": [4, 7, 26, 44, 54], "motionlora\ub294": 4, "\uc815\ub3c4\uc758": [4, 43], "2000\ubc88\uc758": 4, "\ud6c8\ub828\ud69f\uc218\ub85c": 4, "\ud30c\uc778\ud29c\ub2dd\ud588\uc744\ub54c\ub3c4": 4, "\uad1c\ucc2e\uc740": [4, 6, 36, 43], "property\ub85c": 4, "composit": [4, 19, 32, 34, 45], "capability\ub97c": [4, 26], "\uac01\uac01\uc774": 4, "time\uc0c1\uc758": 4, "effect\ub97c": 4, "\uc735\ud569\ud558\uae30\uc704\ud574": 4, "\ud611\ub825": 4, "combin": [4, 6, 12, 16], "\uac00\ub2a5\ud55c": [4, 6, 7, 9, 13, 14, 21, 29, 34, 35, 36, 39, 40, 45, 46, 51, 54, 56, 59], "3\uac1c": [4, 50], "\ubaa8\ub4c8\uc758": 4, "objective\ub294": [4, 26], "\uc57d\uac04\uc529": 4, "\ub2e4\ub974\ub2e4": [4, 36, 49], "adapter\ub294": [4, 26], "sd\uc758": 4, "loss\uc778": 4, "\ud559\uc2b5\ud55c\ub2e4": [4, 7, 14, 16, 21, 26, 31, 35, 38, 39, 44, 45, 54], "\uc5ed\ud560\uc744": [4, 16, 24, 25, 26, 28, 57, 59], "module\uacfc": 4, "lora\uc758": [4, 24, 27], "data\uc5d0": [4, 14, 16, 19, 22], "\ucc28\uc6d0\uc744": [4, 14, 29, 33, 44, 56], "\uc218\uc6a9\ud558\uae30": 4, "\uc57d\uac04": [4, 25], "\uc218\uc815\ub41c": [4, 8, 9, 35], "objective\ub97c": [4, 7, 14, 26, 30, 39, 43, 57], "encoder\ub97c": [4, 5, 16, 17, 26, 38, 39], "\uc778\ucf54\ub529\ub41c\ub2e4": 4, "code\ub294": [4, 27, 39], "schedule\uc5d0": 4, "\ub178\uc774\uc988\uac00": [4, 14, 18, 35, 39], "\uc785\ub825\uc740": 4, "codes\uc640": 4, "\uc30d\uc774\ub418\ub294": 4, "prompts\uc774\uba70": 4, 
"process\uc5d0\uc11c": [4, 13, 16, 25, 37, 39, 49], "\ub178\uc774\uc988\ub97c": [4, 6, 14, 18, 20, 21, 35, 39, 42, 45], "\uc608\uce21\ud55c\ub2e4": [4, 16, 23, 35, 36, 39, 49], "\ucd5c\uc885": [4, 7, 14, 15, 18, 20, 21, 22, 26, 28, 31, 34, 35, 38, 39, 45, 56, 59], "2_2": [4, 20, 36, 39], "\ubaa8\ub4c8\ub4e4": 4, "\ud0c0\uac9f\uc744": 4, "\uc2dc\ud0a8\ub4a4": 4, "\ud559\uc2b5\ud588\ub2e4": [4, 7, 26, 38, 44], "inference\uc2dc\uc5d0\ub294": 4, "model\ub294": 4, "\uc124\uba85\ud55c\ub300\ub85c": 4, "inflate\ub418\uba70": 4, "\uc0dd\uc131\ud55c\ub2e4": [4, 7, 21, 28, 31, 35, 38, 44], "inference\uc2dc": [4, 26, 30], "\uadf8\ub0e5": [4, 20, 26], "\ubc30\uc81c\ud558\uc9c0": 4, "injection\ud558\uc600\uc73c\uba70": 4, "\uc601\ud5a5\ub825\uc740": 4, "4\uc758": [4, 36, 47], "\uc870\uc808\ud588\ub2e4": 4, "3\uc758": [4, 13, 48], "study\uc5d0\uc11c": 4, "\uac12\uc5d0": [4, 6, 20, 34, 49, 54], "\uacb0\uacfc\uc758": [4, 24, 43], "\ucc28\uc774\ub97c": [4, 21, 25, 39, 46, 50, 54, 56], "frames\uc740": 4, "process\uc640": [4, 5, 16, 23], "codes\ub97c": 4, "\ub514\ucf54\ub529": [4, 21, 33], "\ud568\uc73c\ub85c\uc368": [4, 6, 18, 19, 43, 53, 55, 56], "\uc5bb\uc744\uc218": [4, 39], "5\uc5d0": 4, "\uc801\uc6a9\ud558\uc5ec": [4, 9, 20, 31, 35, 54, 56, 58, 61], "\uc0ac\uc6a9\ud558\uc600\ub2e4": [4, 21, 27, 31, 39], "\uc790\uc138\ud55c": [4, 12, 16, 27, 42, 58, 59], "\uc0ac\ud56d\uc740": 4, "supplementari": [4, 18], "materi": [4, 18, 19, 53], "\ud655\uc778\ud574\uc8fc\uc138\uc694": 4, "user": [4, 8, 9, 18, 24, 40, 43, 55], "smooth": [4, 5, 19, 36], "\ub4f1\uc218\ub97c": 4, "\uc870\uc0ac\ud588\ub2e4": 4, "averag": [4, 12, 13, 15, 27, 46, 54, 55, 58], "aur": 4, "\uc810\uc218\ub97c": [4, 7, 12, 23, 48, 52], "\uac00\uc9c0\uba74": 4, "prefer": [4, 11, 52], "metric": [4, 12, 13, 14, 18, 36, 37, 44, 46, 47, 50, 53], "paper\uc5d0\uc11c": [4, 40], "\uc5b8\uae09\ud588\ub358": [4, 32], "\ud14d\uc2a4\ud2b8\uc30d\uc744": 4, "\ub3d9\uc2dc\uc5d0": [4, 5, 7, 9, 11, 18, 29, 32, 34, 38, 40, 55, 56, 57], 
"\ud3c9\uac00\uc9c0\ud45c\uc774\ub2e4": 4, "frames\uc640": 4, "\uc0ac\uc774": 4, "\uacc4\uc0b0\ud55c": [4, 14, 51], "score\ub294": [4, 37, 42], "\ubca1\ud130\ub4e4": 4, "\ucf54\uc0ac\uc778": 4, "\uc784\ubca0\ub529\uacfc": [4, 35, 43], "\uc784\ubca0\ub529": [4, 9, 15, 18, 21, 31, 35, 38, 43], "\uc720\uc0ac\ub3c4": [4, 9, 23, 29, 31], "\uc560\ub2c8\uba54\uc774\uc158\uc774": [4, 21], "\uc5c6\uc73c\ubbc0\ub85c": [4, 25], "image\uc640": [4, 5, 8, 11, 16, 23, 26, 47, 48], "\uc5f0\uc18d\ub41c": [4, 21], "\uc30d\uc758": 4, "\uc784\ubca0\ub529\uc758": 4, "scaler\ub97c": 4, "adapter\uc5d0": [4, 17], "\uc81c\uac70\ud55c": [4, 29], "\uadf8\ub9bc\uc740": [4, 9, 10, 12, 16, 27, 29, 38, 41, 42], "\uccab\ubc88\uc9f8": [4, 46, 55, 56, 58, 61], "\ud504\ub808\uc784\uc774\ub2e4": 4, "\uc81c\uac70\ud588\uc744\ub54c": 4, "\ub192\uc544": [4, 11], "\ubcf4\uc774\ub294\ub370": 4, "adapter\uac00": 4, "\ud2b9\uc131\uc774\ub77c\uace0": 4, "watermark\ub098": 4, "\ube14\ub7ec": 4, "\ub4f1\uc744": [4, 15, 24, 27, 29, 34, 35, 47, 54], "\ud559\uc2b5\ud588\uae30": 4, "\ub54c\ubb38\uc774\ub2e4": [4, 22, 38, 39], "\ud559\uc2b5\uacfc\uc815\uc5d0": 4, "\ub3c4\uc6c0\uc774": [4, 21, 24, 32, 43, 48], "\ub418\uc5c8\uc74c\uc744": 4, "transformer\uad6c\uc870\uc640": 4, "convolution\uc778": 4, "\uad6c\uc870\uc758": [4, 22], "\ube44\uad50\ud588\ub2e4": [4, 35, 54], "\ubd84\uc57c\uc5d0\uc11c": [4, 6, 13, 22, 24, 25, 27, 40, 51, 54], "\uc790\uc8fc": [4, 50, 56, 58], "attention\ubd80\ubd84\uc744": 4, "1d": [4, 31], "convolution\uc73c\ub85c": 4, "\uad50\uccb4\ud558\uc5ec": [4, 19, 26], "\ud30c\ub77c\ubbf8\ud130\uac00": [4, 38], "\ub193\uc5ec\uc788\uc74c\uc744": 4, "convolut": [4, 5, 8, 15, 22, 25, 33, 34, 41, 44, 47, 48, 56, 59], "\ub3d9\uc77c\ud558\uac8c": [4, 15, 18, 22, 24, 36, 39, 42, 46, 53, 54, 55, 58, 59, 61], "\ub193\uc558\uc9c0\ub9cc": 4, "\ube44\uad50\ud558\uc5ec": [4, 28, 31, 36], "\uc81c\ub300\ub85c": [4, 15, 19, 41], "\ubc18\uc601\ud558\uc9c0": [4, 22, 28], "\ubabb\ud588\ub2e4": [4, 25], 
"efficiency\uc640": 4, "\uce21\uba74\uc5d0\uc11c": [4, 5, 21, 24, 28, 29, 35, 36, 41, 57], "\ud6a8\uc728\uc131\uc744": [4, 20, 34, 36], "\uc2dc\ud5d8\ud574\ubcf4\uc558\ub2e4": 4, "\uac1c\uc218\uc640": [4, 29], "\uc870\uc808\ud574\uac00\uba70": 4, "\ud559\uc2b5\uc2dc\ucf30\ub2e4": 4, "\ubaa8\ub378\ud559\uc2b5\uc744": 4, "\uc704\ud574\ub3c4": 4, "\ubc30\ud3ec\ub97c": 4, "\uc704\ud574\uc11c\ub3c4": 4, "\uc911\uc694\ud55c": [4, 5, 7, 12, 13, 18, 21, 25, 26, 28, 29, 31, 35, 39, 41, 43], "\ubd80\ubd84\uc774\ub2e4": [4, 28], "animatediff\ub294": 4, "\ube44\uad50\uc801": [4, 13, 22, 25, 26, 35, 36, 39, 59], "\ud30c\ub77c\ubbf8\ud130": [4, 15, 26, 27, 29, 31, 38, 46, 50, 51, 55, 56, 60], "\uc801\ub4e4\ub54c\uc5d0\ub3c4": 4, "\ub9cc\ub4e4\uc218": 4, "\uc2e4\ud5d8\uc5d0\uc11c\ub294": [4, 6, 31], "\ubcf8\uac83\uc774\ub2e4": 4, "\uc5b4\ub835\uae30": [4, 28], "\uc801\uc6a9\ud558\uae30": [4, 20, 28, 31], "\uc801\uc744\ub54c\uc5d0\ub3c4": 4, "\ud559\uc2b5\ud558\uace0\uc790": [4, 50], "\uc6c0\uc9c1\uc784\uc740": 4, "\uc788\uc5c8\uc73c\ub098": [4, 19], "\uadf9\ub3c4\ub85c": 4, "\uc801\uc744": [4, 41], "\uae09\uaca9\ud55c": 4, "\uc800\ud558\uac00": [4, 13, 24, 28, 49], "\uc788\uc5c8\ub2e4": [4, 20, 23, 26, 27, 35, 36, 38, 39, 40, 45, 54, 57], "content\uc640": [4, 40], "prior\uc758": 4, "exist": [4, 20, 31], "content\ub97c": [4, 20, 36], "\uc870\uc808\ud560": [4, 10, 26, 41, 42], "\ud655\uc778\ud558\uae30": [4, 10, 23], "controlnet\uacfc": [4, 10, 26], "\uacb0\ud569\ud558\uc5ec": [4, 18, 21, 34, 35, 42, 47, 54, 61], "\uc0dd\uc131\uc2dc": [4, 36, 39], "depth\ub97c": [4, 17], "ddim": [4, 5, 6, 12, 23, 26, 27, 29, 42, 43, 54, 57, 58, 59], "inversion\uc744": [4, 16, 24], "\ub2e4\ub4ec\uc5b4\uc9c4": 4, "sequences\ub97c": 4, "\uc5bb\uace0": [4, 14, 32, 35], "\ucd5c\uc2e0": [4, 8, 21, 35, 44], "\uc218\uc815": [4, 15, 19, 31], "randomli": [4, 19, 20, 32, 50], "noise\ub97c": [4, 5, 8, 13, 14, 16, 20, 22, 25, 26, 33, 36, 37, 39, 41, 54], "\uc0dd\uc131\uc744": [4, 5, 8, 9, 10, 17, 18, 20, 21, 
24, 26, 28, 31, 33, 35, 36, 43, 44, 52, 54, 55], "pipeline\uc778": 4, "\ub514\uc790\uc778\ud558\uc600\uc73c\uba70": 4, "\uc720\uc9c0\ud560": [4, 10, 24, 27, 37, 43, 47], "\uc788\uc73c\uba70": [4, 9, 10, 19, 24, 27, 28, 29, 31, 35, 36, 42, 47, 48, 52, 55], "motion\uc73c\ub85c": 4, "\ud559\uc2b5\ub418\uba74": [4, 14, 43], "animate\uc2dc\ud0a4\uace0\uc790": 4, "\ud560\ub54c": [4, 27, 39, 50], "\ud6a8\uc728\uc131\uacfc": 4, "\uc0dd\uc131\ub2a5\ub825\uc744": 4, "\uac80\uc99d\ud588\ub2e4": 4, "\ub610": [4, 9, 11, 16, 22, 25, 26, 32, 35, 36, 37, 38, 40, 45, 50, 53], "controllability\uce21\uba74\uc5d0\uc11c\ub3c4": 4, "\ud559\uc2b5\uc5c6\uc774": [4, 26, 27], "\ucde8\ud5a5\uc758": 4, "\uc6cc\ud06c\uc5d0": 4, "\uc2dc\ud0ac": [4, 31, 32, 42, 45], "\uc218\uc788\ub294": 4, "\ubca0\uc774\uc2a4": [4, 35, 42], "\ub77c\uc778\uc73c\ub85c\uc368": 4, "\ubc29\uba74\uc758": 4, "application\uc5d0": [4, 26], "\uc7a0\uc7ac\ub825\uc744": [4, 7, 54], "\ud074\ub9ad\ud558\uba74": 4, "gif\ub97c": 4, "\ubcf4\uc2e4": 4, "side": [4, 19, 34], "17": [4, 25, 49, 54], "year": [4, 12], "old": 4, "japanes": 4, "school": 4, "gpt\ub85c": 4, "\uadf8\ub9bc\uc744": [4, 6, 12, 16, 18, 28, 38, 41, 42, 47], "input\uc73c\ub85c": [4, 8, 10, 22, 29, 34, 38, 41], "\uc0ac\uc6a9\ud568": [4, 18, 42, 43, 48, 52], "man": [4, 59], "black": [4, 20], "pad": [4, 10, 14, 59], "jumper": 4, "\ucd2c\uc601\ud55c": [4, 12], "\uc785\ub825\ud55c": [4, 9], "\uc0ac\uc9c4\uc758": [4, 29], "\uc778\ubb3c\uc758": [4, 17, 21], "\uc778\uc885\uc774": 4, "\uc720\uc9c0\ub418\uc9c0": 4, "\uc54a\uc558\ub294\ub370": 4, "\ud559\uc2b5\ub370\uc774\ud130": 4, "\ubd88\uade0\ud615": 4, "\ub54c\ubb38\uc73c\ub85c": [4, 39], "\uc0ac\ub8cc\ub428": [4, 36], "blond": 4, "blue": [4, 12, 48], "ey": 4, "\ub290\ub080\uc810": 4, "10m\uc774": 4, "\uc560\ub2c8\uba54\uc774\uc158\ud654\uc5d0": 4, "\ub370\uc774\ud130\uc14b\uc778\uc9c0": 4, "\ubaa8\ub974\uaca0\ub2e4": [4, 49], "\uc0ac\uc6a9\ud558\uc9c0": [4, 12, 20, 24, 28, 32, 35, 39, 45, 46, 54, 56, 61], 
"\uc810\uc774": [4, 16, 41, 42], "\uc544\uc27d\ub2e4": 4, "\uc2e4\uc9c8\uc801\uc73c\ub85c": 4, "motionlora\uc815\ub3c4\ub77c": 4, "\uc0ac\uc6a9\uc774": [4, 54], "\ud3b8\ub9ac\ud558\ub2e4": 4, "reproduction\uc774": 4, "\uc6a9\uc774\ud558\ub2e4": 4, "\uc704\ud574\uc11c\ub294": [4, 6, 19, 20, 21, 28, 32, 39, 43, 45, 48, 57, 59, 61], "t2i\uac00": 4, "\uc81c\uc77c": [4, 29, 48, 49, 51, 59], "\ubd80\ubd84\uc774\ub77c\uace0": 4, "\ud560\uc218": [4, 27, 54], "\uc788\ub294\ub370": [4, 6, 16, 19, 36, 37, 38, 39, 40, 45, 54, 57, 59, 60], "\uc2a4\ud0c0\uc77c\uc758": [4, 19], "\uad6c\ud558\ub294": [4, 23, 28, 45, 51, 57, 60], "\ub9de\uc9c0": 4, "\uc54a\uc73c\uba74": 4, "\ud074\ub9bd": [4, 31], "\ucd08\ubc18\uc5d0": [4, 25, 51], "\uae09\uaca9\ud788": 4, "\ubcc0\ud654\ud558\ub294": 4, "\ubd80\ubd84\uc774": [4, 5, 16, 22, 39, 41, 42], "\uc0dd\uae34\ub2e4": [4, 27, 38, 52], "consist": [5, 8, 13, 16, 32, 44, 56], "control": [5, 7, 8, 17, 18, 19, 20, 26, 28, 32, 40, 41, 56, 59, 61], "synthesi": [5, 6, 9, 20, 23, 29, 33, 37, 38, 41, 46, 50, 55], "charact": [5, 50], "2311": [5, 17, 57], "17117": [5, 17], "offici": [5, 9, 11, 13, 17, 19, 22, 27, 32, 39, 48, 49], "nonoffici": [5, 19], "humanaigc": 5, "geonhak": [5, 17, 19, 62], "song": [5, 16, 17, 19, 46, 54, 58, 62], "march": [5, 17, 20, 39, 55], "13": [5, 6, 17, 22, 32, 33, 38, 39, 52, 54], "exampl": [5, 12, 13, 18, 28, 41, 43, 50, 53], "figur": [5, 6, 13, 17, 19, 22, 25, 32, 33, 35, 38, 39, 40, 42, 43, 45, 47, 50, 52], "\uc8fc\ub958\uac00": 5, "\ub418\uc5c8\uc9c0\ub9cc": 5, "\uc601\uc5ed\uc5d0\uc11c\ub294": 5, "\uc5b4\ub824\uc6c0\uc774": [5, 9, 18, 25, 34, 56, 57, 61], "animation\uc5d0\uc11c": 5, "\uc0c1\uc138": 5, "\uc720\uc9c0\ud558\ub294": [5, 21, 30], "\ubb38\uc81c\uc774\ub2e4": 5, "image\uc758": [5, 11, 16, 20, 22, 23, 25, 26, 29, 37, 40, 43], "\ubcf5\uc7a1\ud55c": [5, 6, 8, 12, 21, 23, 26, 28, 29, 34, 35, 36, 42], "appear": [5, 17, 20, 50], "\ud2b9\uc9d5\uc758": 5, "\uc720\uc9c0\ud558\uae30": [5, 10, 15], "feature\uacfc": [5, 7, 
38], "\ud1b5\ud569\ud560": 5, "referencenet": 5, "\uc124\uacc4": [5, 15, 25], "controllability\uc640": 5, "continuity\uc744": 5, "pose": [5, 8, 10, 17, 20, 40, 41, 50, 59, 61], "guider": 5, "\ud504\ub808\uc784\uac04": 5, "\ubd80\ub4dc\ub7ec\uc6b4": [5, 8], "\uc804\uc774\ub97c": 5, "effect": [5, 16, 20, 34, 40, 41, 53, 57, 59], "\uc784\uc758\uc758": [5, 6, 8, 16, 18, 19, 20, 23, 43, 46], "\ub300\ud574\uc11c\ub3c4": [5, 6, 14, 16, 23, 31, 42, 43, 47, 49, 54, 55, 56], "animate\ud560": 5, "\uc788\uace0": [5, 15, 16, 18, 22, 23, 24, 29, 32, 33, 34, 36, 37, 44, 46, 51, 53, 58, 59, 60], "\uc6b0\uc6d4\uc131\uc744": 5, "histori": 5, "animation\uc740": 5, "\uc774\ubbf8\uc9c0\ub85c\ubd80\ud130": [5, 12, 18, 21, 34], "\uc0ac\uc2e4\uc801\uc778": [5, 21, 36], "animate\ud558\ub294": 5, "\uc791\uc5c5\uc73c\ub85c": 5, "gan\uc744": [5, 21, 22, 35, 40, 41], "\uc9c4\ud589\ub418\uc5b4\uc654\ub2e4": 5, "\uadf8\ub7ec\ub098": [5, 16, 17, 19, 21, 27, 43, 54, 57, 58, 61], "\ub610\ub294": [5, 7, 8, 17, 21, 23, 24, 31, 34, 35, 37, 43, 47], "\ube44\ub514\uc624\ub294": 5, "local": [5, 22, 32, 34, 35, 38, 40, 44], "detail": [5, 10, 12, 17, 18, 22, 24, 33, 40, 45, 61], "semant": [5, 6, 12, 24, 26, 28, 29, 32, 38, 43, 44, 47, 59, 61], "inconsist": 5, "instabl": [5, 46], "\ub110\ub9ac": [5, 21], "\uc0ac\uc6a9\ub418\uae30\uc5d0\ub294": 5, "\uc788\uc5b4\uc654\ub2e4": 5, "\uc6b0\uc218\uc131\uc5d0": 5, "task\uc5d0": [5, 7, 10, 20, 27, 30, 43], "\ud65c\uc6a9\ud558\ub824\ub294": [5, 40], "dreampos": 5, "23": [5, 13, 47], "04": 5, "\ud655\uc7a5\ud55c": 5, "fashion": [5, 32], "\ud569\uc131\uc744": [5, 19, 29], "\uac00\ub2a5\ud558\ub294\ub370": 5, "\ucd08\uc810\uc744": [5, 35, 43, 60, 61], "\ub9de\ucdc4\ub2e4": 5, "clip\uacfc": [5, 21], "feature\ub97c": [5, 8, 22, 26, 33], "\ud1b5\ud569\ud55c": [5, 12, 28], "adpatar": 5, "module\ub97c": 5, "sample\uc5d0": [5, 34], "finetuning\uc774": [5, 27], "\ud544\uc694\ud558\uace0": [5, 21, 33, 36, 37], "\uc6b4\uc6a9": 5, "\ud6a8\uc728\uc774": 5, "disco": 5, "07": [5, 
8, 25], "\uc218\uc815\ud558\uc5ec": 5, "danc": [5, 17], "\uc9c4\ud589": [5, 6, 8, 14, 17, 18, 19, 33, 40, 48, 49, 52, 58], "controlnet\uc744": [5, 17], "\ud1b5\ud569": [5, 6, 17], "\uad6c\ucd95": [5, 6, 18, 44, 48, 57, 59], "\ubcf4\uc874\uc5d0": [5, 21], "\uc5b4\ub824\uc6c0\uc744": [5, 17, 21, 23, 36, 39, 40, 48], "\uacaa\uace0": [5, 23], "frame\uac04": [5, 17], "jitter": [5, 32], "issu": [5, 18, 48], "\uad00\uc810\uc5d0\uc11c\uc758": 5, "generation\uc5d0": [5, 9, 23], "\ud488\uc9c8\uacfc": [5, 21, 31, 36], "\ub2e4\uc591\uc131\uc5d0": [5, 43], "\uc9c4\uc804\uc774": 5, "\uc788\uc5b4\uc654\uc9c0\ub9cc": 5, "detail\uc744": [5, 20, 40, 43], "\uc0b4\ub9ac\ub294": 5, "\uc5b4\ub835\uace0": [5, 6, 17, 21, 35, 47], "\uc815\ud655\ub3c4": [5, 26, 47], "\uce21\uba74\uc5d0\uc11c\ub3c4": 5, "\ubd80\uc815\ud655\ud55c": [5, 16], "\ub354\uc6b1\uc774": 5, "\uc2e4\uc9c8\uc801": 5, "\ub2e4\ub8f0": 5, "\uc77c\uad00\uc131": [5, 8, 17, 18, 39], "\uc548\uc815\uc801\uc774\uace0": 5, "\uc5f0\uc18d\uc801\uc778": [5, 18, 21, 34, 44, 54], "\uc601\uc0c1\uc744": [5, 12, 44, 45, 54], "\ub9cc\ub4e4\uc5b4\ub0b4\ub294": 5, "\ud604\uc7ac\ub294": 5, "\uc77c\ubc18\uc131\uacfc": 5, "\ub9cc\uc871\ud558\ub294": [5, 16, 27, 54], "\ucc3e\uc744": [5, 35, 43, 45], "\uad6c\uc870": [5, 12, 13, 18, 22, 26, 31, 34, 35, 39, 41, 42, 47, 58], "\uc694\uc57d": [5, 21, 31, 35, 42, 45], "consistency\ub97c": [5, 26, 27, 54], "attention\ub97c": 5, "unet\uc73c\ub85c": 5, "\ud558\uc5ec\uae08": 5, "\uc77c\uad00\ub41c": [5, 8, 9, 18, 21, 31, 34, 35, 39, 44, 61], "\uad00\uacc4\uc131\uc744": 5, "\uc885\ud569\uc801\uc73c\ub85c": 5, "controllability\ub97c": [5, 41], "lightweight": [5, 26], "signal\uc744": 5, "\uc808\ucc28\uc5d0": 5, "\ud1b5\ud569\ud568": 5, "stability\ub97c": 5, "\uc5f0\uc18d\uc801\uc774\uace0": 5, "\uace0\ud574\uc0c1\ub3c4": [5, 7, 27, 29, 31, 33, 34, 35, 38, 42, 47, 55, 61], "\ubcf4\uc874\uc744": [5, 17], "\uad00\uacc4\uc131": 5, "5k": [5, 17, 58], "\uc778\ud130\ub137": 5, "\uc138\ud2b8\ub85c": 5, 
"\uc7a5\uc810": [5, 22, 26, 31, 35], "appearance\uc758": 5, "consistency\uc744": 5, "flickering\uacfc": 5, "\uc2e0\ub8b0\ub3c4\uc758": [5, 33], "image\uc5d0\ub3c4": 5, "benchmark\uc5d0": 5, "\uc6b0\uc218\uc131": 5, "\uc99d\uba85": [5, 6, 27], "t2i": [5, 7, 12, 24, 26, 28, 31, 33, 36, 38, 56], "ldm": [5, 6, 11, 15, 17, 24, 27, 38, 39, 40, 44, 54, 55, 56], "space\uc5d0\uc11c\uc758": [5, 16], "controlnet": [5, 7, 17, 26, 56, 59], "adapt": [5, 8, 15, 20, 21, 22, 24, 30, 36, 41, 56], "mask": [5, 7, 20, 28, 31, 32, 44, 48, 53, 59], "edg": [5, 6, 10, 26, 47, 56, 59], "depth\uc640": 5, "\uc870\uac74\ubd80": [5, 21, 31, 35, 39], "ip": [5, 17], "objectstitch": 5, "edit": [5, 6, 18, 22, 23, 24, 33, 37, 43, 45, 55, 56, 59], "\ubc29\ubc95": [5, 6, 7, 12, 15, 18, 19, 20, 21, 28, 34, 35, 39, 45, 49, 51, 52, 54, 57, 62], "tryondiffus": 5, "virtual": 5, "apparel": 5, "try": 5, "on\uc744": 5, "parallel": [5, 16, 57], "u": [5, 6, 8, 14, 15, 16, 17, 18, 19, 21, 28, 29, 31, 32, 34, 36, 39, 45, 46, 50, 52, 56, 58, 59], "t2v": [5, 31], "inter": [5, 32], "frame": [5, 17, 32], "modeling\uc744": 5, "\uc774\ub904\uc9d0": 5, "\uc0bd\uc785\ud55c": 5, "animatediff": [5, 17], "person": [5, 9, 11, 12, 20, 24, 43, 55], "data\ub85c": [5, 11, 17, 29], "\ud559\uc2b5\uc2dc\ud0a8": [5, 10, 19, 23, 29, 39, 45], "anyone\uc5d0\uc11c\ub294": 5, "modeling\uc5d0": 5, "\ubc1b\uc544": [5, 10, 14, 17, 31, 35, 39, 42, 49, 57], "\ubc29\ubc95\ub860": [5, 18, 19, 20, 27, 29, 30, 31, 54], "i2v": 5, "videocompos": 5, "condit": [5, 6, 7, 11, 12, 14, 19, 20, 22, 25, 26, 31, 32, 33, 35, 37, 39, 42, 44, 45, 46, 49, 50, 53, 54, 59], "latent\uacfc": 5, "\uac04": [5, 14, 21, 27], "mix": [5, 13, 22, 42], "videocraft": 5, "textual": [5, 11, 18, 24, 26, 36, 50], "\ud1b5\ud569\ud558\uc5ec": 5, "attention\uc5d0": [5, 17, 22], "\uc8fc\uc785": [5, 6, 40], "\ubc29\ubc95\ub4e4": 5, "\uc548\uc815\uc801\uc778": [5, 39, 47], "\uc0ac\ub78c": [5, 17, 21, 31, 38, 47, 52], "\uc0dd\uc131\uc5d0\ub294": [5, 17, 22], "pidm": [5, 21], "lfdm": 
5, "leo": 5, "\ubaa9\ud45c": [5, 8, 11, 12, 21, 43, 47], "animation\uc744": 5, "guid": [5, 6, 9, 16, 19, 21, 27, 37, 46, 53], "\ud569\uc131": [5, 19, 21, 26, 34, 35, 42], "func": 5, "embed": [5, 7, 8, 10, 11, 12, 14, 15, 18, 19, 21, 22, 23, 26, 30, 31, 32, 33, 39, 40, 44, 48, 49, 50, 59, 61], "timestep": [5, 10, 11, 12, 15, 16, 19, 20, 27, 45, 50, 54, 59, 61], "vit": [5, 7, 15, 17, 18, 22, 23, 26, 28, 35, 38, 45, 50], "14": [5, 6, 18, 22, 26, 35, 39, 45, 54, 58], "downsampl": [5, 6, 14, 22, 29, 31, 38, 49, 53, 56, 58, 59], "upsampl": [5, 14, 22, 23, 31, 33, 39, 44, 49, 52, 56, 59], "re": [5, 7, 26, 33], "tran": 5, "block\ubcc4": 5, "attention\ub85c": 5, "\uad6c\uc131": [5, 17, 18, 19, 24, 31, 33, 34, 35], "overview": [5, 8, 12, 16, 17, 18, 20, 28, 34, 36, 46, 60], "3\uac00\uc9c0": [5, 8, 12, 17, 24, 52, 53, 54, 56, 59], "\uc694\uc18c": [5, 20, 24, 31, 36], "image\ub85c\ubd80\ud130": [5, 23], "character\uc758": 5, "\uc81c\uc5b4\uac00\ub2a5\ud55c": [5, 17], "movements\ub97c": 5, "signal": [5, 12, 13, 58, 61], "\uc5f0\uc18d\uc131\uc744": [5, 22], "relationship": [5, 23, 28, 33], "text\ubcf4\ub2e4": 5, "image\uac00": [5, 16, 23, 47], "level": [5, 12, 16, 19, 28, 33, 37, 38, 41, 44, 57, 59, 61], "\ub0b4\ud3ec\ud568": 5, "encoder\uac00": [5, 39], "encoder\ubcf4\ub2e4": 5, "\uc0ac\uc6a9\ub418\uc5c8\uc9c0\ub9cc": 5, "consistency\uc5d0\ub294": 5, "\uc5ed\ubd80\uc871": 5, "\uc774\uc720": [5, 6, 11, 19, 25, 34], "encoder\ub294": [5, 26, 38, 39, 42, 60], "224x224\uc758": 5, "\uc774\ubbf8\uc9c0\ub4e4\ub85c": 5, "\uad6c\uc131\ub418\uc5b4": [5, 15, 28, 59], "\uc138\ubd80\uc815\ubcf4": 5, "\uc190\uc2e4\uc774": [5, 21, 29, 54, 56], "clip\uc740": [5, 16, 23, 26], "text\uc5d0": [5, 36, 43, 48], "\ub354\uc6b1": [5, 8, 12, 20, 26, 31, 32, 35, 39, 42, 48, 52], "\ubd80\ud569\ud558\uac8c": 5, "\ud6c8\ub828\ub418\uc5b4": [5, 24, 26], "matching\uc5d0": 5, "\uac15\uc870\ub418\uace0": 5, "encoding\uc5d0": [5, 22], "\ubd80\uc871\ud568\uc774": 5, "extract": [5, 14, 36], "network\uc778": [5, 
16], "\uace0\uc548": 5, "\uc81c\uc678": 5, "referencenet\uc740": 5, "sd\ub85c": 5, "\ucd08\uae30\ud654\ud558\uace0": 5, "\uc218\ud589\ud558\uace0": 5, "unet\uacfc": [5, 21, 29], "layer\ub85c": [5, 31], "x_1": [5, 14, 36, 39], "x_2": [5, 39], "\uc8fc\uc5b4\uc84c\uc744": [5, 6, 17, 18, 19, 21, 29, 31, 54], "t\ubc88": 5, "\uacf1\ud574": 5, "w\ucd95\uc5d0": 5, "concat": [5, 11, 12, 15, 32, 34, 38, 44], "attention\uc744": [5, 26], "map\uc758": [5, 22, 29, 40], "\ubc18\uc744": 5, "\uacb0\uacfc\ub85c": [5, 9, 28, 31, 42], "\ubf51\uc74c": [5, 48], "sd\ub97c": [5, 26, 38], "\uc0ac\uc6a9\ud568\uc5d0": 5, "\ucd08\uae30\uac12\uc774": 5, "\uc815\uc758": [5, 6, 13, 25, 58], "\ub41c": [5, 6, 9, 12, 20, 24, 27, 30, 32, 33, 34, 36, 38, 42, 45, 46, 47, 50, 54, 55, 56, 57, 59], "\uc0ac\uc6a9\uac00\ub2a5": 5, "referencenet\uc758": 5, "\uacf5\uc720\ub418\uace0": 5, "\ub124\ud2b8\uc6cc\ud06c": [5, 18, 21, 31, 34, 60], "\uac00\uc9d0\uc5d0": 5, "unet\uc740": [5, 26], "space\uc5d0": [5, 16, 54], "\uc0c1\uad00\uad00\uacc4\uac00": [5, 42, 58], "\uc120\ubcc4\uc801\uc73c\ub85c": 5, "\uc81c\uacf5\ud568\uc5d0": 5, "\uc2e0\uc18d\ud55c": 5, "\ucd08\uae30\uac12": 5, "\uc124\uc815": [5, 6, 14, 19, 30, 35, 42, 48], "controlnet\uc740": [5, 26], "\uacf5\uac04\uc801\uc73c\ub85c": [5, 21, 28], "align\ub41c": 5, "\ubd80\uc801\ud569": 5, "\ubc29\ubc95\uc5d0\uc11c\ub294": [5, 19], "\uacf5\uac04\uc801\uc73c\ub85c\ub294": 5, "\uad00\uacc4\ub418\uc5b4\uc788\uc9c0\ub9cc": 5, "align\ub418\uc9c0": 5, "\ud0c0": [5, 8, 46, 47, 58], "generation\uc5d0\uc11c\ub294": [5, 7], "frame\uc5d0": 5, "denoising\uc744": [5, 16, 25, 28, 37], "\ucd94\ucd9c\ud560": 5, "\ubc88\ub9cc": [5, 24], "\ud544\uc694": [5, 13, 18, 19, 27, 31, 34, 35, 49, 54], "\ud6a8\uacfc": [5, 17, 21, 27, 34, 40, 47, 52], "\ub2e8\uacc4\uc5d0\uc11c": [5, 8, 14, 20, 21, 24, 26, 28, 32, 38, 43, 61], "\uacc4\uc0b0\ub7c9\uc774": [5, 51], "\uc99d\uac00\ud558\uc9c0": 5, "\uc54a\ub294\ub2e4": [5, 16, 21, 26, 28, 31, 35, 43, 44, 49], "robust\ud55c": 5, 
"\uc785\uc99d\ud574\uc654\uc9c0\ub9cc": 5, "tuning\uc774": [5, 42, 54], "\ud544\uc694\ud588\uc5c8\ub2e4": 5, "\uc800\uc790\ub4e4\uc740": [5, 12, 15, 16, 19, 20, 23, 28, 32, 34, 35, 38, 42, 43, 48, 49, 52, 57], "\uacc4\uc0b0\ub7c9": 5, "\ub9c9\uae30\uc704\ud574": 5, "\ud1b5\ud569\ud558\uc9c0": 5, "latent\uc640": 5, "\ud574\uc0c1\ub3c4\ub97c": [5, 34, 38, 41, 44, 53], "align\uc744": [5, 26], "four": 5, "kernel": [5, 22], "stride": [5, 22, 47, 56], "32": [5, 7, 13, 14, 18, 22, 29, 30, 33, 41, 42, 45, 48, 49, 55, 59], "64": [5, 18, 19, 20, 22, 31, 34, 41, 42, 52, 58, 59], "128": [5, 20, 22, 33, 34, 39, 42, 47, 51, 56, 58], "channel": [5, 10, 14, 31, 32, 33, 41, 49, 53, 59], "final": [5, 14, 23], "\uc774\ubbf8": [5, 11, 20, 47, 52], "\uacf3\uc5d0\uc11c": 5, "\ud1b5\ud569\ud588\uc744": 5, "dependency\uac00": 5, "\uc548\uc5d0": [5, 40, 52], "attention\uacfc": [5, 26, 39], "\uc21c\uc11c": 5, "reshap": [5, 15], "connect": [5, 15, 31, 34, 46, 52, 56], "details\uc5d0": 5, "continu": [5, 6, 34, 39, 46, 50, 56, 60], "\ub2e8\uacc4": [5, 9, 14, 18, 19, 21, 29, 35, 51], "\uccab": [5, 6, 9, 10, 16, 21, 33, 34, 38, 41, 47], "\ubc88\uc9f8": [5, 6, 9, 10, 18, 19, 21, 34, 35, 38, 41, 42], "singl": [5, 20, 22, 29, 34, 36, 39, 40, 43, 46, 54, 55, 56, 57, 59, 61], "\ubc1b\ub294": [5, 12, 14, 21, 31, 41], "\ud074\ub9bd\uc5d0\uc11c": 5, "\ub79c\ub364\uc73c\ub85c": [5, 18, 21, 22], "\uc120\ud0dd": [5, 14, 19, 31, 34, 35, 48], "weight\ub294": [5, 10, 21, 27, 30], "guider\ub294": [5, 17], "\uadf8\ub300\ub85c": [5, 12, 16, 23, 27, 28, 31, 45, 49, 59], "\ud6c8\ub828\ud55c": [5, 18], "\uc18d": [5, 21], "layer\ub9cc": [5, 30], "24frame": 5, "10": [5, 6, 11, 14, 15, 17, 19, 20, 24, 25, 29, 32, 33, 34, 36, 37, 38, 39, 42, 44, 45, 46, 47, 49, 51, 54, 55, 58, 59, 61], "second": [5, 12, 20, 36, 46], "long": [5, 14, 17, 50, 55], "\uc778\ud130\ub137\uc5d0\uc11c": [5, 17], "\ub2e4\uc6b4\ub85c\ub4dc": 5, "dwpose": 5, "distil": [5, 18, 20, 22, 39, 49, 55, 56], "whole": 5, "bodi": [5, 17, 59], "idea": [5, 
37], "research": [5, 33, 42, 53, 58], "student": [5, 57, 58], "head": [5, 12, 14, 31, 40, 49], "onli": [5, 7, 13, 20, 26, 34, 38, 40, 44, 48], "nvidia": [5, 20, 21, 29, 34, 50, 55, 59], "a100": [5, 21, 22, 29, 30, 50, 54, 55], "768": [5, 22, 54], "\ud574\uc0c1\ub3c4": [5, 15, 44], "center": [5, 17, 20, 26, 34, 49], "crop": [5, 17, 26, 29], "30": [5, 20, 31, 32, 39, 42, 44, 53], "000": [5, 6, 19, 38, 45, 54], "size": [5, 7, 17, 20, 22, 26, 28, 30, 33, 34, 38, 41, 47, 51, 52, 53, 55, 58, 59, 60], "learn": [5, 7, 12, 15, 17, 19, 20, 21, 22, 26, 28, 30, 31, 34, 39, 40, 41, 42, 43, 47, 52, 55, 57, 58, 59, 61], "rate": [5, 7, 11, 13, 15, 17, 21, 26, 29, 31, 34, 40, 46, 47, 55, 58, 59, 61], "\uce90\ub9ad\ud130": [5, 18, 24], "skeleton\uc758": 5, "\uae38\uc774\uc5d0": 5, "\uadfc\uc0ac\ud558\uae30": 5, "\uc720\ub3c4\ub41c": [5, 47], "rescal": [5, 38, 46, 49], "sampler": [5, 26, 42, 58], "\uae34": [5, 17, 41, 44], "aggreg": 5, "\ucc44\ud0dd": [5, 8, 11, 49], "evalu": [5, 6, 11, 12, 13, 18, 22, 23, 44, 46, 53, 55, 56], "benchmark": [5, 28, 47, 52, 53, 57], "2\uac1c": [5, 14, 21, 33], "ubc": [5, 21], "tik": 5, "tok": 5, "\uc804\uc2e0\uc774": 5, "\ub098\uc624\ub294": [5, 27, 34, 35, 40, 42, 47, 48, 49, 50, 54], "\uc808\ubc18": [5, 28], "\uae38\uc774\uc758": 5, "portrait": [5, 12, 40], "cartoon": [5, 18], "humanoid": 5, "characters\uc5d0": 5, "\ubcf4\uc774\ub294": [5, 11, 13, 18, 22, 25, 36, 37, 47, 56, 57, 58], "psnr": [5, 32], "lpip": [5, 6, 16, 22, 32, 46, 56, 61], "fvd": [5, 31, 44], "fr\u00e9chet": 5, "distanc": [5, 19, 22, 31, 34, 35, 46, 48, 55], "quantit": [5, 6, 23, 26, 28, 32, 54, 59, 61], "500": [5, 11, 20, 21, 42], "videos\ub85c": 5, "\uc57d": [5, 18, 21, 23, 24, 43, 47, 48], "bdmm\uc740": 5, "\uc637\uc758": [5, 21], "\uc783\uc5b4\ubc84\ub9ac\ub294": [5, 35, 40], "\uc0c9\uacfc": [5, 36], "\uc12c\uc138\ud55c": [5, 36], "\uad6c\uc870\uc801": [5, 7, 17], "\uc694\uc18c\uc5d0": [5, 36], "error": [5, 24, 45, 58, 60], "\uc138\ubd80": [5, 8, 18, 24, 31, 35, 39, 43], 
"\ub0b4\uc6a9\uae4c\uc9c0": 5, "\uc77c\uad00\uc131\uc788\uac8c": [5, 12], "\ubcf4\uc874\ub428": 5, "tiktok": 5, "340": 5, "between": [5, 12, 14, 23, 34, 48, 52, 58], "disco\uc5d0\uc11c\ub294": 5, "foreground": [5, 20, 28], "mask\ub97c": [5, 28, 40], "subject": [5, 21, 24, 50, 55], "motion\uc73c\ub85c\ubd80\ud130": 5, "\uc804\uacbd\uacfc": 5, "\ubc30\uacbd\uc758": 5, "\uad6c\ubd84": 5, "sequence\uc5d0\uc11c\ub3c4": 5, "\uc2dc\uac01\uc801\uc73c\ub85c": [5, 9, 48], "robust": [5, 6, 12, 16, 32, 45, 59], "gen": [5, 50], "image\uc5d0": [5, 8, 14, 16, 23, 26, 43], "\uc678\uad00": [5, 17, 21], "\uc2e0\ub8b0\ub3c4\ub9cc": 5, "\uc5bc\uad74\uc774": 5, "\uc77c\uad00\ub418\uac8c": [5, 9, 18], "\uc720\uc9c0\ub418\ub294": [5, 12], "\ubb38\uc81c\uc5d0": [5, 14, 35], "\ubd09\ucc29\ub41c": 5, "\uc0c1\ud669": [5, 19], "\uc18d\uc5d0\uc11c": 5, "\ub300\ube44": [5, 19, 29, 54], "\uc2dc\uac04\ub3d9\uc548": 5, "apper": 5, "design": [5, 22, 41, 51, 60], "\ud6a8\uacfc\uc131": 5, "\uc99d\uba85\uc744": [5, 16], "encoder\ub9cc": 5, "\uacb0\ub860": [5, 42, 45], "referencenet\ub97c": 5, "\uc88b\uc558\ub2e4": [5, 45], "\uac83\uc5d0": [5, 10, 28, 29, 33, 36, 43, 47], "\uce21\uba74\ub9cc": 5, "\ubcf4\uc774\uae30": 5, "\ubcf4\uc774\uc9c0": [5, 20, 61], "\ubd80\ubd84\uc5d0": [5, 9, 22, 28, 41, 42, 46, 60, 61], "ill": 5, "problem\uc73c\ub85c": [5, 27, 36], "\ubd88\uc548\uc815": [5, 39], "\ud65c\uc6a9\uc5d0": 5, "non": [5, 6, 12, 16, 20, 22, 25, 33, 34, 45, 49, 55, 59], "oper": [5, 31, 46], "effici": [5, 10, 12, 13, 30, 34, 45, 52, 55], "translat": [6, 8, 20, 29, 34], "brownian": [6, 46], "bridg": 6, "cvpr": [6, 11, 19, 22, 25, 29, 33, 41, 50, 53, 55, 62], "2205": [6, 52], "07680": 6, "xuekt98": 6, "seonhoon": [6, 12, 45, 62], "relat": [6, 12, 32, 45, 62], "youtub": [6, 12, 30, 48], "nov": [6, 9, 16, 31, 32, 44, 45, 58], "\ub3c4\uc785\ud55c": [6, 24, 29, 35, 38], "\ucd5c\ucd08\uc758": [6, 12, 34], "\ud55c\uacc4\ub97c": [6, 22, 31, 33, 40, 47], "\uadf9\ubcf5\ud568": 6, "\uc774\ud574\ud558\uae30": 6, 
"\uc774\ud574\ud574\uc57c\ud568": 6, "stochast": [6, 12, 13, 23, 36, 37, 46, 51, 54, 56, 58], "\ud574\ub2f9\ud568": 6, "\uc2dc\uac04\uc758": 6, "\ud750\ub984\uc5d0": 6, "\ubd88\ud655\uc2e4\uc131\uc744": 6, "\ubcc0\ud558\ub294": [6, 16], "\ubcc0\uc218\ub4e4\uc758": 6, "\uc9d1\ud569": [6, 14, 18, 26, 34], "x_t": [6, 12, 14, 16, 23, 26, 27, 29, 35, 36, 39, 45, 46, 49, 54, 57, 59], "\ubcc0\uc218\ub97c": [6, 28], "\ubcc0\uc218\uac00": 6, "\uad00\ucc30\ub41c": [6, 34], "\uc2dc\uac04\uc744": [6, 12, 21, 29, 35], "\ub098\ud0c0\ub0c4": [6, 14, 18, 19, 42], "discret": [6, 33, 39, 46, 48, 58], "\uad6c\ubd84\ud560": 6, "variabl": [6, 16, 19, 51, 58, 60], "wiener": 6, "\uc18c\uac1c": [6, 11, 12, 19], "\uc720\uccb4\uc758": 6, "\ubbf8\uc18c\uc785\uc790\uac00": 6, "\ubd88\uaddc\uce59\ud558\uac8c": 6, "\uc6b4\ub3d9\ud558\ub294": 6, "\ud604\uc0c1": [6, 18, 22, 29, 34, 50], "\uad74\ub69d\uc5d0\uc11c": 6, "\ud37c\uc838\ub098\uac04": 6, "\uc5f0\uae30": 6, "\uc624\ub978\ucabd\uc73c\ub85c": 6, "90\ub3c4": 6, "\ud68c\uc804\uc2dc\ud0a8": 6, "\uc0ac\uc9c4\uc73c\ub85c\ubd80\ud130": 6, "\uc9c1\uad00\uc801\uc73c\ub85c": [6, 39, 57], "\uc774\ud574\ud574\ubcfc": 6, "\uc5f0\uc18d": [6, 21], "\uacfc\uc815\uc73c\ub85c": [6, 44, 45, 50], "\ubaa8\ub378\ub9c1\ud55c": [6, 48], "w_0": [6, 11, 27, 30], "max": [6, 9, 27, 30, 54], "1000": [6, 13, 14, 20, 34, 38, 42], "\uc778": [6, 7, 12, 16, 29, 32, 36, 45, 46, 54, 57, 58], "100\ubc88": 6, "w_t": [6, 18, 46, 50], "\ub098\ud0c0\ub0b8\ub2e4": [6, 16, 43], "\uc774\ud574\ud574\ubcf4\uc790": 6, "\uac00\uc815\ud574\ubcf4\uc790": 6, "\uc774\ub77c\uace0": [6, 10, 12, 16, 43, 46], "\ud558\uc790": [6, 28, 36], "\ud558\ub2e4\uace0": 6, "\uc815\uc218": [6, 38], "\ubd80\uc5ec\ub418\uc5b4\uc57c": 6, "\uac04\uaca9\uacfc": 6, "\ubcc0\ud654\ub7c9\uc774": [6, 11, 58], "\ube44\ub840\ud574\uc57c": 6, "\uc624\ub798": [6, 35, 43], "\uc9c0\ub0ac\uc744\uc218\ub85d": 6, "\ubcc0\ud55c\ub2e4": 6, "notat": [6, 44, 58], "www": [6, 12, 15, 30, 48], "com": [6, 12, 15, 30, 35, 39, 43, 
47, 48, 50], "watch": [6, 12, 30, 48], "ld0rxwajpkm": 6, "ab_channel": [6, 12], "finrgb": 6, "delta": [6, 11, 16, 20, 24, 27, 30, 36, 46, 55, 56, 57], "\uac04\uaca9": 6, "\uc0b4\ud3b4\ubcf4\uace0\uc790": 6, "\uac04\uaca9\uc758": [6, 34], "epsilon_t": [6, 16, 23], "\uc2dc\uc810\uc5d0\uc11c": [6, 8, 18], "\uac04\uaca9\uae4c\uc9c0": 6, "\uc99d\uac00\ud55c": [6, 28, 42], "\uac12": [6, 15, 19, 20, 22, 26, 34, 39, 46, 48], "w_": [6, 18, 38, 50], "\uc774\ud574": [6, 12, 28, 31], "\ub77c\uace0": [6, 8, 11, 12, 16, 24, 27, 30, 32, 33, 36, 43, 45, 52, 54, 60], "\uc815\uc758\ud574": 6, "\uadfc\uac70\ub97c": 6, "\ucc3e\uc544\ubcf4\uba74": 6, "\ubcc0\uc218": 6, "\ub3c4\uc785\ud568\uc73c\ub85c\uc368": 6, "\uac04\uaca9\ub3c4": 6, "\uace0\ub824": [6, 49], "\uadf8\ub807\ub2e4\uba74": [6, 12, 57], "\uc65c": [6, 10, 12, 15, 16, 25, 32, 45, 57], "\ud558\ud544": 6, "\uacf1\ud588\uc744\uae4c": 6, "\uac00\uae4c\uc6cc\uc9c8": 6, "\ucc9c\ucc9c\ud788": 6, "\uc218\ub834\ud568": 6, "\ub9cc\uc57d": [6, 42], "\ud558\ub2e4\uba74": 6, "\ub77c\uba74": 6, "\uc791\uc544\uc9d0": 6, "\ucee4\uc9c8": [6, 36], "\ucee4\uc9d0": 6, "\uc8fc\uc758\ud560": 6, "\uc0ac\ud56d": [6, 19], "\uc774\ubbc0\ub85c": [6, 12, 16, 34, 46, 49, 58], "w_1": 6, "\uc11c\ub85c": [6, 9, 12, 16, 18, 28, 32, 36, 41, 45, 47], "\ub3c5\ub9bd\uc778": 6, "\ub9de\uc9c0\ub9cc": 6, "\ub3c5\ub9bd\uc774\ub77c\ub294": 6, "\ub9d0\uc740": [6, 20, 47], "\uc544\ub2d8": [6, 20], "epsilon_0": 6, "var": 6, "\uacf5\ubd84\uc0b0\uc740": 6, "\ud30c\ub780\uc0c9": [6, 42], "\uc810\ub4e4\uc740": [6, 20], "1\ubc88": [6, 47], "\uacb0\uacfc\uc784": [6, 29], "\uae4c\uc9c0": [6, 28, 30, 32, 34, 36, 44, 45, 53, 54, 58], "\uc218\ud589\ud558\uba74": 6, "\ub9cc\ud07c": [6, 16, 38, 45, 48], "t_2": [6, 27, 46, 54], "t_1": [6, 27, 46, 54], "5\ubd84": [6, 50], "10\ubd84\uc73c\ub85c": 6, "\uc9c4\ud589\ud558\uba74": [6, 20], "w_5": 6, "\uc544\ub2d0": 6, "\uc788\uc73c\ub098": [6, 20, 26, 29, 31], "\ubcc0\ud654\ub7c9": [6, 20], "t_": [6, 16, 18, 20, 22, 27, 34, 37, 46, 54], 
"t_5": 6, "\ub530\ub978\ub2e4": [6, 35], "standard": [6, 13, 15, 22, 41, 45, 46, 47, 56, 58], "\uc2dc\uc810\uacfc": 6, "\uc2dc\uc810\uc758": [6, 13, 21, 49], "\uc77c": [6, 12, 19, 51, 54, 58], "\uc810\uc744": [6, 16, 21, 28, 35, 37, 40], "\uc120\ud615\uc73c\ub85c": 6, "\uc5f0\uacb0\ud558\ub294": 6, "\uc774\ud574\ub97c": [6, 16, 26], "probabl": [6, 19, 27, 37, 46, 57], "start": [6, 19, 20], "state": [6, 26, 35, 58, 59], "end": [6, 7, 14, 21, 36, 39, 41, 56, 57], "\ub418\uc5b4": [6, 8, 12, 15, 18, 24, 28, 29, 33, 41, 46, 51, 59, 61], "\uc815\uc758\ub420": 6, "\uc2dc\uc791\uac12\uacfc": 6, "123": 6, "\ubd84\uc0b0\uc740": 6, "\uc2dc\uc791\ud574\uc11c": [6, 14, 41], "\uc99d\uac00\ud558\ub2e4\uac00": 6, "\ucd5c\ub300\uac00": 6, "\ub418\uc5c8\ub2e4\uac00": 6, "\uc774\ud6c4\ub85c\ub294": 6, "\uac10\uc18c\ud558\uc5ec": [6, 28, 57], "\ub9c8\uc9c0\ub9c9\uc5d4": 6, "\uc218\ub834\ud558\uac8c\ub41c\ub2e4": 6, "w_1000": 6, "100\uac1c\uc758": [6, 21], "\uc0d8\ud50c\ub9c1\ud55c": [6, 18, 46], "abstrcat": 6, "\uae30\uc874\uc758": [6, 10, 12, 16, 21, 27, 31, 32, 34, 37, 42, 47, 54, 56, 59], "\ub4e4\uc740": [6, 12, 20, 32, 36, 52], "\ubcc0\ud658\uc744": [6, 18, 29, 39], "gener": [6, 10, 12, 17, 19, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 38, 39, 40, 41, 43, 45, 47, 48, 49, 50, 51, 52, 56, 57, 58, 59, 62], "\ub2e4\ub8f8": [6, 34], "\uc774\ub85c": [6, 24, 52], "\uc0c1\uc774\ud55c": 6, "\ubcc0\ud658": [6, 8, 15, 18, 34, 48, 57], "\uc5d0\ub294": [6, 12, 14, 32, 47, 52, 54], "\uc5b4\ub824\uc6c0": [6, 8, 11, 15, 17, 18, 19, 22, 35, 51, 57], "\uae30\ubc18\ud55c": [6, 7, 13, 19, 34], "\ubaa8\ub378\ub9c1\ud558\ubbc0\ub85c": 6, "bidirect": 6, "\uc784": [6, 45], "\ubcc0\ud658\uc5d0": 6, "\uc811\ubaa9\ud55c": 6, "\ub17c\ubb38\uc784": 6, "\ud6cc\ub96d\ud55c": [6, 45, 48, 52], "\uc2e4\ud5d8\uc801\uc73c\ub85c": [6, 16, 39, 41, 46, 52], "\uc99d\uba85\ud568": [6, 15, 52], "introduct": 6, "i2i": 6, "\ubcc0\ud658\uc5d0\uc11c": 6, "pix2pix": [6, 47, 56], "fideltii": 6, "\ub192\uc558\uc73c\ub098": 6, 
"output": [6, 7, 10, 12, 14, 15, 17, 18, 19, 22, 30, 34, 39, 44, 46, 47, 48, 50, 53, 56, 57, 58], "\uc0dd\uc131\ud615": [6, 12, 51], "\uc548\ub098\uc624\uace0": 6, "applic": 6, "\uc2dc\ud0b4\uc73c\ub85c\uc368": [6, 32], "desir": [6, 31, 36, 57], "\ucd94\ub860\ud574\ub0b8\ub2e4\ub294": 6, "\uba85\ub8cc\ud55c": 6, "\uc774\ub860\uc801": 6, "\uadfc\uac70\uac00": [6, 20], "\uc548\ub418\ubbc0\ub85c": 6, "domain": [6, 16, 47, 56, 58], "\uba87\uba87": [6, 16, 23, 25, 35], "\uc5d0\uc11c\ub9cc": [6, 34], "\ud65c\uc6a9\ub420": [6, 12], "inpaint": [6, 26, 32, 33, 46, 53, 59], "super": [6, 13, 14, 22, 31, 41, 42, 44, 46, 47, 51, 53, 56, 59, 60], "\uac1c\uc120": [6, 22, 25, 34, 42, 49, 59], "\ud558\uae34": 6, "\ud588\uc73c\ub098": [6, 20], "mechan": [6, 11, 31, 61], "multi": [6, 7, 11, 12, 17, 20, 21, 22, 26, 28, 43, 52, 56, 57, 59], "modal": [6, 7, 12, 28, 43, 52], "\uc8fc\uc5b4\uc9c0\ubbc0\ub85c": 6, "\uc81c\uc2dc\ud558\uae30\uac00": 6, "\ud798\ub4e6": 6, "\uc544\ud0a4\ud14d\uccd0": [6, 12, 45, 48, 49, 52], "\uac00\uc18d\uc744": 6, "\uc218\ud589\ud568": [6, 31], "work": [6, 12, 45, 55, 62], "duffus": 6, "simplifi": 6, "\uc7a0\uae50": 6, "\uac19\uc74c": [6, 32, 43], "\uc0bd\uc785\ub41c": 6, "\ub4dc\ub7ec\ub098": 6, "\uc788\uc9c0": [6, 24, 25, 35, 44, 61], "\uc54a\uc73c\ubbc0\ub85c": [6, 35, 36], "\ub3c4\ub2ec\ud560": [6, 35], "\ubcf4\uc7a5\uc774": 6, "\ub3d9\uc548\uc758": 6, "\ubd84\ud3ec\uac00": [6, 34, 36, 37, 47, 60], "\uc2dd": [6, 19, 21, 28, 36, 49], "\ubcf4\uc558\ub358": 6, "\ubd84\uc0b0\uc744": [6, 14, 42, 52], "\uad6c\ud574\ubcf4\uc790": 6, "\uc2dd\uacfc": [6, 28, 39, 46], "\uc758\ubbf8\uc784\uc744": 6, "method": [6, 12, 13, 28, 34, 41, 45, 46, 48, 49, 58], "\ud5a5\ud574": [6, 36], "vqgan": 6, "\uc601\uc0c1\uc758": [6, 54], "total": [6, 59], "\u03b4_t": 6, "\ubd84\uc0b0": [6, 14, 42, 49, 60], "\ub098\ud0c0\ub09c": [6, 18, 35], "\uc0ac\uc6a9\ud558\uac8c": [6, 61], "\ucd5c\ub300": [6, 9, 20, 30, 34, 48], "\ubd84\uc0b0\uac12": 6, "\uc5d0\uc11c\uc758": [6, 18, 22, 44, 45, 
46, 55, 56, 58, 59, 61], "\ubd84\uc0b0\uac12\uc778": 6, "\u03b4_": 6, "\ucee4\uc9c0\uba74": [6, 36], "\ubd84\uc0b0\uac12\ub3c4": 6, "\ucee4\uc9c0\ub294\ub370": 6, "\ub2e4\ub8e8\uae30\uc5d0": 6, "\ud07c": [6, 11], "\uc774\uba74\uc11c": 6, "\ub3c5\ub9bd\uc77c": 6, "schedul": [6, 14, 18, 25, 33, 36, 45, 46, 47, 49, 50, 58, 59], "\ud574\ubcfc": [6, 29, 50], "\uc815\uc218\uc758": 6, "\ucd5c\ub313\uac12\uc778": 6, "\uc774\ub77c\uba74": [6, 12], "delta_t": [6, 39], "\uadf8\ub9bc\uacfc": [6, 10, 20, 26, 27, 33, 39, 41, 48, 51, 52, 60, 61], "\uac19\uac8c": 6, "\uc2dc\uac01\ud654\ud55c": 6, "m_t": 6, "overt": 6, "\uc2dc\uc791\ud558\ub294": [6, 36], "\uc5d0\uc11c\ub294": [6, 9, 12, 15, 16, 23, 25, 26, 28, 33, 44, 45, 50, 51, 52, 55, 56, 57, 61], "m_0": 6, "\ud3c9\uade0\uc740": 6, "\ub05d\ub098\ub294": 6, "\ubd84\uc0b0\uc774": [6, 29], "\uc911\uac04": [6, 8, 9, 21, 25, 35, 43, 49], "\uc9c0\uc810\uae4c\uc9c0\ub294": [6, 28], "\uc99d\uac00": [6, 15, 18, 22, 29, 49, 57], "\ud558\ub2e4\uac00": 6, "\uc9c0\uc810\ubd80\ud130": 6, "\uac10\uc18c": [6, 18, 19, 34], "\ubd84\uc0b0\uac12\uc5d0": 6, "\uc758\ud574": [6, 9, 18, 21, 33, 34, 35, 36, 39, 41, 46, 47, 50], "\uacb0\uc815": 6, "\uc2a4\ucf00\uc77c\ub9c1\ud558\ub294": 6, "\ub450\uc5b4": [6, 26, 47], "\uc870\uc808": 6, "\uc870\uc808\uc744": [6, 36], "\uacc4\uc218": [6, 20], "\ud3ec\ud568\ub41c": [6, 28, 34, 35, 36], "\ub514\ud3f4\ud2b8": 6, "margin": [6, 19, 36, 51, 60], "\ub9cc": [6, 11, 12, 14, 16, 26, 30, 32, 33, 44, 45, 47, 50, 54, 56, 58], "\uc81c\uacf5": [6, 12, 18, 32], "\uc11c\ub294": 6, "transit": [6, 19], "q_": [6, 16, 48, 54, 60], "bb": 6, "x_": [6, 11, 13, 14, 16, 18, 19, 22, 23, 27, 39, 40, 46, 49, 50, 54, 58], "\uc54c\uc544\uc57c\ud568": 6, "\ub54c\uc758": [6, 12, 15, 16, 21, 38, 42], "\uc4f8": [6, 12, 16, 26, 51], "m_ty": 6, "m_": 6, "\uc4f0\ub294": 6, "\uc633\uc74c": 6, "\ud558\uac8c": [6, 7, 12, 22, 28, 32, 38, 44, 45, 49, 51, 53, 55, 57, 58, 59, 60], "\ub300\uccb4": [6, 12, 15, 22, 30, 34], "\uc720\ub3c4\ub428": [6, 
57], "delta_": [6, 14], "\ub300\uc785": 6, "\uad6c\ud558\uba74": [6, 28], "\uc778\ub370": [6, 10, 32], "\uc544": 6, "\ub3c4\uba54\uc778\uc73c\ub85c\ubd80\ud130": 6, "\ub3c4\uba54\uc778\uc73c\ub85c\uc758": 6, "fix": [6, 13, 14, 20, 24, 26], "\uc815\uc758\ud558\ub294\uad6c\ub098": 6, "\ub85c\ubd80\ud130": [6, 7, 9, 10, 12, 18, 23, 31, 32, 34, 46, 47, 50, 51, 55, 56, 58, 59, 60, 61], "\uc2dc\uc791": [6, 10, 20, 31], "\uc2a4\ud15d\ub9c8\ub2e4": [6, 38], "\uc870\uae08\uc529": [6, 12, 32, 42], "\uc81c\uac70\ud574\ub098\uac10": 6, "\ub460\uc73c\ub85c\uc368": 6, "\uc790\uccb4\uc5d0\uc11c": 6, "mu_": [6, 14, 16, 23, 36, 57], "\uc608\uce21\ub41c": [6, 15, 19, 20, 21, 24, 33, 34, 35], "\ub178\uc774\uc988": [6, 18, 21, 24, 31, 35, 38, 39, 50], "\ud3c9\uade0\uac12\uc774\uba70": 6, "tild": [6, 8, 14, 16, 20, 21, 22, 24, 25, 27, 44, 54, 58, 60], "\ub178\uc774\uc988\uc758": [6, 44], "\uc2e0\uacbd\ub9dd": [6, 10, 20, 24, 35], "\uc608\uc804": 6, "\uac19\uc558\uc74c": 6, "\uc774\ub85c\ubd80\ud130": [6, 55], "elbo": [6, 14, 48], "term": [6, 7, 25, 36, 51, 57, 60], "\uace7": [6, 12, 57], "\ub418\ubbc0\ub85c": [6, 10, 15, 26, 39], "\ubca0\uc774\uc988": 6, "\uc774\ub860\uacfc": 6, "markov": [6, 13, 14, 26, 51], "chain": [6, 12, 14, 36, 49, 50, 51], "properti": [6, 33, 46, 50], "\ub3c4\ucd9c": 6, "markovian": [6, 14, 16, 33, 49, 58], "\uc131\ub9bd\ub428\uc744": 6, "\uc815\ub9ac\ub428": 6, "\ud1b5\ud569\ud558\uace0": 6, "reparameter": [6, 48, 54], "mu_t": 6, "\ubcc0\ud615\ud560": 6, "\ubcc0\ud615": [6, 31], "\uc608\uce21\ud558\ub294": [6, 7, 14, 16, 20, 23, 24, 33, 39, 44, 54, 57, 58, 61], "\uc608\uce21\ud558\ub3c4\ub85d": [6, 14, 21, 25, 33, 34, 58], "\ud559\uc2b5\ub428": 6, "\ub0b4\uc6a9\uc744": [6, 9, 12, 17, 24, 31], "\uc2dd\uc5d0": [6, 46, 54], "\uba85\uc2dc\ud558\uae30": 6, "\uba85\uc2dc\ub41c": 6, "\uc368\ubcfc": 6, "\uc368\ubcf4\ub294": 6, "\uac83\uc784": 6, "\uadf8\ub7f0\ub370": [6, 10, 41, 57], "\ucc38\uace0\ud574\ubcf4\uba74": 6, "\uc6b0\ub9ac\ub294": [6, 12, 34, 60], 
"\uadfc\uc0ac\ud558\ub3c4\ub85d": 6, "\ud559\uc2b5\uc2dc\ucf1c\uc57c\ud568": 6, "mu": [6, 14, 19, 20, 27, 36, 41, 46, 49, 54, 58, 60], "_t": [6, 8, 16, 21, 26, 28, 46, 54, 59], "\uc815\ub9ac\ub41c": 6, "\ud559\uc2b5\ub418\uc5b4\uc57c\ud558\ub294": 6, "kl": [6, 14, 19, 29, 36, 39, 51, 57, 60], "arg": [6, 45, 50, 60], "min_": [6, 8, 60], "c_": [6, 9, 11, 18, 21, 22, 27, 38, 40, 46, 50, 54], "\ub2e8\uc21c\ud654\ub420": 6, "\uc815\ub9ac": [6, 35, 47], "\ub9c8\uce58": [6, 12, 28], "\uadf8\ub7ec\ud588\ub4ef\uc774": 6, "\ube60\uc9c4": 6, "acceler": [6, 13, 20, 49, 50, 57], "\uac00\uc18d\uc2dc\ud0ac": 6, "\uae38\uc774\ub97c": [6, 31], "\ub450\uc5c8\uc744": 6, "varibal": 6, "subset": [6, 21, 55, 56], "\uc815\uc758\ub428": 6, "\uac12\uc758": [6, 15, 21, 25, 54, 61], "\ub450\uc5c8\uc74c": 6, "setup": [6, 18, 37, 55], "\ud558\uc774\ud37c\ub9c8\ub77c\ubbf8\ud130": 6, "\ud504\ub808\uc784\uc6cc\ud06c\ub294": [6, 8], "\uc774\ub8e8\uc5b4\uc9d0": 6, "\uc0ac\uc6a9\ub41c": [6, 7, 9, 18, 21, 35, 47, 58], "stage": [6, 12, 17, 20, 28, 32, 36, 39, 55, 56, 59, 61], "fid": [6, 7, 12, 14, 15, 23, 25, 29, 33, 35, 38, 41, 46, 48, 49, 52, 54, 56, 58, 59], "\uc0dd\uc131\ubb3c\uc758": [6, 9], "\ud3c9\uac00\ud558\uae30": [6, 12, 28, 31, 35, 39, 42], "5\uac1c\uc758": [6, 9, 21, 22, 43], "\uc0d8\ud50c\uc744": [6, 7, 18, 23, 24, 31, 34, 35, 36, 43, 47, 60], "\ub9c8\ub2e4\uc758": 6, "\ud45c\uc900\ud3b8\ucc28\uc758": 6, "\ub300\ud574\uc11c": [6, 12, 19, 21, 23, 24, 28, 29, 31, 34, 41, 42, 43, 45, 46, 47, 50, 51, 55, 57, 58, 61], "\ud3c9\uade0": [6, 7, 9, 14, 17, 31, 39, 50, 60], "\ub0c4": [6, 33, 48], "\uc2e4\ud5d8\ud568": 6, "\ub2a5\ub825": [6, 12, 26, 32], "celebamask": 6, "hq": [6, 16, 24, 29], "layout": [6, 28, 29, 56, 61], "\uc8fc\uace0": [6, 17, 23], "photorealist": [6, 12, 23, 31, 42, 52], "\uc0dd\uc131\ud574\ub0b4\ub294": [6, 11, 12, 25, 37], "\ud3c9\uac00": [6, 7, 11, 18, 19, 29, 31, 33, 34, 35, 37, 39, 44, 50], "sketch": [6, 26, 59], "edges2sho": 6, "edges2handbag": 6, "realist": [6, 19, 28, 
34, 37, 39, 53, 56, 61], "faces2com": 6, "\uc2e4\ud5d8\uc740": [6, 12, 29], "\ud3c9\uac00\ud588\ub2e4\uba74": 6, "qualit": [6, 8, 11, 24, 26, 48, 54, 55, 59, 61], "comparison": [6, 12, 14, 23, 30, 37, 40, 49, 50, 52, 55, 58], "\ucd94\ub860": [6, 12, 21, 45, 46, 48, 54], "\uc9c0\ub3c4": [6, 47], "\ud559\uc2b5\ud558\ubbc0\ub85c": [6, 26], "cyclegan": [6, 56], "\uc2a4\ucf00\uc77c\uc758": 6, "\ub5a8\uc5b4\uc9d0": 6, "drit": 6, "\ubaa8\ub378\ub4e4": [6, 39, 52], "\ub0c8\uc73c\ub098": 6, "\ubcc0\ud658\ub41c": 6, "oversmooth": 6, "\uc788\uc5c8\uace0": [6, 47], "ground": [6, 14, 17, 21, 33, 34, 39, 46, 47, 51], "truth": [6, 14, 21, 28, 33, 34, 46, 47, 51], "\uacfc\ub294": 6, "\uac70\ub9ac\uac00": [6, 47, 54], "\uba40\uc5c8\uc74c": 6, "cde": 6, "\ubaa8\ub378\ub4e4\ubcf4\ub2e4\ub294": 6, "\uc131\ub2a5": [6, 11, 12, 14, 18, 21, 22, 27, 28, 29, 30, 34, 35, 42, 45, 46, 47, 48, 49, 50, 51, 54, 57, 58, 59], "\uc601\ud5a5": [6, 49], "\ubc1b\uc74c": [6, 52], "\uc904": [6, 16, 23, 38, 39, 47, 49, 52, 57], "rregular": 6, "occlus": [6, 21], "\ub098\ud0c0\ub098\ub294\ub370": 6, "\uc9c1\uc811\uc801\uc778": [6, 51], "\ud558\ubbc0\ub85c": [6, 14, 16, 39, 60], "\ubb38\uc81c\ub85c\ubd80\ud130": 6, "\uc790\uc720\ub85c\uc6c0": 6, "\ud2b9\uc131\uc73c\ub85c": 6, "\uc0dd\uc131\ud574\ub0c4": 6, "\uc2e4\ud5d8\uc5d0\uc11c": [6, 23, 39, 47, 60], "\uae30\ub85d\ud588\uc73c\uba70": 6, "\uae30\ub85d\ud568": [6, 48], "\uc131\ub2a5\uc740": [6, 21, 25, 42, 45, 57], "\ub6f0\uc5b4\ub0a8": 6, "\uc810\uc218\uac00": [6, 7, 15, 48, 52], "\ub6f0\uc5b4\ub0ac\uc74c": [6, 15], "\uc2e4\ud5d8\ud588\uc74c": 6, "\uc5d0\uc11c\ub3c4": [6, 8, 47, 50, 55, 61], "campar": 6, "\uae30": 6, "\ub85d\ud568": 6, "\uc0c9\uc0c1\ud654": 6, "\ub4f1\uc758": [6, 14, 21, 24, 26, 34, 43, 46, 47, 54, 56, 58], "studi": [6, 9, 11, 18, 24, 40, 55, 58], "factor": [6, 15, 20, 29], "\ud588\uc74c": [6, 32, 49], "\ub450\uace0": [6, 22, 25, 27, 57, 61], "\uc791\uc744": [6, 59], "\uc870\uae08\ub9cc": 6, "\ub298\ub824\ub3c4": 6, "\ud06c\uac8c": [6, 9, 
21, 24, 26, 27, 28, 29, 37, 39, 42, 43, 45, 47, 51, 53, 54, 55, 57, 58, 59, 60], "\uc774\uc0c1\uc758": [6, 21, 42, 46], "\ud0a4\uc6cc\ub3c4": 6, "\ubcc0\ud654\uac00": [6, 16, 18, 44, 47, 52], "\ubbf8\ubbf8\ud568": 6, "maximum": [6, 7, 29], "varianc": [6, 13, 14, 16, 23, 34, 42, 46, 58, 60], "\uacc4\uc218\uc5d0": 6, "\ub418\uc5c8\uc74c": 6, "\uac83\ucc98\ub7fc": [6, 32, 39, 47], "conclus": 6, "futur": 6, "\ubc29\ubc95\uacfc": [6, 21, 23, 35, 38, 42, 51], "\ub2ec\ub9ac": [6, 18, 19, 21, 23, 35, 37, 42, 51, 60], "\uc5d0\ub3c4": [6, 46], "\uc801\uc6a9\ud574\ubcfc": 6, "\uc608\uc815": 6, "\uc790\ub8cc": [6, 47], "sine": 6, "qua": 6, "none": [6, 10, 14, 31, 41, 45, 56, 59], "tistori": [6, 43], "158": 6, "autoregress": [7, 15, 26, 29, 33, 51], "scontent": 7, "gmp1": 7, "xx": 7, "fbcdn": 7, "t39": 7, "2365": 7, "358725877_789390529544546_1176484804732743296_n": 7, "_nc_cat": 7, "108": 7, "ccb": 7, "_nc_sid": 7, "3c67a6": 7, "_nc_ohc": 7, "plfu_ur_vyaax_nagu8": 7, "_nc_ht": 7, "oh": 7, "00_afdrhahxv1pcf0lqicjiynmorpvcgeq0emv5_ve2_tncvg": 7, "oe": 7, "652ff632": 7, "jun": [7, 33, 38, 39, 44], "hyoung": [7, 33, 38, 44], "lee": [7, 15, 18, 33, 34, 38, 44, 47, 51, 57, 60, 62], "oct": [7, 23, 24, 27, 37, 42, 57, 59], "\ubcf5\uc7a1\ud558\uac8c": 7, "\uad6c\uc131\ub41c": [7, 18, 21, 29, 31, 34, 35, 56, 59, 60], "\uac1d\uccb4": [7, 8, 12, 18, 28, 29, 34, 35, 53, 55], "\uc190": 7, "\ud14d\uc2a4\ud2b8\uc640": [7, 12, 21, 23, 24, 31, 39, 43], "\ub458": [7, 16, 23], "\ub2e4": [7, 16, 21, 28, 32], "\uac00\uc9c4": [7, 18, 23, 25, 29, 34, 38, 39, 40, 42, 43, 46, 47, 48, 50, 56, 59, 60], "\uac80\uc0c9": 7, "\uc99d\uac15": [7, 18], "\ud1a0\ud070": [7, 15, 18, 35], "\ub514\ucf54\ub354": [7, 9, 15, 21], "\uc804\uc6a9": [7, 35, 52], "\uba40\ud2f0": [7, 21], "\ubaa8\ub2ec": 7, "\ubaa8\ub378\uc774\ub2e4": [7, 21, 23, 26, 27, 29, 38, 39, 54], "cm3": 7, "\uc544\ud0a4\ud14d\ucc98\ub97c": [7, 21, 35, 38, 56], "\uc2a4\ud0c0\uc77c": [7, 9, 21, 47], "tun": 7, "\uac00\uc84c\ub2e4": 7, 
"\ub9de\ub3c4\ub85d": [7, 26, 38], "scale\uc758": [7, 22], "\ub2e8\uacc4\ub97c": [7, 16, 24, 38, 51, 55], "\ud3ec\ud568\ud55c\ub2e4": [7, 35, 38], "\ub370\uc774\ud130\ub294": [7, 31, 34], "\ub77c\uc774\uc13c\uc2a4\uac00": 7, "shutterstock\uc758": 7, "scale\ub85c": [7, 20], "sft": 7, "\uc785\ub825\uacfc": [7, 15], "\ucd9c\ub825": [7, 8, 18, 21, 34, 35, 46, 47], "\ud1a0\ud070\uc744": [7, 9, 15, 21, 33, 35, 43], "\uc11e\uc744": 7, "\ud504\ub86c\ud504\ud2b8\uc5d0": [7, 9, 28, 35], "\ub9de\ub294": [7, 9, 18, 20, 31, 33, 35, 39, 47, 50], "\uc0dd\uc131\ud558\ub294\ub370": [7, 9, 19, 39, 55, 59, 61], "cm3leon\uc740": 7, "output\uc744": [7, 8, 10, 23, 38], "contain": 7, "\uc18c\uac1c\ud55c\ub2e4": [7, 16, 21, 23, 31, 38], "iamg": 7, "segmentation\uae4c\uc9c0": 7, "3\uc5b5": 7, "\uac1c\uc758": [7, 8, 21, 24, 30, 31, 33, 34, 35, 38, 41, 43, 44, 45, 46, 47, 48, 50, 55], "\ud1a0\ud070\uc73c\ub85c": [7, 15, 33, 48], "\ud559\uc2b5\ud588\ub294\ub370": 7, "generation\ub3c4": 7, "\uc218\ud589\ud55c\ub2e4": [7, 20, 26, 45, 54], "5\ubc30\ub85c": 7, "shot": [7, 12, 19, 20, 23, 28, 31, 33, 40, 42, 48, 52, 56, 61], "coco\ub85c": [7, 52], "fid\ub97c": [7, 13, 29, 35], "\uce21\uc815\ud55c": 7, "88": [7, 33], "\uc810\uc73c\ub85c": [7, 57], "google\uc758": 7, "parti": 7, "\uc131\ub2a5\uacfc": [7, 37, 42], "\uc218\uc900\uc744": 7, "\ub2ec\uc131\ud588\ub2e4": [7, 26, 54], "ra": 7, "cm3\ub97c": 7, "\ub3c4\uba54\uc778\uc5d0\uc11c": [7, 9], "\uc5f0\uad6c\ud588\ub2e4": 7, "gafni\uc758": 7, "tokenizer\ub97c": [7, 26, 43], "tokenizer\ub294": 7, "256x256": [7, 15, 17, 23, 26, 31, 33, 38, 39, 42, 46, 53, 54, 61], "8192\uac1c\uc758": 7, "vocabulary\uc5d0\uc11c": 7, "1024\uac1c\uc758": [7, 35, 39], "\uc778\ucf54\ub529\uc744": 7, "\uc9c4\ud589\ud55c\ub2e4": [7, 16, 20, 28, 36, 44, 45], "\ud14d\uc2a4\ud2b8\uc5d0\uc11c\ub294": 7, "zhang\uc758": 7, "\ucee4\uc2a4\ud140": 7, "56320": 7, "vocabulari": 7, "\uc2a4\ud398\uc15c\ud55c": 7, "\ud1a0\ud070\uc778": 7, "break": [7, 56], "figure_8_9": 7, 
"modality\uac04": 7, "transition\uc744": 7, "\ubaa9\uc801": [7, 18, 21, 29, 39], "sequence\uc5d0": 7, "\uad00\ub828\uc131\uc774": 7, "\ubb38\uc11c": 7, "bank": 7, "\uac80\uc0c9\ud558\ub294": 7, "dens": [7, 20, 21, 30, 35, 39, 55], "strategy\uc744": 7, "\ucffc\ub9ac": [7, 34], "\uc608": [7, 8, 18, 21, 29], "\ud6c4\ubcf4": 7, "\uad00\ub828\uc131": [7, 9], "\uc810\uc218": [7, 8, 52], "return": [7, 10, 11, 13, 14, 30, 31, 41, 45, 51, 56, 59, 60], "retriv": 7, "\uae30\ubc18\uc778": 7, "bi": 7, "\ub530\ub790\ub2e4": 7, "karpukhin": 7, "\ubb38\uc11c\ub97c": 7, "\ud30c\ud2b8\ub85c": [7, 22], "\ubd84\ub9ac\ud558\uace0": 7, "\uc778\ucf54\ub354": [7, 9, 21], "\ubb38\uc11c\uc758": 7, "representation\ub85c\uc368": 7, "\uac1c\ub97c": [7, 34, 44], "\ub0b8\ub2e4": [7, 31, 39, 43], "\uac80\uc0c9\uc740": 7, "\uc810\uc218\uc5d0": [7, 52], "\uc815\ub82c\ub41c": [7, 18, 21, 44], "\ubaa9\ub85d\uc744": 7, "inner": [7, 30, 32], "product": [7, 12, 32], "search\ub85c": 7, "generator\ub97c": [7, 19, 22, 41, 51], "\uc720\uc6a9\ud55c": [7, 21], "\ucd94\ucd9c\ud558\uae30": [7, 20], "\uac00\uc9c0": [7, 9, 10, 12, 18, 19, 21, 24, 28, 35, 38, 41, 43, 44, 46, 47, 50, 55, 56, 58], "\uc694\uc18c\ub97c": [7, 24, 31, 42, 56], "\uace0\ub824\ud588\ub2e4": 7, "relev": [7, 13], "\uac80\uc0c9\ub41c": 7, "\ubb38\uc11c\ub294": 7, "\uad00\ub828\uc788\uc5b4\uc57c": 7, "\ud14d\uc2a4\ud2b8\ub85c": [7, 10, 18], "\ubb38\uc11c\ub85c": 7, "\ub2e4\uc591\uc131\uc740": 7, "\ubb38\uc11c\uc5d0\uc11c": 7, "\uc911\ubcf5\uc131\uc744": 7, "\ud544\uc218\uc801\uc778": 7, "\uc808\ucc28\ub2e4": 7, "\ub2e8\uc21c\ud558\uac8c": 7, "\uae30\ubc18\ud574": [7, 30], "top": [7, 9, 12, 20, 38, 45], "\ubb38\uc11c\ub9cc": 7, "\uac00\uc838\uc628\ub2e4\uba74": 7, "\uc911\ubcf5\uc774": 7, "downstream": [7, 26, 30, 39, 56], "\uc548\uc88b\uc740": 7, "\ub07c\uce60": 7, "\uc774\ud558\ub85c": [7, 57], "dropout": [7, 21], "\uac80\uc0c9\uc5d0": 7, "\ucffc\ub9ac\uc758": 7, "\uc0ad\uc81c": [7, 14], "\ub2e4\uc591\uc131\uacfc": [7, 42], 
"\uc815\uaddc\ud654\ub97c": [7, 41], "\ud14d\uc2a4\ud2b8\ub97c": [7, 12, 18, 21, 31, 35, 41, 42, 43], "\uac80\uc0c9\ud55c\ub2e4": 7, "\ud559\uc2b5\uc5d0\uc11c\ub294": 7, "\ucea1\uc158": [7, 23, 35], "\uc30d\uc5d0": [7, 9, 18, 21, 35, 46], "\uc0d8\ud50c": [7, 12, 18, 21, 29, 31, 34, 35, 36, 39, 41, 42], "3\uac1c\ub97c": 7, "\ubb34\uc791\uc704\ub85c": [7, 8, 19, 31, 34, 35, 43], "\uc0ac\uc2e4\uc0c1": [7, 28, 58], "\ud559\uc2b5\uc5d0\uc11c": [7, 21], "4\ubc30\uc774\ub2e4": 7, "chameleon": 7, "\ubcc0\ud615\uc2dc\ucf1c": 7, "infil": 7, "\ud45c\ud604\ud55c\ub2e4": [7, 36], "\ucd94\uac00\ub418\uc5c8\uace0": 7, "\ub2e8\uc5b4\uc758": 7, "\uc7ac\ubc30\uce58\uac00": 7, "\uc9c4\ud589\ub410\ub2e4": 7, "\ud559\uc2b5\uc5d0\ub294": 7, "\ub2e4\uc6a9\ub3c4": 7, "\uac00\uc838\uc654\ub2e4": [7, 38, 44], "cm3\uac00": 7, "\ud504\ub86c\ud504\ud2b8\ub85c": [7, 31, 35, 42], "cm3\ub294": 7, "\ud65c\uc6a9\ud55c\ub2e4": [7, 20, 28, 29], "\ub514\ucf54\ub354\ub9cc": 7, "\uc544\ud0a4\ud14d\uccd0\ub97c": [7, 12, 46, 56, 58], "zhang\uc5d0": 7, "bia": [7, 12, 14, 15, 31, 34, 46, 56], "norm\uc758": 7, "\uc81c\uac70\ud588\ub2e4": [7, 35, 38], "length\ub97c": [7, 16, 17], "2048": [7, 44, 58], "4096\uae4c\uc9c0": 7, "\ud655\uc7a5\ud588\ub2e4": [7, 39], "\ud45c\uc900": [7, 21, 34, 35, 47], "\ud3b8\ucc28": 7, "006": 7, "truncat": [7, 22, 58], "3\uc73c\ub85c": [7, 52], "\uc798\ub9b0": [7, 38], "0002\ub85c": 7, "\ucd08\uae30\ud654\ud55c\ub2e4": [7, 21], "metaseq": 7, "\ud559\uc2b5\ub410\ub2e4": 7, "\uc0ac\uc774\uc988": [7, 44], "350m": 7, "760m": 7, "7b": 7, "4t": [7, 44], "trillion": 7, "9t": 7, "\uc8fc\uc694\ud55c": [7, 24, 44], "\ud558\uc774\ud37c": 7, "size\ub85c": 7, "\uba40\ud2f0\ubaa8\ub2ec": [7, 26], "\uc124\uc815\ud588\ub2e4": [7, 36], "perplex": 7, "ppl": [7, 50], "\uc5b8\uc5b4": [7, 18, 45], "\ud558\ub098\uc774\ub2e4": 7, "\ud5f7\uac08\ub9ac\ub294": 7, "\ub0ae\uc744": [7, 10, 14], "\uc218\ub85d": [7, 14, 15, 36], "\uc88b\ub2e4": [7, 12, 30, 36, 49], "\ubaa8\ub378\uc5d0\uc11c": [7, 15, 19, 20, 
26, 31, 35, 38, 42, 43, 45, 49, 52, 58, 61], "\uc54c\uace0\ub9ac\uc998\uc5d0": [7, 20], "\uc9c4\ud589\ub418\uc5b4": [7, 33, 46], "\uc654\ub2e4": [7, 21, 25], "e\ub294": [7, 35, 39, 48], "\uc544\uc6c3\ud48b\uc758": 7, "\ud5a5\uc0c1\ub418\ub294": [7, 42], "\uc0d8\ud50c\ub9c1\uacfc": 7, "512\uac1c": [7, 48], "\uc804\ub7b5\uc744": [7, 15, 21, 24, 26, 31, 57], "\ucc44\ud0dd\ud588\ub2e4": [7, 39], "make": [7, 26, 30, 39, 44], "guidance\ub85c": 7, "ranking\uc5d0": 7, "\uc624\uc9c1": [7, 9], "\uc0d8\ud50c\ub9cc": [7, 29], "\ud544\uc694\ud558\uac8c": [7, 57], "\ub428\uc73c\ub85c\uc368": [7, 53, 61], "\ud6c4\ubcf4\uc758": 7, "\uc218\ub97c": [7, 15, 21, 26, 31, 34, 38, 44, 48, 57], "\ud655\ub960\uc801": 7, "\uae30\uc220\ub85c": [7, 43], "\uc0d8\ud50c\ub9c1\uc5d0\uc11c": 7, "softmax\uc758": 7, "temperature\ub97c": 7, "\uc218\uc815\ud574": [7, 12], "\uc608\uce21": [7, 8, 12, 13, 14, 21, 24, 33, 34, 35, 48, 54], "\ubb34\uc791\uc704\uc131\uc744": 7, "\uc81c\uc5b4\ud55c\ub2e4": 7, "nucleu": 7, "\uc0d8\ud50c\ub9c1\uc73c\ub85c\ub3c4": 7, "\ubd88\ub9ac\uace0": 7, "\ubbf8\ub9ac": [7, 20, 21, 43], "\uc815\uc758\ud55c": [7, 25, 46, 55], "\uc784\uacc4\uac12\uc744": [7, 20, 52], "\ucd08\uacfc\ud558\ub294": 7, "\ub204\uc801": 7, "\uc0c1\uc704": [7, 35], "\uc138\ud2b8\uc5d0\uc11c": [7, 31], "\uc0d8\ud50c\ub9c1\uc744": [7, 34, 35, 42, 51], "begin": [7, 14, 21, 36, 57, 59], "operatornam": 7, "logit": [7, 33], "_": [7, 8, 9, 10, 16, 18, 21, 22, 24, 27, 28, 29, 30, 31, 36, 39, 41, 45, 46, 47, 50, 51, 54, 57, 58, 59, 60], "cond": [7, 38], "t_y": 7, "mid": [7, 14, 30, 37], "t_x": 7, "uncond": [7, 35], "mathrm": [7, 14, 16, 18, 21, 34], "cf": [7, 20, 31], "alpha_c": [7, 17], "cfg\ub294": [7, 19, 54], "uncondit": [7, 14, 19, 22, 23, 26, 29, 33, 39, 49, 54, 58], "\uc0d8\ud50c\uc5d0": [7, 31, 35, 38, 42, 46], "text\ub97c": [7, 19, 23, 26, 48], "\ubaa9\ud45c\uc758": 7, "\ub9c8\uc2a4\ud06c": [7, 23, 31, 33], "\ub300\uccb4\ud55c\ub2e4": 7, "\ud559\uc2b5\uc758": [7, 22, 27, 46, 47, 54], "\uc774\uc810": 
[7, 22], "\ud558\ub098\uc774\uba70": 7, "\uc218\ud589\ud560": [7, 12, 19, 29, 45, 46], "\ucd94\ub860\uc5d0\uc11c\ub294": 7, "stream\uc744": 7, "\ud14d\uc2a4\ud2b8\uc5d0": [7, 12, 21], "\ub2ec\ub77c\uc9c0\ub294": [7, 20, 50], "stream\uacfc": 7, "\ud1a0\ud070\uc5d0": [7, 15, 33], "condition\ub41c": 7, "stream": 7, "cfg\uc5d0\uc11c": 7, "logit\uc758": [7, 22], "\ube84\uc148": 7, "\uc5f0\uc0b0\uc774": [7, 15, 29, 54], "\ud14d\uc2a4\ud2b8\uc5d0\uc11c": [7, 24, 31, 35], "\ubc29\ubc95\uc758": [7, 9, 28, 35, 43], "probability\ub97c": 7, "\ube84\uc148\ud558\ub294": 7, "\uc5f0\uc0b0\uacfc": 7, "\ube44\uc2b7\ud558\ub2e4": [7, 38], "ms": [7, 22, 23, 48, 52], "coco": [7, 19, 23, 33, 39, 48, 52, 59], "30k": 7, "\uce21\uc815\ud588\ub2e4": [7, 45], "\ud6a8\uc728\uc131\uc774": 7, "\ucd94\ub860\uc5d0\uc11c": 7, "1\uac1c": [7, 48], "2\uac1c\ub85c": 7, "\uc608\uc81c\ub85c": 7, "\ub3d9\uc791\ud560": [7, 42], "\uc6b0\uc218\ud55c": [7, 13, 15, 18, 21, 24, 31, 38, 47, 48], "\uae30\ub85d\ud588\ub2e4": [7, 40], "\uace0\ud488\uc9c8": [7, 12, 17, 20, 21, 24, 35, 38], "\uac80\uc0c9\uc758": 7, "\uc911\uc694\uc131\uc744": [7, 15, 24, 47], "figure5": 7, "llm\uc5d0\uc11c": 7, "\ub2e8\uacc4\uc774\ub2e4": [7, 28], "\uba85\ub839\uc5b4": [7, 19], "\uc774\ud574\ud558\ub294": 7, "\ub3c4\uc640\uc8fc\uba70": 7, "task\uc5d0\uc11c\ub3c4": 7, "\ud29c\ub2dd\uc774": 7, "\ub208\uc5d0": [7, 21], "\ub744\uac8c": [7, 21], "\uc99d\ud3ed\uc2dc\ud0a4\ub294": 7, "cm3leon\uc744": 7, "task\ub97c": [7, 20, 30, 37, 43, 48, 54], "\uc11e\uc5b4": 7, "\ubc94\uc704\uc5d0\uc11c": 7, "\uacfc\uc815\uc740": [7, 18, 35, 36, 39, 60], "\ub530\ub974\uba70": 7, "instruction\uacfc": 7, "\ucd9c\ub825\uc744": [7, 21, 34, 35, 39], "figure6": 7, "image\ub97c": [7, 8, 11, 14, 16, 20, 22, 23, 24, 26, 27, 28, 29, 31, 37, 42, 43, 48], "\uc218\uc815\ud558\ub294": [7, 43], "task\uc774\ub2e4": 7, "instructpix2pix": [7, 21, 56], "\ud558\ub298\uc758": 7, "\uc0c9\uc744": [7, 20, 35, 36, 39], "\ud30c\ub780\uc0c9\uc73c\ub85c": 7, 
"\ubcc0\uacbd\ud574\uc918": 7, "\ud3b8\uc9d1\uc774": 7, "\uc774\uac83\uc740": [7, 12, 24, 42], "cm3leon\uc774": 7, "\uc774\ud574\ud558\uace0": [7, 29], "\uc788\uc5b4\uc11c": [7, 16], "\uc0dd\uc0b0\ud558\ub294": [7, 36], "\uacf5\uac04\uc801": [7, 31, 34], "\uc815\ubcf4": [7, 18, 20, 21, 29, 31, 33, 34, 40, 56], "\ud1b5\ud569\uc2dc\ud0ac": [7, 12], "figure16": 7, "flamingo": 7, "1000\uc5b5": 7, "openflamingo": 7, "400\uc5b5": 7, "30\uc5b5": 7, "\ud1a0\ud070\uc784\uc5d0\ub3c4": 7, "\ubd88\uad6c\ud558\uace0": [7, 12, 18, 21, 23, 28, 42, 44, 47, 58], "\ub3d9\ub4f1\ud55c": 7, "06949": [9, 24], "site": 9, "hyoungseo": [9, 24, 62], "cho": [9, 24, 29, 62], "generation\uc758": [9, 23], "\uc77c\ud658\uc73c\ub85c": 9, "\uc0c8\ub86d\uace0": 9, "\ucc3d\uc758\uc801\uc778": 9, "\uac1c\ub150\uc744": [9, 18, 43, 54], "\ub2e4\ub8f9\ub2c8\ub2e4": 9, "\uae30\uc220\uacfc": [9, 35, 38], "\ubfd0\ub9cc": [9, 20, 26, 38, 44, 47, 56], "\uac1c\uc778\ud654\ub41c": [9, 17, 18, 24, 43], "\uc0dd\uc124\ud560": 9, "\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 9, "\uac15\ub825\ud55c": [9, 23, 26, 27, 29, 32, 35], "\uc124\uba85\ub418\uc9c0": 9, "\ucc3d\uc758\uc801": 9, "\uc788\uc744\uae4c\uc694": 9, "sysnthesi": 9, "\uae30\uc220\uc740": 9, "\uc778\ucf54\ub354\uc5d0\uc11c": [9, 21], "\ucd94\ucd9c\ud55c": [9, 11, 22, 26, 45], "\uc784\ubca0\ub529\uc744": [9, 21, 35, 38], "conditioning\ud569\ub2c8\ub2e4": 9, "\ucc98\ub9ac\ud558\uc5ec": 9, "\ud1b5\ud569\ud558\ub294": [9, 21], "\ubc29\uc2dd\uc785\ub2c8\ub2e4": [9, 46, 58], "\ud65c\uc6a9\ud574\uc11c": [9, 12, 49, 60], "generation\uc5d0\uc11c\uc758": 9, "\uc774\uc810\uc744": [9, 29, 34], "\ubcf4\uc785\ub2c8\ub2e4": [9, 47], "\uc785\ub825\ub41c": [8, 9, 18, 21, 39, 43, 53], "\ud574\ub2f9\ud558\ub294": [9, 10, 21, 23, 26, 33, 34, 36, 53, 55], "\uc784\ubca0\ub529\uc73c\ub85c": [9, 21], "\ub9e4\ud551\ud569\ub2c8\ub2e4": 9, "\ub514\ucf54\ub354\ub294": 9, "\uc784\ubca0\ub529\uc5d0": [9, 21, 35, 38], "condition\uc774": [9, 26], "\ubd80\uc5ec\ub41c": 9, 
"\uc0dd\uc131\ud558\uae30": [9, 18, 24, 26, 35, 36, 39, 43, 46, 47, 54], "\ud6c8\ub828\ub429\ub2c8\ub2e4": 9, "personalization\uc740": 9, "\ub9e5\ub77d\uc5d0\uc11c": 9, "\uc0ac\uc6a9\uc790\uac00": [9, 18, 26, 27, 37, 38, 43], "\uc8fc\uc81c\ub098": 9, "\uc2a4\ud0c0\uc77c\uc744": [9, 21, 24, 26, 40, 42], "\uc77c\ubc18\uc801\uc73c\ub85c": [9, 18, 19, 20, 21, 26, 34, 35, 39, 42, 43, 46, 54], "\ud559\uc2b5\uc2dc\ud0a4\uae30": [9, 19, 39, 57], "\ucd5c\uc801\ud654\ud558\uac70\ub098": 9, "\ud65c\uc6a9\ud569\ub2c8\ub2e4": [9, 18, 46], "\ucd08\ucca8\uc744": [9, 43], "\ub9de\ucd94\uace0": 9, "\uae30\ubc1c\ud55c": 9, "\uc811\uadfc": [9, 18, 19, 21, 24, 34, 35, 43], "\ubc29\ubc95\uc774": [8, 9, 16, 18, 19, 21, 23, 24, 28, 31, 32, 35, 42, 47, 54, 57], "xu": 9, "set": [9, 14, 17, 18, 19, 22, 30, 45, 47, 50, 52, 56, 58], "evolut": 9, "\ud65c\uc6a9\ud574": [8, 9, 12, 17, 26, 32, 38, 44, 45, 47, 49, 59], "\ud615\ud0dc\uc758": [8, 9, 26, 34, 35, 38, 39, 43, 50, 55, 56], "\uc81c\uc548\ud588\uc2b5\ub2c8\ub2e4": [9, 41], "elgamm": 9, "gan\uc758": [9, 16, 29, 43], "\ud0d0\uad6c\ud558\uba70": 9, "\uc2a4\ud0c0\uc77c\uc5d0\uc11c\uc758": 9, "\ud3b8\ucc28\ub97c": 9, "\uadf9\ub300\ud654\ud558\ub294": 9, "\ud559\uc2b5\ud588\uc2b5\ub2c8\ub2e4": [9, 12], "sbai": 9, "\uc190\uc2e4": [9, 18, 48], "\ub3c4\ub9bd\ud588\uc2b5\ub2c8\ub2e4": 9, "\uce74\ud14c\uace0\ub9ac\uc640": 9, "\uc77c\uce58\ud558\ub3c4\ub85d": [9, 35, 43], "\ucd5c\uc801\ud654\ud558\uba74\uc11c\ub3c4": 9, "\uce74\ud14c\uace0\ub9ac\uc758": 9, "\uac1c\ub150\ub4e4\uacfc": 9, "\ucc3e\ub294": [9, 16, 21, 32, 43], "\uc811\uadfc\ud588\uc2b5\ub2c8\ub2e4": 9, "\uac1c\ub150\ub4e4\uc740": 9, "\ud63c\ud569\ub420": 9, "\uc720\uc5f0\ud55c": 9, "\uac16\uac8c\ub429\ub2c8\ub2e4": [9, 41], "bottom": [9, 20], "model\uc5d0\uc11c\ub294": [9, 26, 43], "\uc624\ud1a0\uc778\ucf54\ub354\uc758": [9, 21], "\ub0b4\uc5d0\uc11c": [9, 24, 34, 38, 43, 44, 53], "\uc9c4\ud589\ub429\ub2c8\ub2e4": [9, 58], "\uba3c\uc800": [9, 10, 18, 20, 24, 28, 31, 34, 35, 36, 41, 42, 
45, 48, 51], "\uc778\uace0\ub354": 9, "\ub9e4\ud551\ud558\ub294": [9, 47], "d\ub294": [9, 16], "\uc7ac\uad6c\uc131\ud558\ub3c4\ub85d": 9, "ddpm\uc758": [9, 13, 20, 23, 25, 40, 49], "\uc190\uc2e4\uc744": [9, 18, 21, 24], "\ucd5c\uc18c\ud654\ud558\ub3c4\ub85d": [9, 24, 46], "\ud559\uc2b5\ud569\ub2c8\ub2e4": [9, 47, 51, 55, 58, 59], "\uc7a0\uc7ac": [9, 28, 35, 42], "\ucf54\ub4dc": [9, 31, 41, 45, 56], "zt": 9, "\uc7a1\uc74c": 9, "\uc81c\uac70\ud569\ub2c8\ub2e4": 9, "\ud604\uc7ac": [9, 12, 13, 19, 32, 36, 40, 42], "t\uc640": 9, "\uc870\uac74": [9, 18, 21, 23, 26, 35], "\ubca1\ud130": [9, 15, 21, 29, 34], "c\ub3c4": 9, "\uace0\ub824\ub429\ub2c8\ub2e4": 9, "model\uc740": [9, 14, 16, 22, 23, 26, 28, 29, 30, 33, 39, 43, 54], "\uc778\ucf54\ub529\uc5d0\uc11c": 9, "\ud30c\uc0dd\ub41c": [9, 21, 43], "\ud504\ub86c\ud504\ud2b8": [8, 9, 18, 19, 28, 31], "decompos": [9, 24], "\ud504\ub86c\ud504\ud2b8\ub85c\ubd80\ud130": 9, "\uc608\uce21\ud569\ub2c8\ub2e4": [9, 24, 47, 55], "\ub2e4\uc74c\uc73c\ub85c": [9, 24, 35], "\ubd80\uc5ec\ud558\uc5ec": 9, "decoder\ub85c": 9, "\ubcf4\ub0b4\uc9d1\ub2c8\ub2e4": 9, "\ub3c5\ub9bd\uc801\uc778": [9, 13, 46], "\uc774\ub8e8\uc5b4\uc9d1\ub2c8\ub2e4": 9, "l_": [8, 9, 19, 20, 25, 26, 27, 36, 37, 39, 44, 46, 49, 54, 56, 59], "\ub2e4\uc591\uc131\uc744": [9, 17, 18, 36, 42], "\ud5a5\uc0c1\uc2dc\ud0a4\uba70": 9, "\uc811\uadfc\ud558\uace0": 9, "\uacf5\uac04\uc5d0\uc11c": [9, 15, 18, 34, 43], "conceptlab\uc740": 9, "\ub300\ud45c\ud558\ub294": 9, "v_": [9, 22], "\ucd5c\uc801\ud654\ud569\ub2c8\ub2e4": [9, 55], "\uce74\ud14c\uace0\ub9ac\uc5d0": [9, 31, 35], "\uc720\uc0ac\ud558\uba74\uc11c\ub3c4": [9, 43], "\uba64\ubc84\ub4e4\uacfc": 9, "\uac00\uc9c0\ub3c4\ub85d": [9, 39, 42], "\uc9d1\ud569\uc744": [9, 18, 26, 34, 36], "\uacc4\uc0b0\ud569\ub2c8\ub2e4": [9, 46, 50], "\ud6c8\ub828\ud558\ub294": [9, 21, 28], "\ubc14\ud0d5\uc73c\ub85c": [8, 9, 20, 26, 29, 34, 36, 47, 57, 58], "contraints\ub97c": 9, "\ub354\ud558\uae30": 9, "blip": [9, 12], "vqa": 9, 
"\uae0d\uc815\uc801": 9, "\uc81c\uc57d": 9, "po": 9, "\uac00\uc9c0\ub97c": 9, "\uc870\uac74\uc740": 9, "\uc815\uc758\ub429\ub2c8\ub2e4": 9, "\ub450\uac00\uc9c0": [9, 39, 49, 57], "\uce21\uc815\ud569\ub2c8\ub2e4": [9, 12, 58], "\uc6b0\uc120": [9, 10, 12, 23, 28, 38, 50, 55, 56, 57, 58], "\ub2e8\uc5b4": [9, 31, 40, 43], "\ubb34\uc791\uc704": [8, 9, 24], "\uc0d8\ud50c\ub9c1\ub41c": [8, 9, 34, 35, 36], "y\uc5d0": 9, "\ud1b5\ud569\ud569\ub2c8\ub2e4": 9, "\ubb38\uc7a5\uc740": 9, "\uc778\ucf54\ub529\ub418\uba70": 9, "\uc774\uac83\uc774": [9, 12, 47, 57], "\uc815\uc758\ud569\ub2c8\ub2e4": [9, 46, 50, 53, 55, 56, 58, 59, 60, 61], "\ud1b5\uacfc\uc2dc\ud0a4\uba74": 9, "\ud504\ub86c\ud504\ud2b8\uc758": [9, 21], "\uc778\uc2a4\ud134\uc2a4\uac00": 9, "\uc870\uac74\uacfc": [9, 32], "\uae0d\uc815": 9, "\ubd80\uc815": 9, "\uad11\ubc94\uc704\ud558\uac8c": 9, "\uc720\uc9c0\ud558\uace0\uc790": [9, 50], "\ud1b5\uacfc\ud558\uc9c0": 9, "\uc54a\uc2b5\ub2c8\ub2e4": [9, 12, 46, 47, 56, 61], "\uc5f0\uad6c\uc5d0\uc11c\uc758": 9, "\ud568\uc218\ub294": [9, 29, 34], "langl": 9, "rangl": 9, "lambda": [9, 14, 18, 26, 28, 36, 46, 47, 50], "v\uc5d0\uc11c": 9, "\uc870\uac74\uc5d0\uc11c": 9, "\uba40\uc5b4\uc9c0\uace0": 9, "\uc81c\uc57d\uc870\uac74\uc5d0": 9, "\uac00\uae4c\uc6cc\uc9c0\ub3c4\ub85d": [9, 16, 36], "\uc815\uaddc\ud654\ub294": 9, "\uc9d1\ud569\uc774": 9, "\ud074": [9, 42, 61], "\uba64\ubc84\ub85c\uc758": 9, "collapsing\uc744": 9, "\ubc29\uc9c0\ud558\ub294": [9, 18, 19, 21, 57], "\uc0ac\uc6a9\ub429\ub2c8\ub2e4": [9, 10, 24], "\uc81c\uc57d\uc5d0": 9, "s_": [9, 19, 21, 22, 40, 46, 57], "max_": 9, "\ubc29\uc2dd\uc740": [9, 11, 24, 26, 27, 30, 34, 35, 36, 39, 43, 54, 56, 58, 59], "\ud568\uc218\uc5d0": 9, "\ud1b5\ud569\ub418\uba70": 9, "\ub0c4\uc73c\ub85c\uc368": 9, "\uc870\uac74\uc5d0": 9, "\ud328\ub110\ud2f0\ub97c": [9, 47], "\ubd80\uc5ec\ud569\ub2c8\ub2e4": 9, "\uacfc\uc815": [9, 11, 14, 20, 25, 31, 33, 34, 35, 39, 43], "\uac1c\ub150\uc5d0": [9, 18, 43], "\ub2e8\uc5b4\ub97c": [9, 43], 
"\ucd94\ub860\ud558\uace0": [9, 45], "\uac70\uce69\ub2c8\ub2e4": 9, "\uc218\ub3d9\uc73c\ub85c": [9, 32], "\uc801\uc6a9\ud558\ub294": [9, 14, 16, 27, 30, 46, 50, 54, 56, 58, 59], "\ud798\ub4e4\uace0": 9, "\uad11\ubc94\uc704\ud55c": [9, 18], "\uba64\ubc84\ub4e4\uc744": 9, "\ub300\ud45c\ud558\uc9c0": 9, "\ubabb\ud560": [9, 24, 38], "\ud655\uc7a5\ud558\ub294": [9, 31], "scheme\uc744": [8, 9], "\ud6c8\ub828\ub41c": [9, 10, 18, 21, 24, 26, 28, 42, 43], "\uc9c8\uc758\ud558\uc5ec": 9, "\uc774\ubbf8\uc9c0\uc5d0": [9, 11, 13, 14, 18, 21, 22, 25, 26, 31, 33, 34, 35, 37, 40, 42, 43, 45, 47, 48, 55, 56, 57, 61], "\uc874\uc7ac\ud558\ub294": [9, 12, 18, 34, 45, 55, 56, 57], "\uba64\ubc84\uac00": 9, "\ubb34\uc5c7\uc778\uc9c0": [9, 29, 50, 60], "\uc2dd\ubcc4\ud558\ub3c4\ub85d": 9, "\ub098\uc628": [9, 16, 23, 24, 46, 48, 49, 52, 54, 56, 58, 59, 60], "\uc778\uc2a4\ud134\uc2a4\ub97c": 9, "\ud6c8\ub828\uc758": 9, "\ucd94\uac00\ud569\ub2c8\ub2e4": [9, 56], "\ub2e8\uacc4\uc5d0": [9, 46], "\uac78\uccd0": [9, 18, 31, 35, 43], "\ubcf4\uc5ec\uc90d\ub2c8\ub2e4": [9, 10, 47, 50, 59, 60], "\uc870\uac74\uc774": [9, 21, 24], "\uc9c0\uc18d\uc801\uc73c\ub85c": [9, 29, 42], "\uc870\uc815\ub418\uace0": 9, "\ud655\uc7a5\ub418\uc5c8\uc74c\uc744": 9, "\uac1c\ub150": [9, 43, 50, 54], "\uc14b\uc5d0": [9, 21], "\ud63c\ud569": [9, 42], "\uac1c\ub150\uc5d0\uc11c": 9, "im": 9, "\ub9cc\ub4ed\ub2c8\ub2e4": [9, 47, 55], "\ud1b5\uacfc\ud558\uc5ec": [9, 39], "\uc0dd\uc131\ud569\ub2c8\ub2e4": [9, 12, 24, 35, 46, 55, 59, 60], "\uc801\uc6a9\ud569\ub2c8\ub2e4": [9, 12, 47], "\uac1c\ub150\uc774\ub098": 9, "\uc801\uc6a9\ub420": 9, "\uacc4\uce35\u3147\uc801": 9, "\ubc18\ubcf5\uc801\uc73c\ub85c": [8, 9, 21, 26, 34], "\uacb0\uacfc\ubb3c\uc5d0": 9, "\uac1c\ub150\uc758": 9, "\uc81c\uc5b4\ud558\uae30": [9, 17, 18, 24], "\uac00\uc911\uce58": [9, 18, 20, 21, 24, 27, 35, 36, 47, 52, 56, 61], "\ud56d\ubaa9\uc774": [9, 18], "\ucd94\uac00\uc801\uc73c\ub85c": [9, 10, 12, 16, 18, 20, 23, 26, 28, 36, 38, 39, 42, 46, 55, 56, 58, 61], 
"\uadf8\ub9bc\uc5d0\ub294": 9, "\uac1c\ub150\uc774": 9, "\uc67c\ucabd\uc5d0": [9, 31], "\ud45c\uc2dc\ub418\uc5b4": 9, "\uc0dd\uc131\ud588\ub294\uc9c0\ub97c": 9, "\ud65c\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [9, 24, 58], "conceptlab\uc774": 9, "\uc81c\uc548\ud55c": [9, 16, 21, 29, 41, 42], "\ud504\ub86c\ud504\ud2b8\uc640": [9, 18, 31, 35], "\uc801\uc6a9\ud588\uc2b5\ub2c8\ub2e4": [9, 59], "\uac1c\ub150\ub4e4\uc744": 9, "\ud63c\ud569\ud558\uc5ec": [9, 21, 35, 54], "\ub3c5\ud2b9\ud55c": 9, "\ucc3d\uc870\ubb3c\uc744": 9, "\uc717\uc904\uc5d0\uc11c\ub294": 9, "\uc774\uc5b4\uc9c0\ub294": [9, 43], "\uc904\uc5d0\uc11c\ub294": 9, "\uc5bb\uc5b4\uc9c4": [9, 18], "conceptlab\uc758": 9, "\uc785\uc99d\ud558\uae30": 9, "\uc815\uc131\uc801": [9, 11, 18, 21, 29, 36, 48], "\uc815\ub7c9\uc801": [9, 11, 18, 21, 24, 33, 48], "\ud3c9\uac00\ub97c": [9, 19, 21, 24, 28, 31, 35, 38, 47, 48, 51, 52, 54, 56], "\uadf8\ub9bc\ub4e4\uc5d0\uc11c": 9, "\uc788\ub4ef\uc774": [9, 16, 42], "negative\ub97c": 9, "\uc801\uc6a9\ud558\uc600\uace0": 9, "\uc2dc\ub4dc\ub97c": 9, "\ub2ec\ub9ac\ud558\uba70": 9, "\ub2a5\ub825\uc774": [9, 12, 18, 28, 45], "\uc7a5\uba74\uc5d0": [9, 34, 43], "\ubc30\uce58\ud560": 9, "\uc0dd\uc131\ubb3c\ub4e4\uc740": 9, "\ubc30\uacbd": [9, 18, 19, 21, 22, 32, 34, 35, 38, 43], "\ucc3d\uc870\ub4f1": 9, "\ub2e4\uc591\ud558\uac8c": [9, 27, 29, 53, 58], "\uac00\ub2a5\ud569\ub2c8\ub2e4": [9, 12, 24, 46, 50, 55, 58, 59], "conceptlab\uc744": 9, "mixing\uc758": 9, "mixing\uc740": 9, "\uac1c\ub150\ub4e4\uc758": 9, "\ud569\uccd0": 9, "\ud558\uc774\ube0c\ub9ac\ub4dc": 9, "\ud615\uc131\ud558\ub294": 9, "\uc870\uac74\ub9cc\uc744": 9, "\uc904\uc5d0\ub294": 9, "\ub78d\uc2a4\ud130\uc758": 9, "\uc0dd\uc0c1\uacfc": 9, "\uc9d1\uac8c\ubc1c": 9, "\uac70\ubd81\uc774\uc758": 9, "\ub4f1\uaecd\uc9c8": 9, "\uc735\ud569\ud558\ub294": 9, "conceptlab\uc5d0": 9, "\uac1c\ub150\ub4e4\uc774": 9, "\uc138\ub300": 9, "\uc5b4\ub5bb\uac8c": [9, 10, 12, 14, 20, 29, 31, 32, 33, 42, 43, 57], "\ubc1c\uc804\ud558\ub294\uc9c0": 9, 
"diffusion2\uc640": 9, "kandinski": 9, "\ud3c9\uac00\ud588\uc2b5\ub2c8\ub2e4": [9, 51], "kandinsky\uc758": 9, "\uc720\ub9ac\ud55c": 9, "\ud504\ub86c\ud504\ud2b8\ub294": [9, 24, 31], "model\uc774": [9, 16, 19, 23, 25, 26, 28, 29, 36], "\ud1a0\uadfc\uacfc": 9, "\ubaa8\ub450\uc5d0": [9, 36], "\ub9de\ucdb0\uc9c8": 9, "\ub2e4\uc911": [8, 9, 18, 20, 36, 38], "\ucc98\ub9ac\ud558\uace0": 9, "\ud45c\ud604\uc744": [9, 17, 18, 21, 34, 35], "\uac16\ucd94\uace0": [9, 12, 16, 35], "\ud3ec\ud568\ud558\uba70": 9, "\uac1c\ub150\uacfc": 9, "\ub2ee\uc9c0": 9, "\uce21\uc815\ud588\uc2b5\ub2c8\ub2e4": 9, "\ud3c9\uac00\uc5d0\ub294": 9, "\uc560\uc644\ub3d9\ubb3c": 9, "\uc2dd\ubb3c": 9, "\uacfc\uc77c": 9, "\uac00\uad6c": 9, "\uc545\uae30\uc758": 9, "5\uac00\uc9c0": [9, 31, 59], "\ub3c4\uba54\uc778\uc5d0": 9, "\uace0\ub824\ud558\uace0": [9, 21], "\uc870\ud569\uc5d0": 9, "\ub79c\ub364": [9, 14, 18, 19, 20, 21, 26, 31, 39], "\uc2dc\ub4dc\ub85c": 9, "\ud6c8\ub828\ud558\uc5ec": [9, 36], "\ucd1d": [9, 12, 15, 17, 20, 22, 29, 34, 40, 41, 42, 43, 47, 57, 58], "75\uac1c\uc758": 9, "\uc5bb\uc5c8\uc2b5\ub2c8\ub2e4": 9, "32\uac1c\uc758": 9, "\uc0dd\uc131\ud588\uc2b5\ub2c8\ub2e4": [9, 53], "diffusionr\uacfc": 9, "\ubaa8\ub378\uc5d0\uc11c\ub294": [9, 24, 33, 35], "160\uac1c\uc758": 9, "\uae30\uc900\uc73c\ub85c\ub294": [9, 42], "\ud0c0\uac9f": [9, 21, 38], "\uce74\ud14c\uace0\ub9ac\uc640\uc758": 9, "\uacf5\uac04": [9, 15, 24, 31, 34, 39, 43], "\uc720\uc0ac\uc131": [9, 18], "\uacc4\uc0b0\uc744": [9, 19, 22], "\ud2b9\uc815\ub429\ub2c8\ub2e4": 9, "\ub2e4\uc74c\uc73c\ub85c\ub294": [9, 42], "\uc81c\uc57d\uacfc": [9, 31], "\uac70\ub9ac\ub97c": [9, 14, 22, 27, 29, 36, 39, 43, 54, 57], "\uc720\uc0ac\uc131\uc5d0\uc11c": 9, "\uc6b0\uc6d4\ud55c": [9, 46], "\ubcf4\uc600\uace0": [9, 25, 28], "\uc2e0\ub8b0\uc131": [9, 24], "\uac70\ub9ac": 9, "\uce21\uc815\uc5d0\uc11c": 9, "\uce74\ud14c\uace0\ub9ac\uc5d0\uc11c": [9, 32], "4\uac00\uc9c0": [9, 28, 54], "kandinsky\ub97c": 9, "\ub2a5\uac00\ud588\uc2b5\ub2c8\ub2e4": 9, 
"personalization\uacfc": [9, 17], "\ud3ec\ud568\ud558\ub294": [9, 11, 21, 28, 31, 35, 43, 47, 53], "\ud56d\uc0c1": [9, 35], "\uc720\uc9c0\ud558\uc9c0\ub294": 9, "\ubabb\ud569\ub2c8\ub2e4": [9, 24], "\uac00\uc838\uc624\uc9c0\ub294": 9, "\ube44\ud589\uae30": 9, "\ubb3c\uace0\uae30": 9, "2\uc5d0": [9, 28, 38], "\uad00\ub828\uc774": 9, "\uc18c\uac1c\ud588\uc2b5\ub2c8\ub2e4": [9, 24], "\uc0ac\uc6a9\uc744": [9, 28], "constraints\ub77c\ub294": 9, "\uc870\uac74\ub4e4\uc744": 9, "\ucd9c\ub825\uc5d0": [9, 21], "\uacfc\uc815\uc5d0\uc11c\ub294": [9, 33], "\ub3c5\ud2b9\ud558\uba74\uc11c\ub3c4": 9, "\uba64\ubc84\ub4e4\uacfc\uc758": 9, "\uba85\ud655\ud55c": [9, 23, 31], "\uad6c\ubcc4\uc744": 9, "\ubcf4\uc7a5\ud588\uc2b5\ub2c8\ub2e4": 9, "\ud6a8\uacfc\uc131\uc744": 9, "\uc785\uc99d\ud588\uc73c\uba70": 9, "\ub2e4\uc591\ud558\uace0": [9, 35], "\ub9e4\ub825\uc801\uc778": 9, "2302": [10, 59], "05543": 10, "lllyasviel": 10, "mai": [10, 21, 28, 29, 34, 38, 43, 47, 49, 50, 54], "28": [10, 60], "\ubaa8\ub378\ub4e4\uc740": [10, 11, 12, 28, 35, 49, 56], "prompt\ub85c": [10, 26, 28, 40, 43], "\uc774\ub7f0": [8, 10, 12, 18, 19, 26, 32, 41, 42], "control\ub9cc\uc73c\ub85c": 10, "\uc870\uc808\ud558\ub294\ub370": 10, "\uc918\uc11c": 10, "controlnet\uc774\ub77c\ub294": 10, "profession": 10, "prompt\uc640": [10, 11, 23, 26, 36], "canni": [10, 26], "edge\ub97c": 10, "\ubc1b\uc544\uc11c": [10, 12, 24, 41, 60], "\uc624\ub978\ucabd\uc758": 10, "\uc2dd\uc73c\ub85c": [10, 16, 39], "\uadf8\ub9bc\uc5d0\uc11c\ub294": [10, 33], "controlnet\uc774": 10, "\uc5ed\ud560\uc785\ub2c8\ub2e4": 10, "conrolnet": 10, "\uadf8\ub7ec\uba74": [10, 26, 28, 41, 57], "\ud588\uc744\uae4c\uc694": [10, 12], "\uc774\uc81c\ubd80\ud130": 10, "\uc54c\uc544\ubcf4\ub3c4\ub85d": [10, 41], "\ud558\uaca0\uc2b5\ub2c8\ub2e4": [10, 24, 41, 60], "controlnet\uc758": [10, 17, 26], "\uad6c\uc870\ub294": [10, 41, 42, 59], "\uac00\uc9d1\ub2c8\ub2e4": [10, 55], "lock": 10, "copy\uc640": 10, "trainabl": [10, 13, 24, 25, 26, 27, 30, 32, 56], 
"copy\ub97c": 10, "\uc124\uacc4\ud588\ub294\uc9c0": 10, "\uc54c\uc544\ubd05\uc2dc\ub2e4": 10, "\uae30\uc874\uc5d0": [10, 11, 21, 27, 29, 30, 37, 41, 42, 55, 56, 61], "\ubc29\ub300\ud55c": 10, "\uc704\ud574\uc11c\uc785\ub2c8\ub2e4": 10, "\uc591\uc774": [10, 42], "\uacbd\uc6b0\uc5d0": [10, 21, 42, 47, 59], "\uc624\ubc84\ud53c\ud305\uc744": [10, 21], "\ud53c\ud560": [10, 35], "\ud6a8\uacfc\ub3c4": 10, "convolution\uc774\ub780": 10, "weight\ub791": 10, "\ucd08\uae30\ud654\ud55c": 10, "1x1": 10, "convolution\uc744": [10, 31], "\ub9d0\ud569\ub2c8\ub2e4": [10, 47, 55], "\ud6c8\ub828\uc774": [10, 21, 31, 36], "\uc2dc\uc791\ub418\uae30": 10, "\uc804\uc5d0\ub294": 10, "input\uc5d0": [10, 20, 29, 48], "output\uc774": [10, 47], "\ub611\uac19\uc544\uc9d1\ub2c8\ub2e4": 10, "\ubaa8\ub378\uc774\ub791": 10, "\ub611\uac19\uc740": [10, 28], "\uac00\uc9c0\uac8c\ub418\ubbc0\ub85c": 10, "\ube44\uc2b7\ud558\ubbc0\ub85c": 10, "scratch\ubd80\ud130": [10, 27, 54], "\ud6c8\ub828\uc2dc\ud0ac": 10, "\uc788\uac8c\ub429\ub2c8\ub2e4": 10, "convolution\uc740": 10, "\ud558\ub294\uc9c0": 10, "\uc790\uc138\ud788": [10, 12, 24, 41, 50, 58], "\uadf8\ub9bc\uc5d0\uc11c": [10, 12, 16, 28, 41, 42, 47, 50], "\uc218\uc2dd\uc73c\ub85c": [10, 27, 46, 54], "\ud45c\ud604\ud558\uaca0\uc2b5\ub2c8\ub2e4": 10, "mathbf": [8, 10, 18, 19, 20, 26, 34, 39, 41, 57], "neural": [10, 13, 18, 20, 25, 27, 36, 44, 46, 47, 55], "\uc758\ubbf8\ud569\ub2c8\ub2e4": [10, 24, 42, 51], "\ud45c\ud604\ud558\uae30\uc704\ud574": 10, "\ub9cc\ub4e4\uc5b4\uc11c": [10, 12, 43], "parameter\ub97c": [10, 11, 14, 19, 22, 27, 36, 39, 42, 48, 49], "theta_": [10, 19, 27, 36], "\ub77c\uace0\ud558\uace0": 10, "\uace0\uc815\uc2dc\ucf1c\ub450\uaca0\uc2b5\ub2c8\ub2e4": 10, "\ud45c\ud604\ud558\uace0": [10, 28], "convolution\uc758": 10, "z1": 10, "z2": 10, "\ub450\uaca0\uc2b5\ub2c8\ub2e4": 10, "\ud45c\ud604\ud560": [10, 20, 24, 27, 29, 36, 39, 46, 54, 58], "weight\uc640": [10, 42], "bias\uc758": 10, "\ucd08\uae43\uac12\uc774": 10, "0\uc774\ubbc0\ub85c": 10, 
"\uc9c4\ud589\ub418\uc9c0": 10, "\uc54a\uc558\uc744": [10, 42], "\uc785\ub2c8\ub2e4": [10, 12, 24, 42, 46, 47, 51, 58, 60], "\ub0b4\ubbc0\ub85c": 10, "\ubcf4\uc874\ud560": [10, 21], "\uc804\ubd80": 10, "\ucd08\uae30\ud654\ub418\uc5b4\uc788\uc73c\uba74": 10, "gradient\uac00": [10, 20], "0\uc774\ub77c\uc11c": 10, "\uc548": [10, 12, 17, 19, 32, 47], "\ub418\ub294\uac70": 10, "\uc544\ub2d0\uae4c\uc694": 10, "\uacbd\uc6b0\ub97c": [10, 24, 28], "\uc0dd\uac01\ud574\ubcf4\uc8e0": 10, "wx": [10, 27], "gradient\ub294": 10, "partial": [8, 10, 11, 14, 20, 34, 36, 55], "0\uc774\uace0": [10, 42], "neq0": 10, "gradient": [10, 13, 14, 19, 21, 22, 25, 28, 30, 34, 36, 46, 47, 49, 51, 55, 57], "step\uc5d0\uc11c": [10, 14, 16, 25], "0\uc774": [10, 13, 14, 26], "\uac12\uc73c\ub85c": [10, 16, 23, 25, 27, 29, 30, 33, 54, 60], "\uac00\uac8c\ub418\uace0": 10, "\ud575\uc2ec\uc801\uc778": [10, 23, 34], "\uac00\uc815\uc774": 10, "\uc704\ubc30\ub420": 10, "\uac00\ub2a5\uc131\uc774": [10, 21, 25], "\uc9c0\uae08\uae4c\uc9c0": [10, 14, 34], "\uc598\uae30\ud55c": 10, "diffusion\uc5d0": [10, 27, 29, 54], "\uc801\uc6a9\ud55c": [10, 16, 23, 25, 28, 29, 31, 37, 38, 44, 46, 54, 58, 59, 61], "diffusion\uc5d0\uc11c": 10, "\ud615\ud0dc\uc785\ub2c8\ub2e4": [10, 41], "training\uc744": [10, 16, 23, 28], "50": [10, 14, 20, 29, 32, 36, 42, 43, 45, 57], "\ud655\ub960\ub85c": [10, 21, 32], "empti": [10, 23, 55, 61], "string\uc73c\ub85c": 10, "\ubc14\uafd4\uc8fc\uc5c8\ub2e4\uace0": 10, "prompt\uac00": [10, 11, 19], "\uc8fc\uc5b4\uc9c0\uc9c0\uc54a\uc744": 10, "semantics\ub97c": 10, "\ubc30\uc6b0\ub294": [10, 31], "\uacbd\ud5a5\uc774": [8, 10, 11, 12, 22, 40], "\ud5a5\uc0c1\uc2dc\ucf1c\uc904": 10, "\uc788\ub2e4\uace0": [10, 12, 14, 16, 20, 23, 27, 38, 39, 42, 43, 46, 48, 55, 56, 58, 59, 60, 61], "training\uc774": 10, "\ubc29\ubc95\ubcf4\ub2e4": [10, 21, 31], "\ud6a8\uc728\uc801\uc774\ub77c\ub294": [10, 32], "\uacb0\uacfc\ub4e4\uc740": 10, "\uacb0\uacfc\ub4e4\uc785\ub2c8\ub2e4": 10, "\ub17c\ubb38\uc5d0": [10, 14, 
31, 42, 60], "\uc788\uc73c\ub2c8": [8, 10], "\ucc38\uace0\ud558\uc2dc\uae30": 10, "\ubc14\ub78d\ub2c8\ub2e4": 10, "limitation\uc774\ub77c\uace0": 10, "\uc774\ubbf8\uc9c0\uc785\ub2c8\ub2e4": [10, 12], "\uc8fc\uc5c8\uc74c\uc5d0\ub3c4": 10, "\uc0dd\uc131\ub418\uc9c0": 10, "\uc54a\ub294": [10, 12, 20, 22, 24, 31, 34, 35, 42, 46, 47, 50, 58, 60], "\ubc1c\uc0dd\ud588\uc2b5\ub2c8\ub2e4": 10, "limit": [10, 22, 28], "\ucf54\ub4dc\ub294": [10, 39], "\uacf5\uc2dd": 10, "\uad6c\ud604": [10, 45, 51, 60], "\uac00\uc838\uc654\uc2b5\ub2c8\ub2e4": 10, "\ucd08\uae30\ud654\ud558\ub294": [10, 56], "\ucf54\ub4dc\ub85c": [10, 38], "\ub9cc\ub4e4": [8, 10, 12, 20, 26, 43, 48], "def": [10, 13, 14, 30, 31, 41, 45, 51, 56, 59, 60], "zero_modul": 10, "modul": [10, 14, 17, 22, 30, 31, 41, 51, 56, 59, 60], "detach": [10, 45, 51], "zero_": 10, "\uae30\ubcf8\uc801\uc73c\ub85c": [10, 16, 21, 22, 30, 40, 52, 59, 61], "nn": [10, 14, 30, 31, 39, 41, 51, 56, 59, 60], "sequential\uacfc": 10, "\uac19\uc740\ub370": 10, "step\uac19\uc740": 10, "input\uc744": 10, "\ubc1b\uc544\uc904": 10, "\ub9cc\ub4e0": [10, 39, 41, 42, 47], "timestepembedsequenti": 10, "sequenti": [10, 14, 29, 41, 51, 59], "timestepblock": 10, "pass": [10, 12], "children": 10, "support": [10, 28], "extra": [10, 12, 22], "emb": [10, 14], "context": [8, 10, 12, 14, 15, 18, 26, 28, 30, 38, 39, 40, 44, 48, 50], "isinst": 10, "elif": [10, 14, 45, 50, 56], "spatialtransform": 10, "els": [10, 13, 14, 30, 31, 41, 45, 50, 56, 59], "github\uc758": 10, "cldm": 10, "py\uc5d0": 10, "class\uc785\ub2c8\ub2e4": 10, "init": [10, 30, 36, 56], "\uae38\uc5b4\uc11c": 10, "\uc0dd\ub7b5\ud588\uc2b5\ub2c8\ub2e4": 10, "__init__": [10, 13, 14, 31, 41, 51, 56, 59, 60], "make_zero_conv": 10, "conv_nd": 10, "dim": [10, 14, 31, 41, 45, 50, 59], "hint": [10, 11], "kwarg": 10, "t_emb": 10, "timestep_embed": 10, "model_channel": 10, "repeat_onli": 10, "fals": [10, 13, 14, 30, 31, 45, 51, 56, 59], "time_emb": 10, "guided_hint": 10, "input_hint_block": 10, "type": [10, 
50, 51], "dtype": [10, 14, 45, 50], "zero_conv": 10, "zip": [10, 13, 14], "input_block": 10, "append": [10, 14, 41, 51, 56, 59], "middle_block": 10, "middle_block_out": 10, "customizi": 11, "To": [11, 12, 13, 33], "2212": [11, 15, 35, 53], "04488": 11, "seunghwan": [11, 13, 22, 25, 37, 40, 62], "ji": [11, 13, 22, 25, 37, 40, 62], "aug": [11, 18, 25, 40], "user\uc758": 11, "private\ud55c": 11, "concept\uc744": [11, 26, 43], "\uc0dd\uc131\ud558\uace0\uc790\ud558\ub294": 11, "\uc695\uad6c\ub294": 11, "\ud480\uc9c0": 11, "\ubabb\ud568": 11, "diffusion\uc740": [8, 11, 22, 26], "partial\ud55c": 11, "\ubd80\ubd84\ub9cc\uc744": 11, "\ud559\uc2b5\uc2dc\ud0b4\uc73c\ub85c\uc368": 11, "\uae30\uc874\ubcf4\ub2e4": 11, "\ubfd0": [11, 36], "concept\uc5d0": [11, 43], "compress\ud558\ub294": 11, "\ud65c\ubc1c\ud558\uac8c": 11, "\ub418\uc5b4\uc9d0": 11, "\uc785\ub825\ub9cc\uc73c\ub85c": 11, "\uc218\uc900\uae4c\uc9c0": [11, 25], "\uc774\ub984": [11, 16], "general\ud55c": [11, 16], "\uc0dd\uc131\ud558\uc9c0\ub9cc": [11, 21, 24, 28, 35, 51, 56], "user\uac00": 11, "concept\uc758": [11, 43], "\ud589\ubcf5\ud55c": 11, "\uc6b0\ub9ac": [11, 43], "\uac00\uc871": 11, "\uc6b0\ub9ac\uc9d1": 11, "\uac15\uc544\uc9c0": 11, "\ubf40\uc090\uac00": 11, "\ud30c\ub9ac\ub85c": 11, "\uc5ec\ud589\uc744": 11, "\ub5a0\ub098\ub294": 11, "\uacfc\uc815\uc911\uc5d0": 11, "\ubcf4\uc9c0": [11, 50], "\ubabb\ud588\uae30\ub54c\ubb38\uc5d0": 11, "model\uc5d0\uac8c\ub294": 11, "\ub2f9\uc5f0\ud55c": 11, "\uba87\uc7a5\uc758": 11, "\uc774\ubbf8\uc9c0\ub9cc\uc73c\ub85c": [11, 40], "finetuning\ud558\ub294": [11, 24], "\ud559\uc2b5\ud558\uace0\uc790\ud558\ub294": 11, "\uc0dd\uc131\ud574\ub0b4\uc57c\ud568": 11, "\ud559\uc2b5\ub418\uc5c8\ub358": 11, "finetuning\ud55c": 11, "\ud6c4\uc5d0\ub3c4": [11, 24], "customization\uc774": 11, "\uc9c4\ud589\ud558\ub2e4\ubcf4\uba74": 11, "\ud559\uc2b5\ud588\ub358": 11, "\uc78a\uc5b4\ubc84\ub9ac\uac70\ub098": 11, "\uc65c\uace1\ud574\ubc84\ub9bc": 11, "draft": 11, "overfit": [11, 50], 
"\ub418\uc5b4\uc11c": 11, "\uacb0\uacfc\ubb3c\uc758": [11, 38], "variation\uc774": [11, 41], "\ub0ae\uc544\uc9d0": 11, "\uc880\ub354": [11, 13, 22, 25, 40, 54], "\ub098\uc544\uac00": 11, "text\ub85c": [11, 20], "\uc720\uc9c0\ub97c": 11, "caption\uc744": [11, 39], "regular": [11, 14, 19, 29, 34, 39, 45, 56, 57, 60], "tuning\ub3d9\uc548": 11, "augment": [11, 15, 27, 32, 40, 42, 44, 45], "\ubc29\uc2dd\uc758": [11, 13, 19, 37, 38, 49], "\ubcf4\uc5ec\uc8fc\uace0\uc788\uc74c": 11, "\uac8c\ub2e4\uac00": [11, 12, 23, 36, 38], "control\ub3c4": 11, "\uac00\ub2a5\ud568": [11, 15, 30, 33, 43, 48, 52, 57], "general\ud558\uc9c0": 11, "\uc0dd\uc131\uc740": [11, 33], "\ubd88\uac00\ub2a5\ud568": 11, "global\ud55c": 11, "\ud3ec\ud568\ud55c": [11, 17, 18, 27, 31, 33, 34, 37], "\uc18c\ub7c9\uc758": [11, 14], "learning\uc740": 11, "\uc0dd\uac01\ubcf4\ub2e4": 11, "\ud6a8\uacfc\uc801\uc774\uace0": 11, "\uc720\uc6a9\ud568": 11, "\uc804\uccb4\ub97c": [11, 16, 26, 32, 39, 53, 61], "\ud559\uc2b5\ud558\uac70\ub098": [11, 35], "\ucd94\uac00\ud574": [11, 14, 16, 20, 22, 37, 38, 44, 54], "\uc7ac\ud559\uc2b5": [11, 13, 49], "\uc704\uc5d0\uc11c": [11, 23, 24, 31, 37, 38], "customization\uc758": 11, "\uc77c\uc73c\ud0a4\uae30": 11, "\uc26c\uc6c0": 11, "etc": [8, 11, 18, 22, 37], "\uc544\uc8fc": [11, 16, 26], "\uc77c\ubd80\ub9cc\uc744": 11, "\ub300\uc0c1\uc73c\ub85c": [11, 47], "\ucee8\uc149\uc73c\ub85c": 11, "vs": [11, 13, 16, 18, 20, 23, 34, 36, 38, 47, 48, 49, 52, 53, 55, 58, 59, 61], "compress\ud560": 11, "finetuning\ud568\uc73c\ub85c\uc368": 11, "resourse\ub97c": 11, "backbone\uc73c\ub85c": 11, "dm\uc758": [11, 36], "\uc2dc\uc810\uc5d0": [11, 18, 36], "noise\uac00": [11, 14, 16, 23, 25, 41], "\uc11e\uc778": 11, "text\ub098": 11, "\uc0ac\uc6a9\ud558\uc9c0\uc54a\uace0": 11, "space\ub85c": [11, 16, 22, 28, 37, 43], "embedding\ub41c": 11, "\u03b5": [11, 13], "\u03b5_": 11, "\u03b8": [11, 18], "\ub080": 11, "\u03b5\ub97c": 11, "\uc608\uce21\ud574\ub0b4\ub294": [11, 14], "tuning\ud560\ub54c\ub294": 
11, "layer\uc5d0\ub300\ud574": 11, "update\ud558\ub294\uac8c": 11, "\uae30\ubcf8": [11, 21, 29, 34, 35, 38, 42], "resource\uac00": [11, 22, 36], "\ube44\ud6a8\uc728\uc801\uc73c\ub85c": 11, "\ub9ce\uc774\ub4e4\uace0": 11, "overfitting\ub418\uae30": 11, "\ubcc0\ud654\ub7c9\uc744": [11, 27, 54], "\uccb4\ud06c": 11, "while": 11, "\ubd80\ubd84\uc5d0\ube44\ud574": 11, "\uc5f0\uc0b0\uc758": [11, 15, 26, 29], "wegith": 11, "latent\uc5d0": [11, 21], "\uc8fc\uc785\ud558\ub294": [11, 16], "kei": [11, 18, 26, 27, 29, 30, 33, 37, 40, 44, 61], "parameter\uc5d0": 11, "\ub2e8": [11, 24, 35, 37, 40, 46, 50, 57], "\ucc28\uc9c0": 11, "\ud3ec\ud568\ub418\ub294": 11, "\ub098\uba38\uc9c0\ub294": [11, 15, 17, 32, 40], "\uc4f0\uc9c0\uc54a\ub294": 11, "\ub2e8\uc5b4\ub85c": 11, "\ud615\uc2dd\uc73c\ub85c": 11, "captioning\ud55c": 11, "finetuning\uc911\uc5d0": 11, "\uc78a\uc5b4\ubc84\ub9ac\ub294": 11, "\ud604\uc0c1\uc774": [8, 11, 21, 26, 41, 47, 53], "\uc788\uc744\uc218\uc788\uc74c": 11, "moon": 11, "\uc0dd\uc131\ud558\uba74": [11, 47], "finetuning\ud588\ub358": 11, "moongat": 11, "\uc0dd\uc131\ud574\ubc84\ub9bc": 11, "\ubc29\uc9c0\ud558\uae30\uc704\ud574": 11, "world\uc758": 11, "image\uc5d0\uc11c": [11, 16, 28, 48], "200\uc7a5\uc758": [11, 40], "regul": 11, "\uc720\uc0ac\ud558\ub2e4": [11, 36], "clip\uc5d0\uc11c": [11, 42], "space\uc0c1\uc758": 11, "vector\uac00": [11, 19], "similar\ud558\ub2e4": 11, "joint": [11, 19, 23, 26, 39, 48, 49, 61], "trane": 11, "rare\ud55c": 11, "\ubd80\uc5ec\ud574": [11, 59], "constrain": 11, "optim": [11, 13, 15, 17, 19, 20, 24, 26, 27, 32, 33, 35, 36, 40, 43, 48, 50, 60, 61], "merg": [11, 30], "concept\uc73c\ub85c": 11, "weight\ub97c": [11, 22, 27, 30, 39, 42], "appendix": [11, 19, 58], "a\uc5d0\ub294": 11, "\ub098\uc640\uc788\ub294\ub370": 11, "\uc624\ud0c8\uc790\uc77c": 11, "\uac00\ub2a5\uc131": 11, "reg": [11, 29, 57], "caption\uc758": 11, "\ubf51\uc544": [11, 49], "\uacf1\ud55c": [11, 61], "\uac12\uacfc\uc758": 11, "norm\uc744": 11, 
"\uacc4\uc0b0\ud588\uc744\ub54c": 11, "n\uac1c\uc758": [11, 18, 20, 22, 36, 48], "attention\uc774": [11, 28], "\ucc3e\uc544": [11, 20, 32, 43], "\ud558\ub098\ub9cc": 11, "\uc0ac\uc6a9\ud558\uc790": 11, "250": [11, 45], "two": [8, 11, 12, 21, 28, 32, 36, 39, 41, 43, 48, 50, 54, 55, 58, 59, 61], "veri": 11, "small": [11, 18, 43, 53, 59], "far": [11, 34], "awai": 11, "techniqu": [11, 14, 22, 35, 37, 52], "quant": [11, 43], "\ub300\uc751\ub418\ub294": [11, 56], "\uc0dd\uc131\ud574\ub0c8\ub294\uac00": 11, "\ud45c\ud604\ud574\ub0c8\ub294\uac00": 11, "baselin": [11, 18, 28, 32, 48, 56, 58], "customdiffus": [11, 24], "all": [11, 12, 30, 36], "\uc120\ud638": 11, "inversion\uc740": [11, 43], "alignment\ub294": 11, "\uc120\ud638\ub3c4\uc640": 11, "\ube44\uc2b7\ud558\uc9c0\ub9cc": [11, 42], "alignment\uc218\uce58\ub97c": 11, "diffusion\uc774": 11, "overfitting\ub41c": [11, 40], "\u314cgen": 11, "generate\ub41c": 11, "\uc218\uce58\ub294": [11, 25, 49], "regulat": 11, "customizing\uc774": 11, "resourse\uac00": 11, "Of": 11, "category\uc758": 11, "object\uc5d0": [11, 28, 31], "\ub3d9\uc791\ud558\uc9c0": [11, 21, 25], "hierarch": 12, "2204": 12, "06125v1": 12, "sep": [12, 18, 26, 35, 52, 53, 55, 56], "18": [12, 16, 39, 46, 50], "dalle2": [12, 22], "2022\ub144\uc5d0": 12, "\uacf5\uac1c\ub418\uc5b4": 12, "\uc138\uc0c1\uc744": 12, "\ub180\ub77c\uac8c": 12, "\ub2a5\ub825\ub3c4": 12, "\ub6f0\uc5b4\ub0ac\uace0": 12, "\uc0ac\uc6a9\uc790": [8, 12], "\uc785\ub9db\uc5d0": 12, "\uc870\uc791\ud560": 12, "\ub418\uc5c8\uc8e0": 12, "\uc774\ub984\uc740": 12, "\uc77c\uae4c\uc694": 12, "\ucd08\ud604\uc2e4\uc8fc\uc758": 12, "\ud654\uac00": 12, "salvador": 12, "dali": 12, "wall": 12, "\ud569\uc131\uc5b4\uc785\ub2c8\ub2e4": 12, "\uc0dd\uc131\ud574\ub0b8": 12, "\uacb0\uacfc\ubb3c\uc774": [8, 12, 38, 48], "\uc5b4\ub5bb\uae38\ub798": 12, "\uacb0\uacfc\ubb3c": [8, 12, 18, 19, 20, 48], "\uc0dd\uc804": 12, "\ubaa8\uc2b5": [12, 18, 33], "vibrant": 12, "robot": 12, "half": [12, 45], "\ubaa8\uc2b5\uc774": [12, 
47], "\ubcf4\uc774\ub124\uc694": 12, "\ucd08\ud604\uc2e4\uc8fc\uc758\uc801": 12, "\uac19\uae30\ub3c4": 12, "corgi": 12, "\uc5b4\ub5a4\uac00\uc694": 12, "depict": 12, "explos": 12, "nebula": 12, "\ubaa8\uc2b5\uc744": [8, 12, 27, 28, 34, 39, 43, 54, 61], "\uc131\uc6b4\uc758": 12, "\ud3ed\ubc1c\ub85c": 12, "\ubb18\uc0ac\ud574\ub2ec\ub77c\uace0": 12, "\ud588\uc744": [12, 16, 45, 50, 54], "\uadf8\ub9bc\uc785\ub2c8\ub2e4": [12, 60], "nasa": 12, "\ucd08\uc2e0\uc131": 12, "\ud3ed\ubc1c\uc758": 12, "\uc794\ud574\uc785\ub2c8\ub2e4": 12, "\uadf8\ub7f4\ub4ef\ud558\uc9c0": 12, "\uc54a\ub098\uc694": 12, "mosaic": 12, "largest": 12, "ever": 12, "taken": 12, "hubbl": 12, "telescop": 12, "crab": 12, "six": 12, "light": [12, 18, 19, 20, 34, 39, 61], "wide": 12, "expand": [12, 49, 59], "remnant": 12, "star": 12, "supernova": 12, "\uc8fc\uc758\uc0ac\ud56d": 12, "\ube44\uc120\ud615\uc801\uc73c\ub85c": 12, "\uc0b4\ud3b4\ubd05\ub2c8\ub2e4": 12, "\uc624\ud508\uc6d4\ub4dc": 12, "\uac8c\uc784\ucc98\ub7fc": 12, "\ub9d0\uc774\uc8e0": 12, "\ud575\uc2ec\uc774": 12, "\ub418\ub294": [12, 16, 19, 21, 27, 28, 32, 39, 48, 49, 50, 51, 53, 59], "\uc9c8\ubb38\ub4e4\uc744": 12, "\ub358\uc9c0\uba70": 12, "\ud30c\ud5e4\uccd0": 12, "\uac81\ub2c8\ub2e4": 12, "\ud3ec\uc2a4\ud305\uc740": 12, "openai": [12, 35, 39, 46], "blog": [12, 43], "assemblyai": 12, "eden": 12, "meyer": 12, "\ucc38\uace0\ud588\uc2b5\ub2c8\ub2e4": 12, "\ubcf8\uaca9\uc801\uc73c\ub85c": 12, "\uc804\uc5d0": [12, 15, 35, 39, 50], "\uc54c\uc544\uc57c\ud560": 12, "\ubaa8\ub378\uc785\ub2c8\ub2e4": [12, 41, 51], "fundament": 12, "principl": 12, "quit": 12, "simpl": [12, 20, 25, 26, 39, 49, 53, 59], "first": [12, 13, 20, 30, 36, 46], "associ": 12, "caption": [12, 23, 28, 44, 45, 48, 52, 56], "through": [12, 23, 35, 36, 43, 55], "respect": [12, 19, 49, 52], "dimension": [12, 14, 39], "Then": [12, 30], "cosin": [12, 16, 33, 43, 49, 50, 58, 61], "similar": [12, 32, 44, 46, 48, 50, 57], "each": [12, 41, 50], "simultan": [12, 20], "maxim": [12, 48], 
"correct": [12, 50], "minim": [12, 27, 57], "incorrect": [12, 24, 50], "\ud1b5\ud569\uc2dc\ucf30\uc2b5\ub2c8\ub2e4": 12, "\ucd5c\ucd08\ub294": 12, "\uc815\ub2f5\uc740": 12, "\uc544\ub2d9\ub2c8\ub2e4": [12, 41], "22\ub144": 12, "5\uc6d4": 12, "\uc5d0\uac8c": [12, 45], "sota": [12, 21, 22, 31, 32, 33, 34, 35, 40, 42, 44, 48, 52, 59], "\ub0b4\uc8fc\uc5c8\uc2b5\ub2c8\ub2e4": 12, "\ucc0d\uba39\ud558\uae30": 12, "\ub0b4\uc758": [12, 15, 28, 38, 39, 56], "\ud3ec\ucc29\ud574\ub0bc": 12, "\ud45c\ud604": [12, 14, 15, 21, 29, 34, 39, 44, 58], "\ub04c\uc5b4\uc62c\ub9ac\uae30": [12, 32], "\uc778\ub370\uc694": 12, "unclip": [12, 26], "\ubd80\ub985\ub2c8\ub2e4": [12, 58], "\ubcf5\uc7a1\ud574\ubcf4\uc774\ub2c8": 12, "assembl": 12, "\ub2e8\uc21c\ud654\ub41c": [8, 12], "\uc0b4\ud3b4\ubcfc\uac8c\uc694": 12, "f1x4fhzf4mq": 12, "360": [12, 19, 34, 55], "\ubaa8\ub378\uc778": [12, 37, 45, 47, 52], "\uac19\ub124\uc694": 12, "\ucea1\uc158\uc744": [12, 31], "\uc0c1\uc751\ud558\ub294": [12, 19], "autogregress": 12, "\ube44\uad50\ud558\ub294": [12, 16, 24, 27, 54], "\uc218\ud589\ud588\uc2b5\ub2c8\ub2e4": [12, 47], "computation": [12, 55, 59], "\ud6c4\ubc18\ubd80\uc5d0\ub294": 12, "\uc2e4\ud5d8\ud569\ub2c8\ub2e4": 12, "\ubaa8\ub378\ub9cc": 12, "\uc774\ub791": [12, 48, 60], "\uc0ac\uc6a9\ud588\uc744\uae4c\uc694": 12, "\ud559\uc2b5\ud558\ub294\ub370": [12, 29, 43, 50, 55, 56, 61], "\uc131\uacf5\uc744": [12, 15], "\uac70\ub450\uace0": 12, "shift": [12, 15, 16, 45], "capabl": [12, 26], "\ub6f0\uc5b4\ub0ac\uc2b5\ub2c8\ub2e4": 12, "\ub2ec\uc131\ud574\ub0c8\uc2b5\ub2c8\ub2e4": 12, "tak": 12, "\uac31\uc2e0\ud558\ub294": 12, "\uc911\uc774\uc5c8\uc8e0": 12, "determinist": [12, 13, 49, 58, 60], "\ub355\ubd84\uc5d0": [12, 39], "\uc874\uc7ac\ud558\uc9c0": [12, 31, 47], "essenti": 12, "\ubcc0\uc8fc\ud558\uba74\uc11c": 12, "\uc788\uc8e0": 12, "variat": [12, 14, 26, 32, 61], "\uc67c\ucabd\uc758": 12, "\ubcf4\uc874\ub429\ub2c8\ub2e4": 12, "\uadf8\ub4e4\uc774": 12, "\ud45c\ud604\ub418\ub294": 12, 
"\ubc29\uc2dd\uc774\ub098": 12, "\ubc14\ub01d\ub2c8\ub2e4": 12, "\uadf8\ub7fc\uc5d0\ub3c4": [12, 21, 28, 44, 47], "\ud2b9\uc720\uc758": 12, "\ud654\ud48d\uc740": 12, "\ubcc0\uc8fc\uace1\ucc98\ub7fc": 12, "\ub9e4\ubc88": [12, 16, 30, 48], "\uc5f0\uc8fc": 12, "\ud574\ub0bc": 12, "\uc788\ub294\uac81\ub2c8\ub2e4": 12, "\ud30c\ud5e4\uce58\uae30": 12, "\uc774\ubc88\uc5d0\ub294": [12, 47], "\uc0b4\ud3b4\ubcf4\uc8e0": 12, "\uc790\uccb4\uc758": [12, 20, 32], "\uc124\uba85": [12, 16, 31, 35, 39, 49], "\uc0ac\uc2e4": [12, 25, 28], "\uc870\uac74\uc73c\ub85c": [12, 18, 21, 26, 35, 36, 39], "\uc790\uccb4\ub3c4": 12, "\ubc1b\uc2b5\ub2c8\ub2e4": 12, "\ubb3c\ub860": [12, 42], "\ubc1b\uaca0\uc8e0": 12, "1\ub3001": 12, "\ub300\uc751\ub418\uae30": 12, "duel": 12, "\ubb38\uc81c\ub420": 12, "\uc5c6\ub2e4\uace0": [12, 15, 41], "\ubcc0\ub860\ud569\ub2c8\ub2e4": 12, "\ub192\uc774\uae30": [12, 25, 26, 33, 54], "2\uac1c\uc758": [12, 26, 34, 39, 54, 59], "dot": [8, 12, 16, 22], "\uc0ac\uc6a9\ud588\ub2e4\uace0": [12, 23, 36, 42, 50, 58], "modifi": 12, "\uc8fc\uc7a5\ud569\ub2c8\ub2e4": [12, 56, 58, 59, 61], "\ud1b5\ud569\uc2dc\ud0a4\ub0d0\ud558\uba74": 12, "\ud558\ub294\uac70\uc8e0": 12, "\ubc29\ubc95\uc73c\ub85c": [12, 21, 28, 42, 43, 47, 49, 51, 55, 57, 58], "\uc0ac\uc6a9\ud568\uc73c\ub85c\uc368": [12, 36, 39, 49, 53], "\uc788\ub358": 12, "\ud65c\uc6a9\ud560": [12, 19, 26, 43, 46], "\ud544\uc694\ud560\uae4c\uc694": 12, "obtain": 12, "full": [8, 12, 14, 21, 25, 29, 31, 40, 53, 56], "possibl": [12, 36, 58], "given": [8, 12, 16, 17, 18, 19], "\ub531\ud788": 12, "\uc640\ub2ff\uc9c0\ub294": 12, "\uc2e4\ub9dd\ud558\uae34": 12, "\uc774\ub985\ub2c8\ub2e4": 12, "\uc720\ubb34\uc5d0": 12, "\uc218\ud589\ud588\ub2e4\uace0": [12, 23], "\uc0b4\ud3b4\ubcfc\uae4c\uc694": 12, "\ubaa8\ub378\ucc98\ub7fc": [12, 45], "\uc8fc\uc5b4": [12, 45, 47], "\ud6cc\ub96d\ud588\uc2b5\ub2c8\ub2e4": 12, "\uacbd\uc6b0\uc758": [12, 21, 47], "\uc544\ud0a4\ud14d\uccd0\uc5d0": 12, "\uadf8\ub807\uc9c0\ub9cc": [12, 52], 
"\uc758\ubb38\uc774": [12, 42], "\ub9d0\ub054\ud788": 12, "\ud574\uc18c\ub418\uc9c0\ub294": 12, "\uc65c\ub0d0\ud558\uba74": [12, 16], "95": [12, 35], "\ubc29\uc2dd\uc5d0": [12, 29, 39, 43], "\uc801\uc6a9\ud574": [12, 38, 54], "\uc2e4\ud5d8\ud588\uc2b5\ub2c8\ub2e4": [12, 58], "\uacf5\uc815\ud55c": [12, 39], "\uc2e4\ud5d8\uc774\ub77c\uace0": 12, "\ubcf4\uae34": 12, "\uc5b4\ub824\uc6b8": [12, 20, 33, 39], "\ud559\uc2b5\uc2dc\ucf30\uc744": 12, "\uc5c6\uc2b5\ub2c8\ub2e4": [12, 56, 59], "\uac1c\uc778\uc801\uc73c\ub85c": [12, 41, 42], "\uc800\ub294": [12, 41], "\ubcf4\uace0": [12, 14, 20, 26, 27, 42], "\ubc18\ub4dc\uc2dc": [12, 37], "\uc368\uc57c\ud558\ub294": 12, "\uadfc\uac70\uc5d0": 12, "\uc124\ub4dd\ub825\uc774": 12, "\ub5a8\uc5b4\uc9c4\ub2e4\uace0": 12, "\uc0dd\uac01\ud588\uc2b5\ub2c8\ub2e4": 12, "\uc368\uc57c\ud560\uae4c\uc694": 12, "\uac1d\uccb4\ub97c": [12, 28, 33, 35, 36, 55, 61], "\ubb18\uc0ac\ud55c": 12, "\uac1d\uccb4\uc758": [12, 28, 35, 61], "\ubc1c\ud604": 12, "\uc758\ubbf8\ub860\uc801": 12, "\uad00\uacc4\ub97c": [12, 13, 25, 54], "\uc911\uc694\ud558\ub2e4\uace0": [12, 50], "manipul": [12, 28, 37, 40, 46], "diff": [12, 36], "appli": [12, 44], "interpol": [12, 22, 25, 46, 56], "normalis": 12, "produc": [12, 20], "descript": [12, 50], "\ud558\ub294\uc9c0\ub294": 12, "\uc0b4\ud3b4\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": [12, 51, 59], "\uadf8\ub798\uc11c": [12, 22, 23, 28, 36, 42, 51, 58], "\ubb50\uac00": [12, 33], "\uc88b\uc740\uac00\uc694": 12, "\uc0dd\uc131\ubb3c\uacfc": 12, "\uc0dd\uc131\ubb3c\uc744": 12, "\uc0ac\ub78c\ub4e4\uc5d0\uac8c": 12, "\uc81c\uc2dc\ud558\uace0": [12, 18], "photor": [12, 23, 52], "\ub9e4\uae30\ub3c4\ub85d": 12, "when": [12, 41, 51, 52], "guidanc": [12, 15, 17, 19, 26, 27, 28, 32, 35, 36, 39, 42, 44, 53, 56, 59, 61], "both": [12, 16, 20, 45], "versu": 12, "\uacb0\ub860\uc740": 12, "compar": [12, 20, 27, 28, 43], "\ud6e8\uc52c": [12, 15, 16, 19, 23, 24, 25, 27, 28, 31, 32, 42, 45, 47], "bipartit": 12, "\uc778\ucf54\ub529": [12, 29, 33, 34, 44], 
"\uc5bb\uc73c\uba70": 12, "\ubcf5\uc6d0\ud558\ub294\ub370": 12, "\ud544\uc694\ud55c": [12, 18, 33, 34, 43, 44, 45, 47], "\uc794\uc5ec": 12, "\uc815\ubcf4\ub4e4\uc744": [12, 28, 59, 61], "\uc9c0\ub2d9\ub2c8\ub2e4": 12, "\ubcc0\uc8fc\ud558\uae30": 12, "\u03b7": [12, 13], "\ud574\uc9c0\uace0": 12, "\ubcf5\uc6d0\ud574\ub0c5\ub2c8\ub2e4": 12, "\ucee4\uc9c8\uc218\ub85d": [12, 13, 25, 40], "\uc0dd\uae30\uace0": 12, "\uadfc\ucc98\uc5d0\uc11c": 12, "perceptu": [8, 12, 16, 39, 47, 56, 57], "centere": 12, "\ub9cc\ub4e4\uc5b4\ub0bc": [12, 19, 23], "\ud0a4\uc6b0\uba74": [12, 39], "\uc874\uc7ac\ud558\uace0": [12, 46], "\uc720\uc2e4\ub418\uc5c8\ub294\uc9c0": 12, "\ud0d0\uc0c9": 12, "\ud0d0\uc0c9\ud574\ub0bc": 12, "\uc788\ub294\uac70\uc8e0": 12, "\uac83\ub3c4": [12, 14, 32, 42, 60, 61], "\ud574\uc11c": [12, 22, 27, 32, 33, 43, 49, 51, 54], "\uc900\ub2e4\uba74": 12, "\ucea1\uc158\uc774": 12, "\uc8fc\uc5b4\uc838\uc788\uc744": 12, "\uc6b0\ub9ac\uac00": [12, 14, 29, 47], "z_t0": 12, "current": [12, 13, 14], "embd": 12, "\uc870\uc791\ub429\ub2c8\ub2e4": 12, "typograph": 12, "attak": 12, "attack": 12, "\uc0ac\ubb3c": 12, "\uc704\uc5d0": [12, 19, 23, 34, 58], "\uae00\uc528\uac00": 12, "\uc4f0\uc5ec": 12, "\uacbd\uc6b0\uc785\ub2c8\ub2e4": [12, 41], "multimod": [12, 26, 45, 52], "\uc0ac\ubb3c\uc744": 12, "\ud310\ub2e8\ud558\ub294": 12, "ipod": 12, "\uc885\uc774\uac00": 12, "\ubd99\uc740": [12, 47], "\uc0ac\uacfc\ub97c": 12, "\ubd84\ub958\ub97c": [12, 22], "\uc218\ud589\ud574\ubcf4\uc558\uc2b5\ub2c8\ub2e4": 12, "\uc5ed\uc2dc": [12, 28, 32, 34, 60], "granni": 12, "smith": 12, "\uac00\uae5d\ub2e4\uace0": 12, "\ud310\ub2e8\ud588\uc2b5\ub2c8\ub2e4": 12, "\uc0ac\uacfc\uc758": 12, "\uc0ac\uc9c4\uc73c\ub85c": 12, "recov": [12, 32], "\ud574\ub0c5\ub2c8\ub2e4": 12, "\uc774\ucc98\ub7fc": [12, 46, 60], "\ub2e8\uc810\uc740": 12, "\uc5c6\ub098\uc694": 12, "cube": [12, 20, 34, 35], "\uadf8\ub4e4\uc758": [12, 43], "\uc18d\uc131": [12, 28, 33, 47], "color": [12, 18, 19, 32, 34, 39, 40, 46, 50, 53, 55, 59], 
"\ub9e4\uce6d\uc2dc\ud0a4\ub294": 12, "\ub5a8\uc5b4\uc9d1\ub2c8\ub2e4": 12, "red": [12, 48], "\ud30c\ub780": [12, 20, 45], "\ud050\ube0c": [12, 20], "\ube68\uac04": [12, 18, 42], "\ud050\ube0c\ub97c": 12, "\uadf8\ub824\ub2ec\ub77c\uace0": 12, "\ud050\ube0c\uc640": 12, "\ud050\ube0c\uc5d0": 12, "attribut": [12, 16, 28, 41, 53], "\ubd80\uc5ec\ud574\uc57c\ud560\uc9c0": 12, "\ud5f7\uac08\ub824\ud569\ub2c8\ub2e4": 12, "sign": [12, 35, 55], "sai": 12, "deep": [12, 25, 31, 34, 42, 46, 47, 52, 58], "\ub9cc\uc758": 12, "\uc5b4\ub824\uc6cc\ud558\ub294": 12, "\ubb38\uc81c\uc785\ub2c8\ub2e4": 12, "\uc0c1\ud669\uc5d0\uc11c": [12, 14, 57], "\ub514\ud14c\uc77c\uc744": [12, 21, 33, 43], "\ubb18\uc0ac\ud558\ub294": 12, "show": [12, 52], "some": [12, 19], "complex": [12, 36, 56, 59], "\ub124\uc628": 12, "\uc0ac\uc778\ub4e4\uc758": 12, "\ub514\ud14c\uc77c\ub4e4\uc774": 12, "\ud655\uc778\ud558\uc2e4": 12, "\uc218\ud559\uc801": 12, "justifi": 12, "\ub77c": [12, 19, 58, 59], "\ud569\uc2dc\ub2e4": [12, 41], "\uadf8\uc5d0": [12, 37, 47, 60], "\uc800\uc790\uc758": 12, "\uc8fc\uc7a5": [12, 15, 32, 49, 52], "\uc0d8\ud50c\ub9c1\ud560": [12, 35, 46], "equal": 12, "hold": 12, "becaus": 12, "function": [8, 12, 15, 18, 19, 20, 22, 27, 28, 33, 34, 36, 43, 46, 51, 54, 56, 57, 58, 60], "\ud3ec\uc2a4\ud305\uc744": 12, "\ubd80\uac00": 12, "\uacf5\uc2dd\uc744": 12, "\ud480\uc5b4\uc11c": 12, "\ud574\uc124\ud574\ubcf4\uba74": 12, "\uc0d8\ud50c\ub9c1\ud558\uace0": [12, 15, 35, 39, 45, 60], "\uc0d8\ud50c\ub9c1\ud568\uc73c\ub85c\uc368": 12, "\uc0d8\ud50c\ub9c1\uc774": [12, 29], "\uac00\ub2a5\ud574\uc9c0\ub294": 12, "\uc5c6\ub294\uc9c0": 12, "\uad81\uae08\ud574\uc11c": 12, "\uacf5\ubd80\ud574\ubd24\uc2b5\ub2c8\ub2e4": 12, "\ud574\uc18c\ud558\uae30": 12, "\ub178\ub825\uc744": [12, 19], "\ud558\uace0\uc788\ub294\uc9c0": 12, "\uc815\ub7c9\uc801\uc73c\ub85c": [12, 21, 61], "\ud3c9\uac00\ud560": [12, 53], "\uc870\uc0ac\ud574\ubd24\uc2b5\ub2c8\ub2e4": 12, "\uacb0\uacfc\ubd80\ud130": 12, 
"\ub9d0\uc500\ub4dc\ub9ac\uba74": 12, "\uc6f9\ud06c\ub864\ub9c1": 12, "\uc874\uc7ac\ud55c\ub2e4\uace0": 12, "\ud558\uace0\uc788\ub294\uc9c0\ubd80\ud130": 12, "preview": 12, "safeti": 12, "\ub178\ub825": 12, "\ub370\uc774\ud130\uc5d0\uc11c": [12, 35, 39, 42], "violent": 12, "hate": 12, "adult": 12, "\uc81c\uac70\ud568\uc73c\ub85c\uc368": 12, "\ub178\ucd9c\ub418\ub294": 12, "\ucd5c\uc18c\ud654\ud588\ub2e4\uace0": 12, "polici": 12, "\uc704\ubc18\ud55c": 12, "\uc790\uc815\ud558\ub294": 12, "\uc2dc\uc2a4\ud15c\uc744": [12, 35], "\ubcf4\uc720\ud558\uace0": 12, "\uc2e0\ub8b0\ud560": 12, "\uc804\ubb38\uac00\ub4e4\uacfc": 12, "\uac80\ud1a0\ub97c": 12, "\uc9c4\ud589\ud588\ub2e4\uace0": [12, 23, 53], "\uae30\ubc95\uc774": [12, 23, 46], "2202": [12, 58], "04053": 12, "j": [12, 14, 20, 22, 34, 39], "min": [12, 13, 20, 27, 30, 36, 54, 61], "dallev": 12, "contribut": [12, 21, 22, 34, 49, 55], "3\uac00\uc9c0\ub97c": 12, "\uc81c\uacf5\ud569\ub2c8\ub2e4": [12, 24, 46, 55], "\ucd5c\uadfc\uc758": [12, 28, 47], "recognit": [12, 15, 42], "skill": 12, "\uc0c1\ub300\uc801\uc73c\ub85c": [12, 18, 19, 24, 49], "\ub6f0\uc5b4\ub098\uc9c0\ub9cc": [12, 22, 25], "count": [12, 53], "spaial": 12, "\ub2a5\ub825\uc740": [12, 45], "\ub5a8\uc5b4\uc9d0\uc744": 12, "gender": 12, "skin": [12, 20], "tone": 12, "bias": 12, "\ubd84\uc11d": [12, 18, 29, 43, 49], "web": 12, "\ud559\uc2b5\ud588\uc74c\uc744": 12, "\ubcf4\uc5ec\uc8fc\uc5c8\uc2b5\ub2c8\ub2e4": 12, "social": 12, "diagnost": 12, "who": 12, "nurs": 12, "252\uac1c\uc758": 12, "\ud0d0\uc9c0\ud569\ub2c8\ub2e4": 12, "autom": 12, "detect": 12, "verifi": 12, "reliabl": [12, 57], "\uc8fc\uba74\uc11c": 12, "\uc0ac\ub78c\uc758": [12, 21, 31, 41], "\uc131\ubcc4\uc744": 12, "\ub9de\ucd94\uac8c": 12, "\ub2f5\ubcc0\uc744": 12, "\uc2e0\uacbd\ub9dd\uc73c\ub85c": 12, "facial": [12, 40], "landmark": 12, "\ucd94\ucd9c\ud558\uace0": [12, 35, 38, 56], "illumin": [12, 19], "\ubcf5\uc7a5\uc744": 12, "\ud0d0\uc9c0\ub41c": 12, "uniform": [12, 13, 45, 59], "skew": 12, 
"\ub418\uc5b4\uc788\ub294\uc9c0": 12, "expert": [12, 26], "per": [12, 26, 55], "profess": 12, "\ud3c9\uac00\ud558\ub294\ub370\uc5d0": 12, "\uc131\uacf5\ud588\uc2b5\ub2c8\ub2e4": 12, "satbl": 12, "\uc6f9\ud06c\ub864\ub9c1\uc744": 12, "\uc874\uc7ac\ud588\uc2b5\ub2c8\ub2e4": 12, "\ub178\ub825\uc774": 12, "\uc9c0\uc18d\ub418\uace0": 12, "\ubbf8\ub798\uc5d0\ub294": 12, "\uc548\uc804\ud558\uac8c": 12, "\uc788\uae30\ub97c": [12, 35], "\uae30\ub300\ud569\ub2c8\ub2e4": 12, "2010": 13, "02502": 13, "april": [13, 22], "\ub2e8\uc810\uc778": 13, "process\ub85c": [13, 14, 25, 29, 49, 54], "\uc815\uc758\ud568\uc73c\ub85c\uc11c": 13, "deterministic\ud55c": 13, "sampling\uc774": [13, 49], "adversari": [13, 22, 24, 29, 37, 41, 51, 56, 57], "\ubcf4\uc5ec\uc8fc\uace0\uc788\ub2e4": 13, "gan\uc740": [13, 22, 41, 47], "\ubd88\uc548\uc815\uc131\uc744": [13, 47], "generator\uc640": 13, "discriminator\uc758": [13, 22], "imbalanced\uc5d0": 13, "mode": [13, 16, 29, 30, 59], "collaps": [13, 29, 57], "\uadf8\ub7ec\ub358": 13, "ddpm\uacfc": [13, 16, 23, 37], "ncsn\uac19\uc740": 13, "training\uad6c\uc870\uac00": 13, "\ub4f1\uc7a5\ud558\uc600\uace0": 13, "\uc131\uacf5\uc758": 13, "\ubcf4\uc5ec\uc8fc\uc5c8\ub2e4": [13, 16, 26, 40, 45, 54], "ddpm\uc740": [13, 49], "\uac70\uce58\ub294\ub370": 13, "\uc774\ub54c\ubb38\uc5d0": 13, "gan\uc5d0": [13, 29], "\ub290\ub9b0": [13, 27, 54, 57], "performance\ub97c": [13, 29], "50k": [13, 58], "less": [13, 20], "than": [13, 20, 55], "about": 13, "20h": 13, "256": [13, 15, 16, 21, 33, 34, 35, 38, 39, 47, 48, 51, 52, 55, 56], "1000h": 13, "ddim\uc740": [13, 16, 49], "chain\uc5d0": 13, "\ub300\uccb4\ud558\uc600\uace0": 13, "\uacb0\uad6d": [13, 16, 23, 25, 32, 33, 48, 49], "\ube60\ub974\uace0": [13, 16, 24, 54], "\uc0dd\uc131\ud574\ub0b4\uace0": [13, 40], "accel": 13, "ddpm\uacfc\ub294": 13, "consistency\ud55c": 13, "\ubcf4\uc5ec\uc90c\uc73c\ub85c\uc368": 13, "latent\uac04\uc758": 13, "interpolation\uc774": 13, "If": 13, "equival": 13, "process\ub294": [13, 16, 23, 
36, 54], "\ub3d9\uc791\ud55c\ub2e4": 13, "\ubbf8\ub798": 13, "\uc2dc\uc810\uc744": [13, 16, 18, 35], "\uc608\uce21\ud558\uae30\uc704\ud574": 13, "\uc774\uc6a9\ud55c\ub2e4": [13, 16, 20, 23, 39], "\uc2dc\uc810\uc740": 13, "\uacfc\uac70": [13, 46], "\uac12\uc5d0\ub294": 13, "\uac16\ub294\ub2e4": [13, 20, 27], "t\ub294": 13, "ddpm\uc5d0\uc11c": [13, 16, 23, 25, 49], "\uc88c\uc9c0\uc6b0\uc9c0\ud558\ub294": 13, "parameter\uc774\ub2e4": [13, 54], "\ub300\ucda9": 13, "\ubc88\uc758": [13, 33], "sequential\ud558\uac8c": [13, 39], "\uac70\uccd0\uc57c\ud558\uace0": 13, "\ud604\uc800\ud788": [13, 25, 58], "\uc18d\ub3c4\ub97c": [13, 20, 24, 26, 31, 35, 54], "\uc694\uc18c\uac00": [13, 17], "\uad6c\ud558\uae30\uc704\ud574": 13, "\ucc38\uc870": [13, 19, 24, 39], "\uac12\ub9cc\uc744": 13, "process\uc758": [13, 19, 25], "stochastic\ud55c": [13, 16], "chap": 13, "And": 13, "unifi": [13, 26], "\uc2dd\uc744": [13, 16, 39, 49, 57, 58], "\uc0d8\ud50c\ub9c1": [8, 13, 18, 21, 22, 29, 31, 33, 34, 35, 36, 42, 44, 49, 52], "\uad00\uacc4": [13, 33], "t\uc2dc\uc810\uc758": [13, 16], "\uc608\uce21\ud55c": [13, 16, 23, 24, 27, 39, 54], "\u03c3\uac00": 13, "\uac00\uc9c8": [13, 34, 36], "\uc218\uc2dd\uacfc": 13, "\ub3d9\uc77c\ud558\ub2e4": 13, "explan": 13, "deterministic\ud558\uae30\ub54c\ubb38\uc5d0": [13, 49], "\uacc4\uc0b0\ud560": [8, 13, 49], "subset\uc758": [13, 49], "\uc2dc\uc810\ub9cc\uc73c\ub85c": [13, 49], "method\ub294": [8, 13, 43, 49], "\uc57d\uac04\uc758": [13, 21, 23, 28, 29, 49], "efficiency\ub97c": [13, 49], "\ucda9\ubd84\ud788": [13, 24, 28, 34, 46, 49, 54, 57], "\uc99d\uac00\uc2dc\ud0ac": [13, 49], "ddim\uc758": [13, 49], "od": [13, 27, 35, 46, 56, 57], "encoding\uc774": [13, 34], "\uc720\ub3c4\ud560": [13, 19], "table1": [13, 15, 22], "euqat": 13, "simple\ud558\uac8c": 13, "control\ud558\uae30\uc704\ud55c": 13, "\ud69f\uc218": [13, 24], "\u03b7\uac00": 13, "step\uc5d0": [13, 16, 25, 28, 54], "step\uacfc": 13, "time\uc774": 13, "linear\ud55c": 13, "step\uc5d0\uc11c\ub3c4": 13, 
"\uc5b4\ub290\uc815\ub3c4\uc758": [13, 54], "object\ub97c": 13, "kera": 13, "diffusionmodel": 13, "image_s": 13, "width": [13, 15, 22, 31, 49], "block_depth": 13, "get_network": 13, "denorm": 13, "convert": [13, 50], "pixel": [13, 20, 26, 28, 29, 34, 39, 47, 48, 54, 55, 56, 57, 59, 61], "back": [13, 19], "rang": [13, 34, 48, 50, 51, 59], "tf": 13, "clip_by_valu": 13, "diffusion_schedul": 13, "diffusion_tim": 13, "angl": [13, 19, 61], "start_angl": 13, "aco": 13, "max_signal_r": 13, "end_angl": 13, "min_signal_r": 13, "diffusion_angl": 13, "signal_r": 13, "co": [13, 14, 34, 58], "noise_r": 13, "sin": [13, 14, 20, 34], "note": 13, "squar": [13, 50, 58], "sum": [13, 16, 27, 30, 34, 35, 36, 39, 60], "alwai": 13, "noisy_imag": 13, "move": [13, 15, 27, 38, 46, 54], "ema_network": 13, "compon": [13, 32, 55, 56], "calcul": 13, "pred_nois": [13, 14], "pred_imag": 13, "train_step": 13, "have": 13, "deviat": 13, "like": [13, 22], "shape": [8, 13, 14, 15, 31, 32, 34, 35, 39, 41, 43, 50, 51, 53], "batch_siz": [13, 41, 45, 60], "minval": 13, "maxval": 13, "accordingli": 13, "gradienttap": 13, "tape": 13, "separ": [13, 31, 41, 50], "noisi": [13, 18, 21, 26, 59, 61], "noise_loss": 13, "image_loss": 13, "trainable_weight": 13, "apply_gradi": 13, "noise_loss_track": 13, "update_st": 13, "image_loss_track": 13, "name": [13, 30], "reverse_diffus": 13, "initial_nois": 13, "diffusion_step": 13, "num_imag": 13, "step_siz": 13, "line": [13, 57], "pure": [13, 58], "its": 13, "assum": 13, "nonzero": 13, "next_noisy_imag": 13, "ones": 13, "remix": 13, "next": 13, "next_diffusion_tim": 13, "next_noise_r": 13, "next_signal_r": 13, "generated_imag": 13, "probabilist": [14, 29, 30, 42], "neurip": [14, 20, 36, 49, 52], "2020": [14, 16, 20, 25, 35], "2006": [14, 30], "11239": [14, 30], "pytorch": [14, 30, 31, 39, 41, 48, 51, 60], "review": [14, 27, 30, 43, 62], "pr": [14, 18, 30, 50], "409": [14, 30], "beomsoo": [14, 30, 62], "park": [14, 16, 23, 28, 30, 39, 62], "apr": [14, 30, 41, 46, 51, 60], 
"19": [14, 30], "velog": [14, 47, 48], "yetsyl0705": 14, "what": 14, "inference\ub85c": [14, 54], "markov\uac00": 14, "distribution\uc758": 14, "\ub54c\uae4c\uc9c0": [14, 16, 58], "\ub354\ud574\uac00\ub294": 14, "\uc5ed\uc73c\ub85c": [14, 35], "\uac70\uce58\uba70": [14, 54], "\uad6c\uc131\ub428": [14, 15, 35], "\uc815\uc758\ud558\uae30": 14, "\uc27d\uace0": 14, "\ud559\uc2b5\uc2dc\ud0a4\ub294": [14, 23, 47, 57], "\ud3b8\ub9ac\ud568": 14, "\uc0dd\uc131\uc774": [14, 17, 18, 19, 24, 33, 40, 44, 48, 49, 52, 55], "\ubcc0\ubd84\ucd94\ub860": [14, 60], "\uc0ac\ud6c4\ud655\ub960": 14, "posterior": [14, 19, 48, 60], "\ubd84\ud3ec": [14, 29, 35, 36, 48, 57], "\ub2e4\ub8e8\uae30": [14, 60], "\uc26c\uc6b4": [14, 60], "\ud655\ub960\ubd84\ud3ec": [14, 54], "\uadfc\uc0ac": [14, 20, 36], "approxim": [14, 19, 20, 51, 54, 60], "\ud45c\ud604\uc2dd\uc5d0": 14, "\ubcf4\ud1b5": [14, 15, 24, 27, 30, 41, 42, 43, 47], "parameter\uc758": [14, 23], "\uc2dd\uc758": [14, 16, 36], "\ucc28\uc218\ubcf4\ub2e4": 14, "\uc218\ub85c": 14, "3\ucc28": 14, "\ud45c\ud604\uc2dd": 14, "\ucc28\uc218\ub85c\uc758": 14, "\ud568\uc218": [14, 18, 21, 22, 34, 46, 47, 48, 49], "\uc0c1\ud0dc\uc5d0\uc11c": [14, 24, 47, 56, 61], "\uc0c1\ud0dc\ub85c": [14, 18, 19, 24, 31, 39, 50, 59], "\ub118\uc5b4\uac08": 14, "\ub2e8\uacc4\uc758": [14, 20, 21, 35, 43, 51], "\uc0c1\ud0dc\uc5d0\ub9cc": 14, "graphic": [14, 52, 55], "_0": [14, 46, 54], "prod_": [14, 16], "quad": [14, 28, 34, 46, 57], "beta_t": [14, 16], "chain\uc73c\ub85c": 14, "\ucd94\uac00\ud560": [14, 18, 35], "beta_1": [14, 34], "\ub354\ud574\uc900\ub2e4": 14, "\uc774\uba74": [14, 37, 52], "mean\uc778": 14, "\uac16\uc9c0": 14, "\uc99d\uac00\ud568": 14, "\ub2e8\uc21c\ud788": [14, 21, 22, 28, 34, 37, 39, 43, 47], "noise\ub9cc\uc744": 14, "\ub354\ud574\uc8fc\ub294\uac8c": 14, "scaling\ud558\ub294": 14, "variance\uac00": 14, "\ubc1c\uc0b0\ud558\ub294": 14, "\ub9c9\uae30": 14, "\uc644\uc804": [14, 34], "destroy\ub41c": 14, "\uc0c1\ud0dc": [14, 15, 20], "boldsymbol": 14, 
"sigma": [14, 16, 20, 27, 34, 36, 39, 41, 46, 49, 54, 58, 60], "\uac00\uc6b0\uc2dc\uc548": [14, 21, 29, 35, 44], "1994\ub144": 14, "process\uac00": [14, 37], "\uac00\uc6b0\uc2dc\uc548\uc774\uba74": 14, "process\ub3c4": 14, "\uac00\uc6b0\uc2dc\uc548\uc73c\ub85c": 14, "\uc4f0\uba74": 14, "\ub41c\ub2e4\ub77c\ub294": 14, "\uc99d\uba85\uc774": 14, "\ud574\uc57c": [8, 14, 16, 27, 28], "sigma_": [14, 18, 36, 49, 50, 58], "hierarach": 14, "vae\uc5d0\uc11c\uc758": 14, "\uacfc\uc815\uacfc": 14, "\ube44\uc2b7\ud568": [14, 43], "\ubaa9\uc801\uc740": 14, "\uc81c\uac70\ud560": 14, "\uac83\uc778\uac00": 14, "\ub4e4\uc5b4\uc654\uc744": [14, 21, 42], "\uc608\uce21\ud560": [14, 33, 35], "\uc608\uce21\uc774": 14, "\uac00\ub2a5\ud574\uc9d0": [14, 43], "leq": [14, 27, 58], "_q": [14, 26, 29], "geq": 14, "likelihood\ub97c": [14, 36], "\ucd5c\uc18c\ud654": [14, 19, 34, 57], "\uc218\uc2dd\uc744": [14, 16, 37, 46, 47, 49, 54, 60], "evid": [14, 48], "bound": [14, 19, 26, 28, 34, 35, 60], "\uc6b0\ud56d\uacfc": 14, "\uc815\ub9ac\ud558\uace0": 14, "\ud480\uc5b4\ub0b4\uba74": 14, "elbo\uc758": 14, "\uc5ed\ud560\uc740": 14, "\uad00\ucc30\ud55c": 14, "\ud798\ub4e0": 14, "\uc774\ub8e8\uace0": 14, "\uc870\uae08": [14, 32], "\ubd84\ud3ec\uc778": [14, 54], "\ud45c\ud604\ud558\ub824": 14, "\ucc28\uc774": [14, 15, 29, 36, 37], "diverg": [14, 19, 36], "underbrac": [14, 16], "_1": [14, 21, 39], "\ub098\uc628\ub2e4": [14, 49], "term\uc73c\ub85c": 14, "\ud559\uc2b5\uc2dc\ud0b4": 14, "\uc9c0\uc6b0\ub294": 14, "\uc9c0\uc6c0": 14, "ddpm\uc5d0\uc11c\ub294": [14, 16, 23, 25], "induct": [14, 15], "bias\ub97c": [14, 41, 43], "\ub298\ub824": [14, 42], "stable\ud558\uace0": 14, "\uc131\ub2a5\ub3c4": [14, 42, 45, 53], "\uac1c\uc120\ud560": [14, 25, 54], "\uc788\uc5c8\uc74c": [14, 15, 30, 43], "\ub9cc\ub098\ubcf4\uc9c0": 14, "\ubabb\ud588\ub358": [14, 50], "\uc815\ud655\ud55c": [8, 14, 17, 19, 21, 28, 34, 43, 45, 46], "\uc608\uce21\uc744": [14, 21, 24, 26, 35, 39], "\ud480\ub824\ub294": 14, "\uace0\uc815": [14, 15, 
17, 25, 43], "\ud588\ub354\ub2c8": 14, "\uc798\ub428": 14, "02\ub85c": 14, "linear\ud558\uac8c": 14, "\uac00\uae4c\uc6b8\uc218\ub85d": 14, "\uc801\uac8c": [14, 16, 48], "\uc8fc\ub294": [14, 16, 23, 26, 28, 31, 59], "parameter\uac00": [14, 26], "\ub418\uae30": [14, 27, 30, 34, 47], "beta": [14, 15, 24], "progress": [14, 46, 54], "posterior\ub97c": 14, "\ub9cc\ub4e4\uc5c8\uc744\ub54c": 14, "\ubcf5\uc6d0": [14, 19, 21, 35], "simplic": 14, "sjina0722": 14, "\ub9ac\ubdf0": [14, 30, 34], "\uac00\uc815\ud588\uace0": 14, "\ubc1b\uae30": [14, 35, 40], "\ud559\uc2b5\uc2dc\ud0a4\uc9c0": [14, 28], "\uc54a\uc544\ub3c4": [14, 51, 59, 61], "\ub41c\ub2e4\uace0": [14, 61], "\uc0dd\uac01\ud574": [14, 57], "term\uc744": [14, 19], "\uad6c\ud558\uc9c0": [14, 51], "\uad6c\ud574": 14, "\uc815\ud655\ub3c4\ub97c": [14, 42], "\ub192\uc784": [14, 19], "int_": [14, 34, 36], "sigma_1": 14, "arrai": 14, "ll": [14, 30, 50], "infti": [14, 36], "255": 14, "case": [14, 22, 32, 36, 53], "\uc0ac\uc774\ub85c": [14, 34], "linearli": [14, 16, 20, 33, 58], "\ub2e8\uacc4\uc5d0\ub294": 14, "\ucd94\uac00\ud558\uc9c0": [14, 39], "divergence\ub97c": 14, "\uc88c\ud45c": [14, 34, 39], "\uc704\uc640": [14, 16, 20, 30, 31, 39, 45, 46, 47, 49, 58, 61], "\ub098\ud0c0\ub09c\ub2e4": [14, 21], "output\uac04": 14, "\uc904\uc774\ub294": [14, 24, 27, 39, 58], "denoising\uacfc": 14, "\ube44\uc2b7\ud574": 14, "ddpm\uc774\ub77c\ub294": 14, "\uc774\ub984\uc774": [14, 52], "\ubd99\uc74c": 14, "objective\uc744": 14, "\uc5d0\uc11c\ubfd0\ub9cc": 14, "t\uc5d0": [14, 26, 28, 54], "\uac00\ub2a5\ud558\uae30": [14, 18], "\ud6a8\uacfc\uc801": [14, 19], "psuedo": 14, "\ub354\ud574\ub098\uac00\ub294": 14, "\uc5bc\ub9c8\ub9cc\ud07c": 14, "\ub354\ud574\uc84c\ub294\uc9c0\ub97c": 14, "step\uc758": [14, 16, 23, 26, 27, 28, 29, 54], "\ucd94\uac00\ub418\uc5c8\ub294\uc9c0\ub97c": 14, "\ud559\uc2b5\ub41c\ub2e4": [14, 26, 35, 43], "\ucf54\ub4dc\uc5d0\uc11c\ub294": [14, 30], "\ub178\uc774\uc988\uc640": [14, 21], "t\ub85c": [14, 16, 23], "p_loss": 
14, "x_start": 14, "default": [14, 30, 34], "torch": [14, 30, 45, 50, 56, 59, 60], "randn_lik": [14, 50], "q_sampl": 14, "do": [14, 41, 43, 59], "slow": [14, 55, 56], "25": [14, 30, 33, 38, 42, 45, 51, 55], "seem": 14, "significantli": [14, 52], "x_self_cond": 14, "self_condit": 14, "no_grad": 14, "model_predict": 14, "pred_x_start": 14, "detach_": 14, "take": 14, "model_out": 14, "pred_x0": 14, "pred_v": 14, "predict_v": 14, "rais": [14, 45, 50], "valueerror": [14, 50], "unknown": [14, 50], "loss_fn": 14, "reduct": [14, 45, 50], "reduc": [14, 50], "loss_weight": 14, "noise\uc5d0\uc11c": 14, "\uc21c\ucc28\uc801\uc73c\ub85c": [14, 48, 59], "p_sampl": 14, "int": [14, 36, 39, 45, 51, 54, 59, 60], "devic": [14, 34, 45, 50], "batched_tim": 14, "model_mean": 14, "model_log_vari": 14, "p_mean_vari": 14, "clip_denois": 14, "pred_img": 14, "backbon": [14, 38, 44, 56], "\ub2e8\uacc4\ub294": [14, 28, 29], "convnext": 14, "\ube14\ub85d": 14, "groupnorm": [14, 49], "upsampling\uc73c\ub85c": 14, "block_klass": 14, "resnetblock": 14, "group": 14, "resnet_block_group": 14, "modulelist": [14, 59], "dim_in": 14, "time_emb_dim": 14, "time_dim": 14, "prenorm": 14, "linearattent": 14, "dim_out": 14, "is_last": 14, "conv2d": [14, 30, 56, 59], "init_dim": 14, "out_dim": 14, "dim_mult": 14, "learned_vari": 14, "learned_sinusoidal_cond": 14, "random_fourier_featur": 14, "learned_sinusoidal_dim": 14, "dimens": [14, 15, 30, 31, 39, 57, 59], "input_channel": 14, "init_conv": 14, "in_out": 14, "list": [14, 45, 59], "random_or_learned_sinusoidal_cond": 14, "sinu_pos_emb": 14, "randomorlearnedsinusoidalposemb": 14, "fourier_dim": 14, "sinusoidalposemb": 14, "time_mlp": 14, "gelu": 14, "num_resolut": 14, "len": [14, 45, 51, 59], "ind": 14, "enumer": [14, 50, 51, 59], "mid_dim": 14, "mid_block1": 14, "mid_attn": 14, "mid_block2": 14, "default_out_dim": 14, "final_res_block": 14, "final_conv": 14, "zeros_lik": 14, "clone": [14, 59], "block1": [14, 59], "block2": [14, 59], "resolution\uc5d0": [14, 
42], "conv\uc5d0\uc11c": 14, "3\ubc30\ub85c": 14, "\ub298\ub9ac\uace0": 14, "v\ub85c": 14, "\ubd84\ud574": [14, 24], "dim_head": [14, 31], "hidden_dim": 14, "to_qkv": 14, "to_out": [14, 56], "qkv": 14, "chunk": [14, 50, 59], "rearrang": [14, 31, 44], "einsum": 14, "layernorm": [14, 15], "block\uc5d0": [14, 22, 23, 49], "embedding\uc774": [14, 33, 43], "\ucd94\uac00\ub3fc\uc11c": 14, "\uad6c\ubd84\ub428": 14, "half_dim": 14, "math": 14, "10000": [14, 20], "arang": 14, "score": [14, 18, 20, 22, 23, 29, 31, 32, 33, 46, 47, 48, 51, 52, 54, 55, 56, 57, 58, 59], "is\ub85c": 14, "model\uc778\ub370\ub3c4": 14, "model\ubcf4\ub2e4": [14, 23, 26, 37], "\uc6b0\uc6d4": 14, "codelength\uc5d0\uc11c": 14, "\uc5c6\uae30": [14, 32, 33, 35, 54], "overfitting\uc758": 14, "\uac00\ub2a5\uc131\ub3c4": 14, "\uc801\uc74c": 14, "incept": [14, 31, 35, 42, 48, 58], "v3\uc73c\ub85c": 14, "\uc9d1\ud569\uacfc": 14, "\ud074\ub798\uc2a4": [14, 31, 35, 42, 45], "\uc2e4\uc81c\uc640": 14, "IS": [14, 15, 23, 31, 35, 38, 44, 46, 48], "\ud074\ub798\uc2a4\uac00": [14, 45], "\ub098\ub204\uc5b4\uc9c0\ub294\uc9c0": 14, "\ud074\ub798\uc2a4\ub97c": [14, 45], "\uc0dd\uc131\ud558\ub294\uc9c0": 14, "\ud3c9\uac00\ud568": [14, 52], "\ub192\uc744": [14, 15], "\uc131\uc801\uc774": 14, "\uc88b\uace0": [14, 54], "variance\ub97c": [14, 25], "\uc0ac\uc6a9\ud588\uc744": [14, 28, 39, 42, 43, 46, 49], "\ub54c\uc5d0\ub3c4": [14, 16, 61], "\uac10\uc18c\ud558\uc9c0": 14, "\uace0\uc815\ud588\uc744": 14, "\ub098\ub294": 14, "\uc0ac\uc6a9\ud558\uba74\uc11c": [14, 27, 30, 43], "\uac1c\uc120\ub428": 14, "scalabl": 15, "iccv": [15, 45, 47], "09748": 15, "facebookresearch": 15, "wpeebl": 15, "junhyoung": [15, 62], "juli": [15, 19, 39], "\ubc31\ubcf8\uc744": 15, "\ubc14\uafb8\uc5b4": 15, "depth": [15, 17, 26, 32, 34, 35, 43, 47, 49, 59], "\uc99d\uac00\ub85c": [15, 29], "\ub192\uac8c": [15, 19, 30], "\ub098\ud0c0\ub0ac\uc74c": [15, 35], "\ub0ae\uac8c": [15, 33, 49], "\uc720\uc9c0\ud55c": [15, 24, 31, 32, 50, 55, 57], 
"\ubcf4\uc720\ud558\uba70": 15, "xl": [15, 28, 61], "\ubca4\uce58\ub9c8\ud06c\uc5d0\uc11c": [15, 31], "27": [15, 52], "\ud2b8\ub80c\ub4dc\ub97c": 15, "\uc720\uc9c0\ud558\uace0": [15, 21, 25, 36, 41], "\ub124\ud2b8\uc6cc\ud06c\uc778": 15, "\uc0ac\uc6a9\ub418\uace0": [15, 19, 26, 33], "\ucd08\uae30\uc5d0\ub294": [15, 31], "\ub808\ubca8\uc758": [15, 20], "gan\uc5d0\uc11c": [15, 26], "\uc774\ub04c\uc5c8\uc74c": 15, "\ube14\ub7ed\uc774": [15, 33], "\uc8fc\uc694\ud558\uac8c": 15, "\uc800\ud574\uc0c1\ub3c4\uc5d0": 15, "\ud3ec\ud568\ub428": 15, "adm": [15, 16, 23, 46, 49], "\uc120\ud0dd\uc801\uc73c\ub85c": [15, 18, 40], "\uc544\ud0a4\ud14d\ucc98": [15, 35, 43], "\uad6c\uc131\uc758": 15, "\ubc1d\ud614\uc74c": 15, "\ub07c\uce58\uc9c0": 15, "\uc885\ub958\uc758": [15, 21, 27, 42], "dit\ub294": 15, "vit\ub97c": 15, "\uc900\uc218\ud558\uba70": 15, "\ub124\ud2b8\uc6cc\ud06c\ubcf4\ub2e4": 15, "\uc778\uc2dd": [15, 24, 31, 35], "\ud655\uc7a5\ub418\ub294": 15, "\ub098\ud0c0\ub0a8": [8, 15], "\uc544\ud0a4\ud14d\ucc98\uc758": 15, "\ubcf5\uc7a1\uc131": 15, "\uce21\uc815\ud560": [15, 42], "\uce21\uc815\ud568": 15, "\uace0\ub824\ud558\uc9c0": [15, 34, 54, 61], "\uc54a\uc544": [15, 39, 54, 56, 57], "\ubcf5\uc7a1\uc131\uc744": [15, 21, 24], "\ubd84\uc11d\ud568": [15, 29], "\uc5f0\uc0b0\uc801\uc73c\ub85c": 15, "\ud574\uc0c1\ub3c4\ub85c": [15, 18, 31, 44, 61], "\uc555\ucd95\ud558\ub294": [15, 29], "autoencod": [15, 29, 57, 59, 60], "\uacf5\uac04\uc758": [15, 42], "\uc555\ucd95\ub41c": [15, 16, 27], "\ub514\ucf54\ub529\ud558\uc5ec": [15, 21], "\uc0dd\uc131\ud568": [8, 15], "ldm\uc740": [15, 29, 40, 43, 44], "adm\uacfc": 15, "\ubaa8\ub378\ubcf4\ub2e4": [15, 18, 25, 42, 47, 48, 53, 56, 58, 61], "gflops\ub97c": 15, "\ub2ec\uc131": [15, 18, 21, 26, 31, 33, 34, 42, 43, 44, 48], "dit\ub97c": 15, "\uc801\uc6a9\ud588\uace0": 15, "\uacf5\uac04\uc5d0\uc11c\ub3c4": 15, "\uc18d\uc131\uc744": [15, 34, 47], "\ub530\ub974\ub3c4\ub85d": [15, 22, 46, 60], "ddpm\uc744": [15, 25], "\ud328\uce58\ub97c": 15, 
"\uc5f0\uc0b0\ud558\ub294": [15, 27, 34, 49], "\uad6c\uc870\ub85c": [15, 24, 51], "\uc124\uacc4\ub428": 15, "fig4": 15, "256x256x3": 15, "\uc774\ubbf8\uc9c0\uc5d0\uc11c\ub294": [15, 19], "32x32x4": 15, "\uc785\ub825\ub428": 15, "\ucc28\uc6d0\uc758": [15, 22, 27, 29, 33, 44, 61], "\uc218\uac00": [15, 26, 29, 34, 47, 48, 61], "\ub2ec\ub77c\uc9d0": 15, "\ubc18\uc73c\ub85c": 15, "\uc904\uc774\uba74": 15, "4\ubc30\uac00": 15, "\ucd5c\uc18c": [15, 21, 38, 46], "\ub07c\uce68": [15, 52], "\uc815\ubcf4\ub85c": 15, "\uc2dc\ud000\uc2a4\uc5d0": [15, 21], "\ub354\ud574\uc90c": [15, 30], "\ucc98\ub9ac\ud558\ub294": [15, 26], "\uc9c4\ud589\ud568": [15, 43], "\ucd94\uac00\uc758": [15, 38], "\ub354\ud558\ub294": [15, 16], "\ud1a0\ud070\uacfc": [15, 33], "\ucc98\ub9ac\ud568": 15, "cl": 15, "\ube14\ub7ed\uc5d0\uc11c\ub294": 15, "\uc81c\uac70\ud568": 15, "\ubcc4\ub3c4\ub85c": [15, 34, 46], "\ub2e4\uc74c\uc5d0": [15, 31, 41], "\ud3ec\ud568\ud558\ub3c4\ub85d": 15, "\uc218\uc815\ud568": 15, "\uc624\ubc84\ud5e4\ub4dc\ub85c": 15, "\ucd94\uac00\ud568": [15, 52], "norm": [15, 47, 56], "adaln": 15, "\ube14\ub7ed\uc758": 15, "\uad50\uccb4\ud568": 15, "gamma": [15, 18, 34, 50, 56, 58], "\uac83\ubcf4\ub2e4": [15, 16, 21, 30, 31, 35, 36, 45, 52, 56, 59], "\ud569\uc73c\ub85c\ubd80\ud130": 15, "\ud68c\uadc0\ud558\ub3c4\ub85d": 15, "\uc124\uacc4\ud568": [15, 31], "\ucd5c\uc18c\ud55c\uc758": [15, 16, 21, 47], "\ucd94\uac00\ud558\ubbc0\ub85c": 15, "\uc5f0\uc0b0\uc5d0": [15, 21], "\ud6a8\uc728\uc801\uc784": 15, "\uc801\uc6a9\ud568\uc73c\ub85c\uc368": [15, 53], "\uc88b\ub2e4\ub294": [15, 42, 53], "\ubc1c\uacac\ud568": 15, "\ucd08\uae30\ud654\ud568": 15, "\ud68c\uadc0\ud558\ub294": 15, "\uc678\uc5d0\ub3c4": [15, 20, 30, 31, 39, 54], "\ube14\ub7ed": 15, "\uc801\uc6a9\ub418\ub294": [15, 16, 46], "\ubcc4": [15, 18, 34, 39, 45], "\ud68c\uadc0\ud568": 15, "\uc601\ubca1\ud130\ub97c": 15, "\ucd9c\ub825\ud558\ub3c4\ub85d": 15, "mlp\ub97c": [15, 19], "\ube14\ub85d\uc744": [15, 31], 
"\ucd08\uae30\ud654\ud558\uac8c\ub428": 15, "\ube14\ub85d\uacfc": [15, 38], "architecture2": 15, "\uc2dc\ud000\uc2a4\ub97c": [15, 21, 35], "\ub514\ucf54\ub529\ud574\uc57c\ud568": 15, "adaln\uc744": 15, "\ud559\uc2b5\uacfc": [15, 22, 47], "\uc5f0\uad00\ub428": 15, "\uc774\uc678\uc758": 15, "\ubd84\uc0b0\ub3c4": [15, 35], "\uc9c4\ud589\ud588\uc74c": 15, "vlb_loss": 15, "xlarg": 15, "config": [15, 50, 56], "512x512": [15, 17, 21, 26, 32, 33, 36, 38, 45, 54, 55, 59, 61], "\uc14b": [15, 44], "adamw": [15, 17, 26, 61], "horizont": [15, 20], "flip": [15, 32], "ema": [15, 27, 46, 54], "999": [15, 34], "table2": [15, 22], "table3": [15, 22], "\uc88b\uc74c": [8, 15, 30, 48], "fig5": 15, "\ud559\uc2b5\uc5d0\ub3c4": [15, 49], "fig6": 15, "\ud074\uc218\ub85d": [15, 46, 54], "fig7": 15, "fig8": 15, "fig9": 15, "visual_result": 15, "\ud328\uce58": [15, 44], "\uc0ac\uc774\uc988\uc640": 15, "\ud06c\uae30\uc5d0": [15, 23, 53], "\ud328\uce58\uac00": 15, "\ud06c\uae30\uac00": [15, 26, 35, 36, 38], "\ub6f0\uc5b4\ub098\uace0": 15, "\uc6b0\uc218\ud558\uac8c": [15, 24, 41], "\uc801\uc6a9\ud568": 15, "\ud6a8\uc728\uc131\ub3c4": 15, "\uac00\uc838\uc634": 15, "2210": 16, "10960": 16, "sehwan": [16, 23, 28, 62], "domain\uc5d0\uc11c": 16, "control\ud558\ub294": 16, "\ubd80\uc871\ud558\ub2e4": [16, 21, 28, 36, 39, 44], "model\uc18d\uc5d0\uc11c": 16, "space\ub97c": [16, 20, 36, 38, 43], "\ubc1c\uacac\ud558\uae30": 16, "\uc81c\uc548\ud558\uace0": [16, 46], "space\ub77c\uace0": 16, "\uba85\uce6d\ud55c": 16, "space\uc758": [16, 29, 43, 54], "\ud2b9\uc131": [16, 36, 39], "across": 16, "\ub4e4\uc744": [16, 19, 32, 45, 52, 60], "strength\uc640": 16, "deficiency\ub97c": 16, "\uae30\uc900\uc73c\ub85c": [8, 16, 20, 29, 30, 35, 42, 45, 46, 58, 61], "\uc0bc\uace0": 16, "translation\uc744": 16, "design\uc744": 16, "guidance\ub294": [16, 21, 49], "unconditional\ud55c": [16, 21, 23], "variable\uc5d0": 16, "variable\uc744": 16, "\ud569\uce58\ub294": [16, 35], "\uc774\uc6a9\ud558\uba74\uc11c": 16, 
"\uba85\ud655\ud558\uac8c": [16, 51], "control\ud558\uae30\uac00": 16, "\uc27d\uc9c0": [16, 28], "\uc54a\ub2e4": [16, 21, 22, 26, 27, 37, 44, 54], "\uac70\uce58\ub294": [16, 35, 46], "variable\uc774": 16, "class\uc778\uc9c0": 16, "\ubd84\ub958\ud558\uace0": 16, "\ubd80\uc5ec\ud558\ub294": 16, "\uc791\ub3d9\ud55c\ub2e4": [16, 31], "variable\ub4e4\uc5d0": 16, "classify\ub97c": 16, "\uc2e4\ud589\ud574\uc57c": 16, "\ud558\uae30\uc5d0": 16, "\uc0ac\uc6a9\ud558\uae30\uac00": 16, "\ud798\ub4e4\uc5b4": 16, "\uc2dc\ucf1c\uc57c": [16, 32], "\uc2dc\uac04\uc801\uc73c\ub85c": [16, 21, 44], "\ube44\uc6a9\uc801\uc73c\ub85c": 16, "\ubd80\ub2f4\uc774": [16, 56], "diffusionclip": 16, "space\ub294": [16, 29], "origin": [8, 16, 20, 26, 32, 34, 37, 38, 39, 57, 58], "edit\ud558\uae30": 16, "\ubc1c\uacac\ud558\uc600\uace0": 16, "\uce6d\ud55c\ub2e4": 16, "space\uc5d0\ub294": 16, "\ud2b9\uc131\ub4e4\uc774": [16, 26], "\uc874\uc7ac\ud55c\ub2e4": [16, 27], "editing\uacfc": 16, "boosting\uc744": 16, "design\ud558\uc5ec": 16, "space\ub85c\uc368\uc758": 16, "\ubc1c\uacac\uc0ac\ub840\uc774\ub2e4": 16, "\uaef4\uc788\ub294": [16, 23], "\uc5bc\ub9cc\ud07c\uc778\uc9c0": [16, 23], "\uc81c\uac70\ub41c": [16, 23, 29], "\uad6c\ud560": [16, 23, 26, 57, 58, 60], "constant\ud55c": [16, 23], "\uace0\uc815\uc2dc\ud0a8\ub2e4": [16, 23], "ddpm\uc5d0\uc11c\uc758": [16, 23], "sigma_t": [16, 18, 19, 20, 26, 36, 50, 54, 57, 58], "alpha_": [16, 18, 23, 50], "bigg": [16, 27, 54], "ddim\uc5d0\uc11c\ub294": 16, "\uad00\uc810\uc758": 16, "\uc81c\uc2dc\ud558\uc600\uace0": 16, "general\ud558\uac8c": 16, "process\uc5d0": [16, 43], "eta": [16, 24, 28], "1\uc778": 16, "ddpm\uc774": [16, 25], "\ub418\uace0": [16, 21, 22, 26, 28, 30, 31, 40, 46, 47, 50, 52, 58, 60, 61], "stochastic\ud574\uc9c0\uba70": 16, "0\uc778": 16, "ddim\uc774": 16, "deterministic\ud574\uc9c4\ub2e4": 16, "cfrac": 16, "2i": [16, 46, 54, 58], "textrm": 16, "point": [8, 16, 18, 19, 20, 32, 34, 36, 44, 46], "encoder\uc640": [16, 20, 29, 33, 39], 
"text\uac04\uc758": [16, 23], "\ud3b8\uc9d1\ub41c": 16, "\ucd5c\uc18c\ud654\ud558\ub294": [16, 43, 51, 58, 59, 61], "collapse\uc5c6\uc774": 16, "\uade0\uc77c\ud55c": [16, 34, 35], "editing\uc744": 16, "\ud588\ub2e4\uace0": [16, 23, 42, 49, 58], "_i": [8, 16, 21, 26, 28, 34, 39], "editiing\uc744": 16, "naiv": 16, "approach\ub97c": 16, "\ud1b5\ud574\uc11c\ub294": 16, "editing\uc774": [16, 22, 23], "\uc774\ub8e8\uc5b4\uc9c0\uc9c0": 16, "chapter\uc5d0\uc11c\ub294": 16, "\uc54a\ub294\uc9c0\uc5d0": 16, "\uc124\uba85\uc744": [16, 42, 43], "\ud574\uacb0\ud558\ub294": [16, 28, 54], "controllable\ud55c": 16, "process\uc778": 16, "ddim\uc5d0\uc11c": [16, 49, 52], "\uc124\uba85\ud558\uc600\ub294\ub370": 16, "chapter\ubd80\ud130\ub294": 16, "\uc124\uc815\ud558\uace0": [16, 33, 35, 42, 47, 50], "\uc124\uc815\ud558\uc600\ub2e4": 16, "variable\ub85c": [16, 36], "\ub2f4\ub2f9\ud558\uace0": 16, "\ub3cc\uc544\uac00\uae30\uc5d0": 16, "\ub2f4\ub2f9\ud55c\ub2e4": 16, "prompts\uc5d0": [16, 23], "manipulate\uc2dc\ud0a4\ub294": 16, "3\uc5d0\uc11c": [16, 35, 36], "\uc18c\uac1c\ud55c": [16, 28, 38, 46, 58, 61], "optimize\ud558\ub3c4\ub85d": 16, "update\ud558\ub294": 16, "\uc0dd\uc131\ud558\uac70\ub098": [16, 31], "manipulation\uc744": [16, 28], "\ud55c\ub2e4\uace0": [8, 16, 32, 43, 56], "\ub300\uc548\uc73c\ub85c": [16, 26, 34], "manipulate\ud558\ub3c4\ub85d": 16, "shift\ud574\uc8fc\ub294": 16, "\uc81c\uc2dc\ub418\uc5c8\ub2e4": 16, "manipulate\ud558\uc9c0": 16, "\ub458\ub2e4": [8, 16, 48], "shifted\ub41c": 16, "\uc0ac\uc6a9\ud558\uae30\uc5d0": 16, "cancel": 16, "out\ub418\uc5b4": 16, "variable\uc5d0\uc11c\ub294": 16, "\uae30\uc874\uacfc": [16, 54], "\ub2e4\ub984\uc774": 16, "\uc5c6\ub2e4\ub294": [16, 23], "\uc99d\uba85\uc740": 16, "proof": [16, 23], "theroem\uc744": 16, "theroem": 16, "beta_": 16, "ddim\uc5d0\uc11c\uc758": 16, "\uc2dd\uc774\uace0": 16, "\ud56d\ub9cc": 16, "\ub530\ub85c": [16, 26, 28, 33, 39, 43, 47, 51, 54], "\ubb36\uc5b4\uc11c": 16, "\ud45c\ud604\ud558\uba74": [16, 50, 60], 
"root\ub97c": 16, "\ub0b4\ubd80\ub97c": 16, "\uacc4\uc0b0\ud558\uba74": [16, 48], "\uc815\ub9ac\ud558\uba74": [16, 47], "therefor": 16, "epsilon\uc744": [16, 23], "\uacb0\uacfc\uc774\ub2e4": [16, 23, 28, 36], "\ubd84\uc790\ub97c": 16, "\uc791\uae30\uc5d0": 16, "\uc218\ub834\ud558\uae30\uc5d0": 16, "\uc5c6\uc74c\uc744": [16, 25], "\ud6a8\uacfc\ub294": 16, "\uc88b\uc9c0": [16, 25, 34, 35, 38, 40, 42, 46, 55, 58, 59], "\uc54a\uc74c\uc744": [16, 18], "No": 16, "chapter": 16, "1\uc5d0\uc11c": [16, 31, 34, 38], "asyrp\ub97c": 16, "\ube44\ub300\uce6d\uc801\uc778": 16, "\uc0ac\uc6a9\ud55c\ub2e4\ub294": [16, 20, 42], "\uac83\uc778\ub370": [16, 26, 36], "\ub3cc\uc544\uac00\ub294": [16, 47], "\uc900\ub2e4\ub294": 16, "\ub9ccmodify\ud558\uace0": 16, "\uc720\uc9c0\ud55c\ub2e4": [16, 21], "loss\uc2dd": 16, "\uc7ac\uad6c\uc131\ud558\uc600\ub2e4": 16, "modify\ub97c": 16, "\ud558\uc9c0": [16, 18, 21, 28, 35, 42, 43, 45, 57, 58], "modifiy\ub97c": 16, "loss\uc2dd\uc740": 16, "lambda_": [16, 18, 19, 20, 36, 58], "ref": [16, 19, 20, 32], "recon": [16, 18], "\uc124\uacc4\uac00": 16, "\uc774\uc81c": [16, 42, 57, 58], "epsilon\uc778": 16, "\uac83\uc778\uc9c0\uc5d0": 16, "result\ub97c": [16, 23], "nice": 16, "properties\ub97c": 16, "models\uc758": 16, "backbone\uc778": 16, "net\uc5d0\uc11c": [16, 52], "\ub3c4\ucd9c\ub41c\ub2e4": 16, "net\uc758": [16, 17, 21, 31], "bottleneck": [16, 30, 48, 59], "\uae4a\uc740": [16, 34, 47], "map\uc778": 16, "h_t": 16, "\uc815\ud558\uc600\ub2e4": 16, "\ubd80\ub978\ub2e4": 16, "space\ubcf4\ub2e4": 16, "resolutions\uc744": 16, "semantic\ub97c": 16, "\uac00\uc9c4\ub2e4": [16, 29, 35, 49, 54], "space\uc5d0\uc11c\ub294": 16, "\ubc1c\uacac\ud560": 16, "nice\ud55c": 16, "\ud2b9\uc131\ub4e4\uc744": 16, "\ud06c\uae30\ub294": [16, 30, 38, 39, 54, 59], "times512": [16, 20], "times3": [16, 22], "control\uc774": [16, 28, 41], "\uc9c0\ubc30\uc801\uc774\uace0": 16, "robust\ud568\uc744": 16, "\ucd94\uce21\ud560": 16, "skip": [16, 27, 31, 34, 46, 50, 52, 55, 56], 
"connection\uc758": 16, "\ubc1b\uc9c0": [16, 39, 57], "\uc54a\uc73c\uba70": [16, 18, 54], "\uacf5\uac04\uc774\uba70": 16, "control\ud558\ub294\ub370\uc5d0": 16, "\uc9c0\uc815\ud558\uae30": 16, "\uc124\uc815\ud574\ub450\uace0": 16, "\ud574\ubcf4\uc558\ub294\ub370": 16, "8th": 16, "layer\uc774\uc804\uc758": 16, "\uc9c0\uc815\ud55c": [16, 37, 52], "manipulaton\uc774": 16, "\uc774\ub8e8\uc5b4\uc84c\uace0": 16, "\uc774\ud6c4\uc758": [16, 25], "\uacfc\ud55c": 16, "manipulation\uc774": 16, "\uc774\ub8e8\uc5b4\uc9c0\uac70\ub098": 16, "\uc544\uc608": [16, 52], "\uc0dd\uc131\ub418\uc5c8\ub2e4": 16, "space\ub9cc\uc758": 16, "\ud2b9\uc131\uc740": 16, "chapter5\uc5d0\uc11c": 16, "\uc124\uba85\ud55c\ub2e4": [16, 39], "manipulating\ud558\ub294\ub370": 16, "\uc131\uacf5\ud588\uc74c\uc5d0\ub3c4": 16, "\uc218\ub9ce\uc740": [16, 32, 46], "timestep\uc5d0\uc11c": 16, "optimizing\ud558\uae30\ub780": 16, "\ub300\uc2e0\uc5d0": [16, 21, 34, 58, 61], "\uc785\ub825\ubc1b\uc544": [16, 55, 58, 61], "\ucd9c\ub825\ud574\uc8fc\ub294": 16, "optimizing\ud574\uc918\uc57c": 16, "\uc2dc\uac04\ub3c4": 16, "setting\uac12\ub4e4\uc5d0": 16, "robust\ud558\ub2e4": 16, "timestep\uacfc": 16, "feature\uc778": [16, 21], "\ucd9c\ub825\ud558\ub294": [16, 20, 34, 51], "\ud559\uc2b5\ud558\uae30\uc5d0": 16, "unseen": [16, 18, 61], "\uc77c\ubc18\ud654\ud560": 16, "accelerated\ud55c": 16, "\uacfc\uc815\uc5d0\uc11c\ub3c4": [16, 30], "\ubcf8\ub2e4": 16, "scheme\uc774": 16, "\uc5b4\ub5bb\ub4e0": 16, "\uac04\uc5d0": [16, 21, 61], "\ubcf4\uc874\ub41c\ub2e4\uba74": 16, "\uc124\uacc4\ud574\ub3c4": 16, "manipulation\ud6a8\uacfc\ub97c": 16, "control\ud574\uc11c": 16, "\uc774\uc6a9\ud558\ub294": [16, 36], "\uc2dd\uc740": [16, 25, 36], "space\uc640": [16, 39], "\ube44\uad50\ud558\uc600\ub2e4": [16, 21, 54], "intuit": [16, 41], "choos": [16, 32], "interv": [16, 46], "percept": 16, "priorit": 16, "choi": 16, "earli": [16, 59], "stage\uc5d0\uc11c\ub294": 16, "context\ub97c": [16, 23, 43], "generate\ud558\uace0": 16, "later": 16, 
"impercept": [16, 29], "details\ub97c": 16, "generate\ud55c\ub2e4\uace0": 16, "stage\uc5d0\uc11c": 16, "\uc9c4\ud589\ud558\ub294": [16, 20, 28, 46], "\uad6c\uac04\uc744": [16, 39], "\uc81c\uc2dc\ud55c\ub2e4": [16, 23], "process\uc5d0\uc11c\ub294": 16, "context\uac00": 16, "generate\ub418\uc5b4\uc57c": 16, "interval\uc744": 16, "\uacb0\uc815\ud558\uae30": [16, 56], "\uce21\uc815\uc9c0\ud45c\ub97c": 16, "t\uc2dc\uc810\uc5d0\uc11c": 16, "target\uc774": 16, "image\uac04\uc758": 16, "lpips\ub97c": 16, "\ub0a8\uc740": [16, 23, 45], "process\uc744": 16, "\uad6c\uc131\uc694\uc18c\ub97c": [16, 34], "\uc9c0\ud45c\ub77c\uace0": [16, 42], "t\uc758": 16, "lpips\ub85c": 16, "\uc2dc\uc810\uc5d0\uc11c\uc758": 16, "\ucc28\uc774\ub294": [16, 28, 37, 52], "\uc5bc\ub9cc\ud07c\uc758": 16, "change\ub97c": 16, "\uc8fc\uc5c8\ub294\uc9c0\ub97c": 16, "xi_t": 16, "interval\uc774": 16, "\uc791\uc73c\uba74": [16, 28, 32, 36, 39, 60], "\uc791\uc544\uc9c0\uba70": 16, "\uc77c\uc5b4\ub098\uc9c0": 16, "\ud06c\uba74": [16, 21, 29, 36], "\ucee4\uc9c0\uace0": 16, "\uc77c\uc5b4\ub09c\ub2e4": 16, "\ucda9\ubd84\ud55c": [16, 36, 42], "\ud55c\uc5d0\uc11c": 16, "\ucd5c\uc18c\uc758": 16, "\uacb0\uc815\ud558\ub294": [16, 26], "\ucd5c\uace0\uc758": [16, 45, 49], "\ubc29\ubc95\uc774\ub2e4": [16, 28, 29, 49], "\uc2e4\ud5d8\uc801\uc778": 16, "33\uc778": 16, "t\uc2dc\uc810\uc744": 16, "\uacb0\uc815\ud558\uc600\ub2e4": 16, "variou": [16, 41, 49, 58, 59, 62], "proper": 16, "\ud2b9\uc131\ub4e4\uc740": 16, "\ud2b9\uc131\ub4e4\uc5d0": 16, "\uacbd\uc6b0\ub3c4": [16, 32, 39, 42, 47], "smile\ud55c": 16, "attribute\ub97c": 16, "\uacbd\uc6b0\ubcf4\ub2e4": 16, "pixar": [16, 24], "style\uc758": [16, 39], "attribute\uc744": 16, "\uae38\uac8c": [16, 20], "\uc124\uc815\ud574\uc57c": 16, "33": 16, "t\ub97c": [16, 35], "33d": 16, "y_": [16, 30, 49], "\uc758\ubbf8\ud558\uba70": [16, 27], "\uc694\uad6c\ud558\ub294": [16, 28], "attributes\uc5d0": 16, "\uc791\uc74c": 16, "\uae40": 16, "flexibl": [8, 16, 26, 59], "amount": 16, "chang": 
[16, 49], "\uc124\uc815\ud558\uba70": 16, "stochasticity\ub97c": 16, "\uc81c\uac70\ud558\uc5ec": [16, 35, 39, 54], "\uc644\ubcbd\ud55c": 16, "\uac00\ub2a5\ucf00": [16, 26], "elucid": 16, "diffusionbas": 16, "karra": [16, 35, 46], "stochasticity\uac00": 16, "\uc99d\uac00\uc2dc\ud0a8\ub2e4\uace0": 16, "\uc99d\uba85\ud558\uc600\ub2e4": [16, 21], "interval\uc740": 16, "interval\uc5d0": 16, "control\ud560": 16, "\uae38\uac8c\ub418\uba74": 16, "quality\ub294": [16, 43], "\uc99d\uac00\ud558\uc9c0\ub9cc": 16, "interval\ub3d9\uc548": 16, "\uacc4\uc18d\ud574\uc11c": 16, "\uc8fc\uc785\ud574\uc57c": 16, "content\uac00": [16, 34], "\ub2ec\uc131\ud558\uba74\uc11c\ub3c4": 16, "content\uc5d0": [16, 40], "\ubcc0\ud654\ub9cc\uc744": 16, "\uc124\uc815\ud558\ub294": [16, 58, 60], "\ud574\uacb0\ud574\uc57c": 16, "\ubd80\ubd84\uc73c\ub85c": [16, 39, 57, 60], "\ubcf4\uc558\uc73c\uba70": 16, "image\ub85c": [16, 20, 23, 26, 29], "\uaef4\uc788\ub294\uc9c0\uc5d0": 16, "\uc9c0\ud45c\ub85c": [16, 37, 39, 42], "defici": 16, "gamma_t": 16, "\uc5ec\uae30\uc11c\ub294": [16, 36, 42, 43], "strength\uc640\ub294": 16, "\ud310\ub2e8\ud558\ub294\ub370\uc5d0": 16, "semantics\ubcf4\ub2e4\ub294": 16, "actual": [16, 30], "\uace0\ub824\ud588\uae30\uc5d0": 16, "\uc124\uc815\ud558\uc600\ub2e4\uace0": [16, 55], "2\uc778": 16, "gamma_": 16, "presenc": 16, "model\uc5d0\uc11c\uc758": 16, "where": [8, 16, 26, 34, 39, 49, 58, 59], "\ub418\uba70": [16, 31, 36, 44, 50, 53, 61], "\uc0ac\ub77c\uc838": 16, "\ud2b9\uc131\uc774": 16, "assymetr": 16, "ddim\uc744": [16, 27, 54], "control\ub41c": 16, "f_t": 16, "\ucc98\uc74c\ubd80\ud130": [16, 21, 31], "\uc2dc\uc810\uae4c\uc9c0\ub294": 16, "\uc9c4\ud589\ud558\ub2e4\uac00": 16, "\uc2dc\uc810\ubd80\ud130": 16, "\ub05d\ub0a0": 16, "celeba": [16, 24, 29], "2018": 16, "lsun": [16, 25, 29, 46, 54, 58], "bedroom": [16, 29, 46, 58], "church": [16, 29, 58], "2015": [16, 20], "\ub370\uc774\ud130\uc14b\uc5d0\uc11c": [16, 24, 35, 43, 45, 52, 56, 58], "2020b": 16, "meng": [16, 46, 54], 
"afhq": 16, "dog": [16, 18, 28, 32, 50], "iddpm": 16, "dhariw": [16, 35, 58], "metfac": 16, "p2": 16, "\ud559\uc2b5\uc2dc\ucf30\ub2e4\uace0": [16, 23, 56], "model\ub4e4\uc740": [16, 30, 54], "checkpoint\ub97c": [16, 30, 40], "frozen\uc0c1\ud0dc\ub97c": 16, "\uc720\uc9c0\uc2dc\ucf30\ub2e4\uace0": 16, "attribute\ub4e4\uc758": 16, "\ubc18\uc601\ud574\uc11c": [16, 28], "manipulate\ud588\ub2e4\ub294": 16, "\uc2ec\uc9c0\uc5b4": 16, "depart": 16, "factori": 16, "templ": 16, "attribute\uc740": 16, "\ud3ec\ud568\uc774": 16, "\ub418\uc5b4\uc788\uc9c0": 16, "\uc54a\uc558\uc74c\uc5d0\ub3c4": [16, 45], "inference\ud558\ub294": 16, "control\ud558\uace0": 16, "\ub0c8\ub2e4\ub294": 16, "\uc7a5\uc810\uc774\ub2e4": 16, "model\ub4e4\uacfc": 16, "\uc9c4\ud589\ud558\uc600\ub294\ub370": 16, "tuning\ud558\uc5ec": [16, 26, 42], "image\uc744": 16, "editing\ud558\ub294": 16, "diffsionclip": 16, "asyrp\uc758": 16, "\uc88b\uc74c\uc744": 16, "seen": 16, "smile": 16, "\ucd94\uac00\ud558\uae30": [16, 56], "\ucd5c\uc801\ud654\ub41c": [16, 18, 35, 51, 55], "\uc801\uc6a9\uc2dc\ucf30\uc744": 16, "\ub098\ud0c0\ub0b4\uc5c8\ub294\ub370": 16, "\uc801\uc6a9\ud55c\uacbd\uc6b0": 16, "face\ub85c": 16, "\ubc14\ub00c\ub294": [16, 41, 46], "distortion\uc774": 16, "\ubc1c\uc0dd\ud568\uc744": 16, "delta_h": 16, "scaling\uc744": [16, 52], "\ud558\ub294\ub370\uc5d0": 16, "change\uc758": 16, "\uc591\uc5d0": 16, "\ubc18\uc601\ub41c\ub2e4": 16, "3\ubc30": [16, 38], "\ud568\uc5d0": 16, "\ubc18\uc601\ub418\ub294": [16, 47], "attribute\ub610\ud55c": 16, "\ubcc0\ud654\ud55c\ub2e4\ub294": 16, "\ud45c\ud604\ub418\uc5b4": 16, "scaling\uc5d0": 16, "\ub41c\ub2e4\ub294": 16, "\ud569\uccd0\uc11c": [16, 20, 42], "\ubd80\uc5ec\ub97c": 16, "\uacbd\uc6b0\uc5d0\ub3c4": [16, 47], "attribute\ub4e4\uc774": 16, "\ubc18\uc601\uc774": [16, 18, 50], "\uc8fc\uc785\ud588\uc744": 16, "\ube44\uad50\ud55c": [16, 23, 24, 36, 53, 58], "\ucd94\uac00\ub418\uc5c8\uc5b4\ub3c4": 16, "\uc5c6\uc73c\uba70": [8, 16], "\ucd94\uac00\ub418\uc5c8\uc744": 16, 
"distortion\uc740": 16, "\uc5c6\uace0": [16, 19, 23, 57], "change\ub9cc": 16, "\uc2ec\ud558\uac8c": 16, "robustness\ud55c\uc9c0": 16, "homogeneous\ud55c": 16, "\uc131\uc9c8\uc744": [16, 46], "attribute\uc5d0": 16, "\ub40c\uc744": 16, "\ud655\uc778\ud558\uc600\ub2e4": 16, "\ub4e4\uc5d0": [16, 36], "\ud3c9\uade0\uc778": 16, "result\uac00": 16, "\ube44\uc2b7\ud568\uc744": 16, "chapter4\uc5d0\uc11c": 16, "\ube44\ucd94\uc5b4": 16, "\ubcf4\uc558\uc744": [16, 41, 47], "process\uc5d0\uc11c\ub9cc": 16, "\uc801\uc6a9\uc744": 16, "global": [16, 22, 26], "\uce6d\ud558\uba70": 16, "\uc801\uc6a9\ub41c\ub2e4\uace0": 16, "\uac00\uc815\ud588\uc744": 16, "t_e": 16, "sum_t": 16, "\uc591\ub9cc": 16, "\uac19\ub2e4\uba74": 16, "\ube44\ub85d": 16, "\uc0ac\uc6a9\ud558\uc600\uc9c0\ub9cc": [16, 27], "\uc5f0\uad6c\ub97c": [16, 24, 27, 31, 32], "\ud574": [16, 20, 22, 44, 57], "\uc5ec\uc9c0\uac00": [16, 54], "\ud310\ub2e8\ud55c\ub2e4": 16, "models\uc5d0\uc11c": 16, "space\uc778": 16, "\ubc1c\uacac\ud588\uace0": 16, "\uc131\uacf5\uc801\uc778": [16, 18], "semantic\ud55c": [16, 28], "\uc81c\uc548\uc744": 16, "\ub17c\ubb38\uc774\ub2e4": [16, 28, 36], "\ub300\ud45c\uc801\uc778": [16, 19, 28, 51], "\ud2b9\uc131\uc73c\ub85c\ub294": 16, "timesteps\uc774": 16, "framework\uc778": [17, 20], "identity\uc640": [17, 21, 40], "postur": 17, "sequence\uac00": [17, 39], "moving\uc774\ub098": 17, "controlling\uc744": 17, "preserving\uc744": 17, "t2v\uc758": 17, "\uc9c4\uc804\uc5d0\ub3c4": 17, "\uc778\uac04": [17, 23], "\uc911\uc2ec": [17, 18, 19], "\uacaa\ub294": [17, 21, 39], "\ubd80\uc871": [17, 19], "\ubb18\uc0ac\uc758": 17, "\uc5b4\ub824\uc6c0\uc73c\ub85c": 17, "\uacaa\ub294\ub2e4": 17, "\uc81c\uc5b4\ub97c": [17, 43], "lora": [17, 24, 36, 56], "\uae30\uc220\ub4e4\uc740": [17, 24], "\uc81c\uc5b4\uac00": [17, 18], "\ubd80\ub2f4": 17, "\ubc29\ubc95\ub860\uc778": 17, "network\ub85c": 17, "animatediff\uc5d0\uc11c": 17, "1000\uc758": 17, "\uc601\uc0c1\uc73c\ub85c": [17, 54], "\ud6c8\ub828\uc740": 17, 
"\ubcc0\uc774\ub098": 17, "\ud2b9\ubcc4\ud55c": [17, 31, 35], "\uc5f0\uc18d\uc801": [17, 34], "frame\uc774": 17, "\ud544\uc694\ud558\uae30": 17, "video\ub85c": 17, "split\ud558\uc5ec": 17, "6000\uac1c\uc758": 17, "\uc9e7\uc740": [17, 29, 35, 42], "\ud68d\ub4dd\ud55c\ub2e4": 17, "description\uc744": [17, 43], "minigpt": 17, "v2": [17, 27, 53, 56], "captioner\ub85c": 17, "describ": 17, "manner": [17, 32], "\uba85\ub839\uc73c\ub85c": 17, "\ud68d\ub4dd": [17, 19, 45], "subject\uc640": 17, "background": [17, 20, 28, 32], "\ub0b4\uc6a9\uc5d0": 17, "\uc815\ud655\ud788": [8, 17, 19, 23, 47, 57], "\ubb18\uc0ac": 17, "consistency\uc640": 17, "\ud5a5\uc0c1\uc744": [17, 20, 22, 33, 47, 49], "net\uacfc": [17, 31, 39], "controlnet\ub97c": 17, "block\uc740": 17, "animatediff\ub85c": 17, "\ud655\uc7a5": [17, 31, 48], "length\ub294": 17, "64\ub85c": 17, "mm_sd_v15": 17, "ckpt": 17, "\uac1c\uc778": 17, "appearance\uc640": 17, "\ubc30\uacbd\uc744": [17, 28], "\uace0\uc548\ub428": 17, "prompt\uc774\uc9c0\ub9cc": 17, "\ubb18\uc0ac\uac00": 17, "prompt\ub97c": [17, 23, 26, 28, 40, 43, 54], "\uc678\uad00\uc5d0": 17, "\ubc30\uacbd\uc5d0": 17, "\uc5bc\uad74": [8, 17, 24], "feature\ub294": [17, 26, 41], "embedding\uc5d0": [17, 26], "concat\ub41c": 17, "\ubcf4\ub0c4": 17, "attentino": 17, "cloth": 17, "c_t": [17, 26], "c_f": [17, 39], "c_c": [17, 39], "prime": [17, 18, 34], "openclip": [17, 22, 26, 38, 45], "h14": 17, "arcface\ub97c": 17, "\uc0c1\uad00": 17, "2b\uc5d0\uc11c": 17, "\uc218\uc9d1": [17, 18, 43, 53], "v100": [17, 20, 26, 34, 59], "100k": [17, 32], "1\uc7a5": [17, 45], "valid": [17, 19, 30, 42, 45, 51], "16\uc5d0\uc11c": [17, 38], "\ud655\uc7a5\ud558\uae30": [17, 31, 35], "18\ucd08": [17, 45], "13000": 17, "module\ub9cc": 17, "\ud6c8\ub828\ud558\uace0": 17, "controlnet\uc774\ub098": 17, "10k": [17, 31, 58], "\uc885\ub8cc": 17, "block\uacfc": 17, "unfreez": 17, "\uc218\uc9d1\ud55c": 17, "6k": 17, "dwpose\ub098": 17, "zoedepth\ub97c": 17, "352x352": 17, "25k": [17, 58], "express": 
[17, 20], "\ub0ab\uac8c\ud558\uae30": 17, "\uad6c\uc870\uc5d0\uc11c": [17, 42], "5e": [17, 21], "20k": 17, "depth\uc5d0\uc11c\ub9cc": 17, "\ub3d9\uc2dc": 17, "1\uc758": [17, 27], "alpha_f": 17, "styliz": [17, 18], "driven": [18, 20, 23, 50], "2303": [18, 22, 40, 45, 46], "13508": 18, "jeongin": [18, 34, 62], "\ud53c\uc0ac\uccb4\uc758": [18, 21], "6\uac1c\uc758": [18, 61], "\uce90\uc8fc\uc5bc\ud55c": 18, "\ub9de\ucda4\ud654": 18, "\uacb0\ud569": [18, 26, 48], "\ubc29\ubc95\ub860\uc744": [18, 27, 36, 49], "\ub098\uc774\ube0c\ud558\uac8c": 18, "\uacb0\ud569\uc2dc": 18, "subject\uc758": 18, "viewpoint": [8, 18, 34], "\uc624\ubc84\ud53c\ud305\ud558\ub294": 18, "\ubabb\ud55c": [18, 58], "\uae30\ub2a5\uacfc": 18, "nerf\uc758": [18, 20, 36, 39], "\uacf5\ub3d9\uc73c\ub85c": 18, "3\ub2e8\uacc4": 18, "\uc804\ub7b5": [18, 21, 35], "\uadf9\ubcf5": [18, 33, 34], "\uc774\ubbf8\uc9c0\uc5d0\uc11c": [18, 21, 29, 34, 35, 37, 42], "\ud3ec\uc988": [18, 20, 21, 24, 32, 34], "\uc911\uc2ec\uc758": 18, "asset\uc0dd\uc131\uc740": 18, "vr": [18, 36], "\uc601\ud654": 18, "\uac8c\uc784": 18, "\uc751\uc6a9": [18, 35], "\uac00\ub2a5\ud558\ub098": 18, "\ud504\ub86c\ud504\ud2b8\ub9cc\uc73c\ub85c": 18, "\uc815\uccb4\uc131": 18, "\uae30\ud558\ud559\uc801": [8, 18, 19, 34], "\uc678\uad00\uc744": 18, "\ub2a5\ub825\uc5d0": 18, "\uac1c\ubc1c": 18, "\ud0dc\uc2a4\ud06c\uc5d0\uc11c": [18, 21, 31, 42], "\ubcf4\uc778": [18, 42, 48], "\ub9ce\uc9c0\ub9cc": 18, "\uc0dd\uc131\uc774\ub098": 18, "\uc81c\uacf5\ud558\uc9c0\ub294": 18, "\uc18c\uc218\uc758": [18, 35, 45], "6\uac1c": [18, 61], "\uce90\uc8fc\uc5bc\ud558\uac8c": 18, "\ucd2c\uc601\ub41c": [18, 31], "\ucd5c\uc801\ud654\ud558\uc5ec": [18, 34], "\uc790\uc0b0\uc744": 18, "\uc0dd\uc131\ud558\uc790": 18, "\ubb38\uc81c\uc810": [8, 18, 21, 28, 44], "subject\uc5d0": [18, 21], "\uc2e4\ud328": [18, 21, 34], "\uc0ac\ub840\uac00": 18, "\uc8fc\uc81c": [18, 24], "\ubdf0\ud3ec\uc778\ud2b8\uc5d0": 18, "\uacfc\uc801\ud569": 18, "\ubdf0\ud3ec\uc778\ud2b8\uc5d0\uc11c": 18, 
"\ucda9\ubd84\ud558\uc9c0": [18, 21, 26, 57], "\ud574\uacb0\ucc45": 18, "\ubd80\ubd84\uc801\uc73c\ub85c": 18, "\ubbf8\uc138": [18, 34], "\uc870\uc815": [18, 19], "\uc870\uc815\ub41c": 18, "\ubdf0\uc5d0": [18, 34], "\ub418\uc9c0": [18, 42, 44, 45, 47, 60], "subject\ubcc4": 18, "\ucea1\ucc98\ud558\uc9c0": [18, 35, 43], "\uc790\uc0b0\uc740": 18, "\uc77c\uad00\uc131\uc774": [18, 21], "\ubc18\uc601\ud558\uc9c0\ubabb\ud568": 18, "\uc870\uc815\ud558\uc5ec": [18, 24], "\uc0ac\ud56d\uc744": [18, 24, 31, 35], "\ucea1\ucc98": [18, 34], "1\ub2e8\uacc4\uc5d0\uc11c": 18, "\ud22c\uc785": [18, 34], "\uac00\uc0c1": [18, 34], "1\ub2e8\uacc4\uc758": 18, "pseudo": [18, 27, 46], "\ucd5c\uc801\ud654\ud55c": [18, 36, 55], "\ubcfc\ub968\uc744": 18, "\ucd5c\uc801\ud654\uc2dc": 18, "\uaddc\uc81c\ud56d\uc73c\ub85c": 18, "\uc138\ud2b8\uc5d0": 18, "weak": [18, 47], "3\ub2e8\uacc4\uc5d0": 18, "\uac78\uce5c": 18, "\ud569\ub3d9": [18, 26], "\ucd5c\uc801\ud654\ub294": [18, 21, 29], "\uacfc\uc801\ud569\ub418\ub294": 18, "\uc815\uccb4\uc131\uc5d0": 18, "\ucda9\uc2e4\ud558\ub3c4\ub85d": 18, "\ubcf4\uc7a5": 18, "\ucee8\ud14d\uc2a4\ud2b8\ub97c": 18, "\uc874\uc911\ud558\uba74\uc11c": 18, "\uc720\uc0ac\uc131\uc774": 18, "\ud604\uc2e4\uc801\uc778": [8, 18, 52], "\uc785\uc99d": [18, 34], "\ubca0\uc774\uc2a4\ub77c\uc778\uacfc": 18, "\ube44\uad50\ud560": [18, 33, 47], "\ud3ec\ucc29\ud55c\ub2e4\ub294": 18, "\ubc18\uc601": [18, 21, 22, 34], "\uc81c\uacf5\ud55c": [18, 26], "\uc790\uc5f0\uc5b4": [18, 43], "llm": [18, 30, 33, 52], "\uc131\uacf5\uc5d0": 18, "\ud798\uc785\uc5b4": 18, "\uc791\ud488\ub4e4\uc774": 18, "\uc870\uc791\uacfc": 18, "\uc791\uc5c5\uc5d0": [18, 21, 24], "\ub300\uaddc\ubaa8": [18, 19, 20, 23, 27, 35, 43, 44], "\ud504\ub86c\ud504\ud2b8\uc5d0\uc11c": 18, "suject": 18, "\uc81c\uacf5\ud558\uae30": 18, "recontextu": [18, 50], "sleep": [18, 60], "jump": [18, 39], "\uc870\uba85": [18, 19, 32, 35], "\ub4f1\uc73c\ub85c": [18, 24, 29], "\ucd2c\uc601\ud560": 18, "\ud544\uc694\uac00": [8, 18, 37, 47, 50], 
"\uc0dd\uc131\uc758": [18, 36, 42], "\ubc1c\uc804\uc744": [18, 24, 28, 35], "\uc0ac\uc6a9\uc790\ub294": [18, 31, 41, 43], "\ud76c\uadc0": 18, "\uaddc\uc81c\ub97c": 18, "\ube44\uc804": 18, "\uc0ac\uc804\uc744": 18, "\ub098\ud0c0\ub0b4\ub294": [18, 31, 34, 43, 47, 55], "word": [18, 22, 26], "\ucd5c\uc801\ud654\ud568\uc73c\ub85c\uc368": 18, "2208": [18, 43, 50], "01618": [18, 43], "\ubc29\ubc95\ub860\ub4e4\uc740": 18, "\uc81c\uacf5\ud558\uc9c0": [18, 24], "problem": [18, 47], "i_i": 18, "ldot": 18, "\uc774\ubbf8\uc9c0\ub4e4\uc758": [18, 20, 34], "\ub9e5\ub77d": 18, "\uc758\ubbf8": [18, 19, 21, 24, 28, 35], "stand": [18, 53], "radianc": [18, 20, 36], "\ud544\ub4dc\ub97c": 18, "\uc778\ucf54\ub529\ud558\ub294": [18, 39], "mlp": [18, 19, 20, 30, 33, 34, 36, 39, 55, 60], "field": [8, 18, 19, 20, 22, 32, 36, 47, 53, 55], "\ucea1\ucc98\uac00": 18, "\uc124\uc815\uc5d0": [18, 43], "\uc81c\ud55c\uc801\uc774\uace0": 18, "\uae30\uc220\uc744": [18, 24, 34, 35, 36, 38, 52], "3d\ub85c": 18, "stablediffus": 18, "\ubc18\uc601\ud558\uc5ec": 18, "\uc77c\uce58\ud558\uc9c0\ub9cc": 18, "\uc774\ubbf8\uc9c0\ub0b4\uc5d0\uc11c": 18, "\uc138\ubd80\uc801\uc778": [18, 21, 24, 36, 41, 47], "\ud574\uacb0": [18, 48, 57], "\ud53c\uc0ac\uccb4\uc5d0": [18, 21], "5\uc7a5": [18, 43], "\uc8fc\uc5b4\uc9c0\ub294": [18, 41], "\ud30c\uc778\ud29c\ub2dd\ud558\uc5ec": [18, 21, 31, 42], "casual": 18, "captur": [18, 24, 33], "\ud30c\uc778\ud29c\ub2dd\uc744": [18, 21, 42], "_d": 18, "proce": 18, "\ubc29\uc9c0\ud558\uc5ec": 18, "\uac1c\uc120\ud558\uace0": 18, "drift": [18, 46, 50], "\ubcfc\ub968\uc758": 18, "\ub79c\ub364\ubdf0\uac00": 18, "\uc0c1\uc751\ud558\ub3c4\ub85d": 18, "phi": [18, 19, 20, 22, 27, 30, 34, 36, 44, 46, 48, 49, 54, 55, 57, 59, 60], "\ud45c\ud604\ub41c": 18, "\ubc00\ub3c4\uc758": [18, 34], "\uadf8\ub798\ub514\uc5b8\ud2b8\ub85c\ubd80\ud130": 18, "\uacc4\uc0b0\ub41c": [18, 35, 60], "nomals\uc740": 18, "lambertian": [18, 20, 34], "shade": [18, 19, 55], "\uc0ac\uc2e4\uc131\uc744": [8, 18], 
"\uac1c\uc120\uc2dc\ud0a4\uae30": 18, "relight": 18, "\ud558\ub294\ub370": [18, 22, 33, 35, 39, 45, 55, 61], "\uc0ac\uc6a9\ub428": [18, 20, 29, 43], "camera": [8, 18, 19, 20, 36, 39, 61], "locat": [18, 19, 34, 39], "albedo": [18, 19, 55], "densiti": [18, 19, 29, 34, 36, 39, 51, 55], "\uc8fc\uc5b4\uc9c0\uba74": [18, 21, 31], "\uc74c\uc601": [18, 19], "\ucc98\ub9ac\ub41c": [18, 19, 31], "\ubcfc\ub968": [18, 34], "\ub80c\ub354\ub9c1\ud55c": [18, 39, 61], "\ubcf4\uc774\ub3c4\ub85d": 18, "\ub9e4\uac1c\ubcc0\uc218\ub97c": 18, "\ucd5c\uc801\ud654\ud558\uae30": 18, "nabla_": [18, 19, 57], "_v": [18, 26, 29], "\ub80c\ub354\ub9c1\ub41c": [18, 19, 20, 34, 35, 36], "\ubc84\uc804\ub4e4\uc744": 18, "\uc5d0\ub108\uc9c0": 18, "push": 18, "views\ub97c": 18, "\uc120\ud0dd\ud558\uace0": [18, 20], "\uc5ed\uc804\ud30c": 18, "render": [8, 18, 20, 36, 53, 55], "\uacb0\uacfc\ub4e4\uc774": 18, "\uc774\ubbf8\uc9c0\ucc98\ub7fc": [18, 40], "\ud658\uacbd\uc744": 18, "\ud53c\uc0ac\uccb4": 18, "peson": 18, "\ub9de\ucda4\ud654\ub41c": 18, "BUT": 18, "\uacb0\ud569\uc740": 18, "\ubd88\ub9cc\uc871\uc2a4\ub7ec\uc6b4": 18, "\ucd08\ub798": 18, "\uc0dd\uc131\uc5d0\uc11c": 18, "\ub2e4\uc591\uc131\uc774": [18, 26, 36, 42, 44, 54], "\uacbd\ud5a5\uc744": [18, 25, 36, 40], "exemplar": 18, "views\uc5d0": 18, "\uc720\uc0ac\ud558\ub3c4\ub85d": [18, 60], "\uc800\ud558\ub428": 18, "\uc190\uc2e4\uc740": [18, 56], "\uc5bb\uae30\uc5d0": 18, "\ubd88\ucda9\ubd84": 18, "\ub300\uc0c1\uc5d0": [18, 50], "\uac01\ub3c4\uc5d0\uc11c": [8, 18, 19, 20, 34, 35, 50], "janu": [18, 20], "\uc0c1\ubc18\ub418\uac70\ub098": 18, "\uc5f0\uad00\ub41c": 18, "\uce21\uba74\uc744": 18, "\ub2e4\ub8e8\uc5b4\uc57c": 18, "dreambooth\ub97c": 18, "\ud6c8\ub828\uc2dc\ud0a4\uace0": 18, "nerf\ub97c": [18, 19, 20, 34, 36, 39], "\uac00\uc6b4\ub370": [18, 31], "nerf\uc5d0\uc11c": [18, 19, 36], "\uc2dc\uc810": [8, 18, 34, 46], "\uc624\ub978\ucabd": [18, 21, 24, 31, 38, 42], "\ubd80\ubd84\uc801\uc778": 18, "\uc870\uc815\ud55c": 18, "\uc190\uc2e4\uacfc": 
[18, 24], "\uc7ac\uad6c\uc131": [8, 18, 35, 43], "\ud574\uacb0\ud558\uace0": [18, 28], "\ub9de\ucda4": [18, 33, 43], "booth3d": 18, "dreambootht2i": 18, "\uccb4\ud06c\ud3ec\uc778\ud2b8\uac00": 18, "\ud30c\uc778\ud29c\ub2dd\ud55c": [18, 21], "view\uc5d0": [18, 36], "\uacfc\uc801\ud569\ub418\uc9c0": 18, "\ud558\uc5d0": 18, "dreamfusion\uc740": [18, 19], "\uc0dd\uc131\uac00\ub2a5": 18, "\uacb0\uacfc\ubb3c\uc740": [8, 18], "\uc720\uc0ac\ud558\uc9c0": [18, 24], "\ubd80\ubd84\uc801\uc73c\ub85c\u3141": 18, "\uc720\uc0ac": 18, "\ud558\uba74\uc11c": [18, 53, 57], "\ucda9\uc2e4\ud55c": [18, 26], "\uc811\uadfc\ubc95\uc758": 18, "\ubd80\ubd84": [18, 21, 25, 34, 38, 48], "\uac16\ucd98": 18, "fulli": [18, 25, 27, 31, 34, 47], "\ub80c\ub354\ub9c1\ud558\uc5ec": 18, "\ub80c\ub354\ub9c1\uc5d0": [8, 18, 34, 35], "\uace0\uc815\ub41c": [18, 19, 31, 34, 36, 38, 39, 43, 46, 51, 54], "\uc804\ud658": 18, "\uc2e4\ud589\ud558\uc5ec": 18, "\uc9c0\uc815\ud568\uc73c\ub85c\uc368": 18, "\ubc94\uc704\uc758": 18, "\ucee4\ubc84\ud558\uba74\uc11c": 18, "\ubcc0\ud615\uc5d0": [18, 21], "collect": [18, 47], "insight": 18, "\uac00\uae4c\uc6b8": 18, "dreambooth\uac00": 18, "\uccb4\ud06c\ud560": 18, "img2img": [18, 56], "\ubcc0\ud658\uc758": [18, 20], "\ub80c\ub354\ub9c1\uc758": [18, 34], "\uc720\uc9c0\ud558\uba74\uc11c\ub3c4": [18, 54], "\uc5f0\uad6c\uc758": [18, 26, 30, 35], "\uc751\uc6a9\uc73c\ub85c\ub9cc": 18, "sds\uc640": [18, 20, 36], "i_v": 18, "\uacb0\ud569\uc744": [18, 27, 28], "cup": 18, "\uc900\ube44": 18, "\uc77c\ubc18\ud654\uc640": 18, "\ubcf4\uc874": [8, 18], "\uc6b0\uc218\ud558\uae30": 18, "idendtity\uac00": 18, "\ud5a5\uc0c1\ub41c": [18, 19, 25, 38, 43], "\uc190\uc2e4\ub9cc": 18, "\uc0ac\uc6a9\uc2dc": 18, "\ubcf4\uc720": 18, "satur": [18, 20, 36, 52], "\ub2e4\uc218": [18, 31, 49], "\uc0c9\uc0c1\uc758": 18, "\uacfc\ub3c4\ud55c": 18, "\ud3ec\ud654": 18, "\ube44\ud604\uc2e4\uc801\uc774\uac70\ub098": 18, "\uc65c\uace1\ub41c": 18, "\ud45c\ud604\uc774": [18, 20, 28, 34], 
"\ub098\ud0c0\ub098\ub294": 18, "\uacb0\ud568": 18, "\uc0c9\uc0c1\uc744": [18, 19, 29, 34, 35], "\uacfc\ub3c4\ud558\uac8c": 18, "\uac15\uc870\ud558\ub294": 18, "\uc798\ubabb": [18, 21, 35], "\uc608\uce21\ud558\uc5ec": [18, 34, 35], "\ube44\ud604\uc2e4\uc801\uc778": 18, "\ub9e4\uac1c\ubcc0\uc218": 18, "p_v": 18, "\uc788\uc73c\ubbc0\ub85c": [18, 20, 43], "\ud6c8\ub828\uc744": [18, 26], "\uaddc\uc81c": 18, "_p": [18, 21], "\ub80c\ub354\ub9c1\ud558\ub294": [18, 34], "views\uc5d0\uc11c": 18, "\ud5a5\uc0c1": [18, 19, 20, 21, 33, 34, 40, 42, 48], "nerf360": 18, "\uc815\uaddc\ud654": [18, 20, 34, 35], "t5": [18, 19, 26, 33, 45, 50], "xxl": [18, 19, 33, 45, 50], "4core": 18, "tpuv4": [18, 19, 55], "\ud504\ub86c\ud504\ud2b8\ub2f9": 18, "\uc644\ub8cc\ud558\ub294": 18, "3\uc2dc\uac04": 18, "\uc18c\uc694": [18, 34], "d_\u03b8": 18, "150\ubc88\uc758": 18, "\ubc18\ubcf5\ud6c8\ub828": 18, "800\ubc88": 18, "\ucd5c\uc801\uc758": [18, 27, 42, 54], "\uc6d0\uc810\uc5d0\uc11c": 18, "\ubc18\uacbd\uc73c\ub85c": 18, "\uade0\uc77c\ud558\uac8c": [8, 18, 20, 21, 34, 47], "20\uac1c\uc758": [18, 35, 39], "3\ub2e8\uacc4\uc5d0\uc11c": 18, "150\ubc88": 18, "\ubc18\ubcf5\ud558\uc5ec": [18, 58], "hyperparam": 18, "\uceec\ub809\uc158\uc744": 18, "\uac1c": [18, 42, 44, 45, 47], "\uc7a5\ub09c\uac10": 18, "\ubc30\ub0ad": 18, "\uc120\uae00\ub77c\uc2a4": 18, "\ub9cc\ud654": [18, 50], "30\uac1c\uc758": 18, "\uceec\ub809\uc158\uc73c\ub85c": 18, "\uc62c\ube7c\ubbf8": 18, "\uc7a5\uc2dd\ud488": 18, "\ud76c\uadc0\ud55c": 18, "\ubd84\uc11d\ud558\uae30": 18, "contextu": 18, "\ubb38\ub9e5\ud654": 18, "\uc2dc\uc5f0": 18, "rgb": [18, 19, 20, 31, 34, 35, 39, 47, 48], "\uacf5\uac04\uc774": 18, "\uc73c\ub85c\uc368": 18, "\uc2e4\ud589": 18, "\ud655\uc0b0": [18, 24, 35], "dreamfusion\uc744": 18, "precis": [18, 19, 29, 35, 46], "rendering\ub41c": [18, 19, 39], "\uc7a5\uba74\ub4e4\uc774": [18, 19], "\uc77c\uce58\ud558\ub294\uc9c0": [18, 31], "\ube44\uc728\uc744": [18, 19, 35, 38], "\ub4a4\uc5d0": 18, "\uc5b8\uae09": [18, 
39], "\uae30\uc900": [18, 25, 48, 49, 55], "\uc624\ub9ac": [8, 18], "\uc801\uc808\ud788": 18, "\uc791\ub3d9\ud558\uc9c0\ub9cc": 18, "\uc678\ud615": 18, "360\ub3c4": [18, 34, 35, 36], "asset\uc744": [18, 19, 39], "\uc0dd\uc131\ud558\uba70": [8, 18, 24, 28, 42, 61], "\uc678\uad00\uc758": 18, "\ubc18\uc601\ud568": 18, "1\ub2e8\uacc4\uc640": 18, "nerf\uc640": [18, 19, 20, 35, 39], "\ubd80\ubd84\uc801\uc73c\ub85c\ub9cc": 18, "\uc720\uc0ac\ud558": 18, "dreambooth3d\uc758": 18, "\ud544\uc694\ud568\uc744": [18, 20], "dreambooth3d\uc640": 18, "\uc138\uac00\uc9c0\uce21\uba74\uc5d0": 18, "\uc9c8\ubb38\uc5d0": [18, 42], "\ub2f5\ubcc0\uc73c\ub85c": 18, "\ucda9\uc2e4\ub3c4": [18, 21, 52], "\ubcf4\uc785\ub2c8\uae4c": 18, "\uc77c\uad00\uc131\uacfc": [8, 18], "\ud0c0\ub2f9\uc131": 18, "\ud0c0\ub2f9\ud558\uace0": 18, "\uc788\uc2b5\ub2c8\uae4c": 18, "\ube44\ub514\uc624\uac00": [18, 31], "\uc81c\uacf5\ub41c": [18, 21], "\ubc18\uc601\ud569\ub2c8\uae4c": 18, "30\uac1c": 18, "\ud68c\uc804": 18, "11\uba85\uc758": 18, "\uc751\ub2f5": 18, "54\uac1c\uc758": 18, "\uace0\uc720\ud55c": [18, 21, 43], "21\uba85\uc758": 18, "\ub2e4\uc218\uacb0": 18, "\ud22c\ud45c\ub97c": 18, "\uc0b0\ucd9c": [18, 34], "dreambooth3d\ub294": 18, "\ucda9\uc2e4\ub3c4\uc5d0\uc11c": 18, "\ubaa8\ub378\ub4e4\ubcf4\ub2e4": [18, 40, 59], "\uc720\uc758\ubbf8\ud558\uac8c": [18, 47], "\uc120\ud638\ub428": 18, "\uc7ac\ubb38\ub9e5\ud654": 18, "\uc8fc\uc81c\uc758": [18, 24], "\uc7ac\ubb38\ub9e5\ud654\ud55c": 18, "\ubb38\ub9e5\uc744": [18, 24, 29], "\ucd9c\ub825\ub41c": 18, "\uc790\uc138\uc640": 18, "\ub85c\uceec": 18, "\ubcc0\ud615\uc740": 18, "\ud3ec\uc988\uc784\uc5d0\ub3c4": 18, "\uc0ac\uc2e4\uc801": 18, "\ud3b8\uc9d1": 18, "\uc7ac\uc9c8": [18, 20], "accessor": 18, "\uc561\uc138\uc11c\ub9ac": 18, "\ud06c\ub9bc\uc0c9": 18, "\uc2e0\ubc1c\uc744": 18, "\uc0c9\uc0c1\uacfc": [18, 34], "\ud504\ub9b4": 18, "\ucd94\uac00\ub97c": 18, "\uc2a4\ud0c0\uc77c\ud654": 18, "\ube44\uc0ac\uc2e4\uc801": 18, "\ud53c\uc0c1\uccb4": 18, "\ud3c9\uba74": 
18, "\uadf8\ub7f4\ub4ef\ud55c": [18, 43], "\ud615\ud0dc\ub85c": [18, 26, 28, 33, 34, 38, 39, 46, 49, 55, 59, 60, 61], "\uc815\uba74\uc784\uc5d0\ub3c4": 18, "\ub54c\ub54c\ub85c": [18, 28, 35, 47, 54], "\ud3ec\ud654\ub418\uace0": 18, "\ub9e4\ub044\ub7fd\uac8c": 18, "\ucc98\ub9ac\ub418\ub294": 18, "\uac00\uc774\ub358\uc2a4\ub97c": 18, "\ud53d\uc140\uc774\ub77c\ub294": 18, "\uc81c\ud55c\ub418\uc5b4": 18, "\ud6a8\uc728\uc131": [18, 24, 33], "\ud5a5\uc0c1\uc740": 18, "\ud655\uc7a5\ud560": 18, "\ud45c\ud604\uc740": 18, "\uc5c6\uc73c\uba74": 18, "\ubd88\uc77c\uce58\ud55c": 18, "\uc815\uba74\uc73c\ub85c": 18, "\ubd88\uc77c\uce58": [8, 18], "\uc120\uae00\ub77c\uc2a4\uc640": 18, "\uc587\uc740": 18, "\uc7ac\uad6c\uc131\ud558\ub294": [18, 35], "\ubc29\ubc95\uc778": [18, 21, 28, 29, 31, 32], "dreambooth3d\ub97c": 18, "\uc18c\uaddc\ubaa8": 18, "\uc14b\ud2b8\uac00": 18, "\ud3ec\uc988\uc640": [18, 21, 43], "\ucee8\ud14d\uc2a4\ud2b8": [18, 35, 43], "\uc790\uace0": 18, "\uc810\ud504\ud558\ub294": 18, "\uc900\uc218\ud558\ub294": 18, "\uac00\uc9c0\uba74\uc11c\ub3c4": 18, "\ud3c9\uac00\uc5d0\uc11c": [18, 35, 48], "\ubcf4\uc784\uc744": [18, 23, 28, 39], "2209": [19, 31], "14988": 19, "dreamfusion3d": 19, "\uaddc\ubaa8\uc758": [19, 28, 31, 38, 42, 43], "\uc704\ud574\uc11c\uc740": 19, "label\ub41c": 19, "\ucda9\uc871\uc2dc\ud0ac": 19, "parameter": [19, 46], "prior\ub85c": [19, 28, 31], "\uac01\ub3c4\uc640": 19, "\uc870\uba85\uc5d0\uc11c": 19, "\ud658\uacbd\uc5d0": 19, "\ub370\uc774\ud130\ub098": 19, "\uc5c6\uc774\ub3c4": [19, 27, 31, 39, 46, 52, 54], "\ub3d9\uc791\ud568": [19, 33], "\uc9c0\uc6d0\ud55c\ub2e4": 19, "\uac00\ub2a5\ud588\ub358": 19, "\uc801\uc6a9\ud558\ub824\ub294": 19, "\uc2dc\ub3c4\ub294": 19, "\uc131\uacf5\uc801\uc774\uc5c8\uc9c0\ub9cc": 19, "data\uac00": [19, 29, 39], "\ubd84\uc57c\uc5d0\uc11c\ub3c4": 19, "asset\uc774": 19, "\uc694\uad6c\ub418\uc9c0\ub9cc": 19, "\uc2dc\uac04\uacfc": [19, 28], "\uc694\ud558\ub294": [19, 46], "\uc791\uc5c5": [19, 53], "voxel": [8, 19, 20, 34, 
35, 39, 55], "cloud\ub97c": [19, 39], "\ube44\ub86f\ud55c": [19, 46, 58, 59, 61], "\ubc29\ubc95\uc774\ub098": [19, 54], "\ub9cc\ub4e4\ub824\ub294": 19, "\uc2dc\ub3c4": 19, "\ubc29\ubc95\ub4e4\uc740": [19, 21, 28, 32], "synthesis\uac00": 19, "\ud55c\ud3b8": 19, "rendering\uc778": 19, "\ud1b5\ud569\ud558\ub824\ub294": 19, "\uc2dc\ub3c4\uac00": [19, 32, 36], "\uc788\uc5c8\ub294\ub370": [19, 42], "\uadf8\uc911": 19, "dreamfield": 19, "dreamfield\ub294": 19, "optimization\uae30\ubc18": 19, "\ubd80\uc871\ud55c": [8, 19, 25, 36, 50, 61], "\ud604\uc2e4\uc131\uacfc": 19, "\uc815\ud655\uc131\uc5d0": 19, "dream": [19, 36], "fields\uc5d0\uc11c": 19, "model\ub85c\ubd80\ud130": 19, "distill\ub41c": 19, "\uad6c\uc131\ub418\ub294\ub370": [19, 40], "\ubbf8\ubd84\uac00\ub2a5\ud55c": [19, 20, 35, 39], "parameterization\uc744": 19, "sds\ub97c": [19, 20, 36], "\uacb0\ud569\ud568\uc73c\ub85c\uc368": 19, "\uc8fc\uc5b4\uc9c4\ub2e4\uba74": 19, "dramfusion\uc740": 19, "\uace0\ud488\uc9c8\uc774\uba70": 19, "\uc77c\uad00\uc131\uc788\ub294": 19, "object\uc640": [19, 28], "scene\ub4e4\uc744": 19, "perturb": 19, "datapoint": 19, "dist": [19, 56], "approx": [19, 36, 46, 47, 49, 51, 54], "noise\ub294": [19, 25], "density\uc5d0": 19, "\uc5f0\uad00": 19, "elbo\ub85c": 19, "match": [8, 19, 22, 32, 46], "objective\ub85c": 19, "\uac04\uc18c\ud654": 19, "\uad00\uc810": 19, "function\uc774": [19, 28, 36, 37, 39], "\uadfc\uc0ac\ud558\ub294": [19, 20, 46], "\uc608\uce21\ud574\uc57c\ud558\ub294": 19, "cfg": [19, 27, 32, 33, 36, 54], "density\uac00": [19, 36], "\uc601\uc5ed\uc744": [19, 28, 53], "\uc120\ud638\ud558\ub3c4\ub85d": 19, "\ud76c\uc0dd\ud558\uc5ec": 19, "sampling\uc5d0\ub294": 19, "\uad00\uc2ec\uc774": 19, "\ub79c\ub364\ud55c": [19, 21, 35, 39, 41, 42, 44, 56, 60], "rendering\ud560": 19, "\uc774\uc640": [19, 45, 55, 56], "dip": 19, "differenti": [19, 20, 23, 34, 35, 36, 37, 46, 54, 55, 58, 60], "\ubd84\ub958\ud568": 19, "\ubcc0\ud658\ud560": [19, 47], "dip\ub97c": 19, "\ud559\uc2b5\uc2dc\ud0ac": 
[19, 45, 57, 60], "3d\uc5d0\uc11c\ub294": 19, "volume\uc758": 19, "parameter\ub85c": [19, 23, 25, 27, 28], "volumetr": [19, 20, 34, 55], "\uc9c0\uc815\ud560": 19, "parameter\ub4e4\uc744": [19, 28, 39], "\uacb0\uacfc\uc778": 19, "\ud76c\ub9dd": 19, "deepdream\uacfc": 19, "func\uc774": 19, "\uc2e0\ub8b0\ub3c4": 19, "loss\uac00": [19, 39, 54], "\uc801\uace0": 19, "\uc7ac\uc0ac\uc6a9\ud588\uc73c\ub098": 19, "gradient\uc5d0": 19, "jacobian": [19, 36], "term\uc740": 19, "\ube44\uc6a9\uc774": [19, 21, 35, 43], "noise\uc5d0": [19, 41], "\uc791\ub3d9\ub418\uc9c0": 19, "\uc0dd\ub7b5": [19, 39, 43], "gradient\ub85c": 19, "\ubc1c\uacac": [19, 32, 35], "\uc601\uc5ed\uc73c\ub85c": 19, "\uc774\ub3d9\ud558\uae30": 19, "4\uc5d0\uc11c": [19, 38], "loss\uc758": [19, 20, 24, 25, 39], "gradient\uc784\uc744": 19, "backpropagation\uc774": 19, "\uc54c\uace0\ub9ac\uc998": [19, 20, 45, 60], "64x64": [19, 23, 25, 26, 31, 33, 36, 42, 46, 52, 53, 54, 55, 58, 59], "model\ub9cc": [19, 26], "\uc218\uc815\uc5c6\uc774": 19, "volumet": 19, "raytrac": 19, "nerf\ub85c\ubd80\ud130": 19, "rendering\ud558\uae30": 19, "rai": [19, 20, 34, 39, 55], "cast": [19, 36], "ray\ub97c": [19, 20, 39], "\uc0d8\ud50c\ub41c": 19, "mlp\uc5d0": [19, 34, 35], "\ud1b5\uacfc\uc2dc\ucf1c": [19, 22, 56], "\uc2a4\uce7c\ub77c": [19, 21], "tau": [19, 27, 36, 48, 59], "mipnerf": 19, "alias": 19, "\ud2b9\ud654": [19, 24, 48], "radiance\ub97c": 19, "\ub0b4\ubcf4\ub0b4\ub294": 19, "point\ubcc4": 19, "rho": [19, 27, 46, 54], "\ud3ec\uc778\ud2b8\uc5d0": [19, 33, 35], "vector\ub294": [19, 22, 43], "coordin": [19, 20, 30, 34, 36, 39, 55], "\uad00\uc810\uc5d0": 19, "normalizing\uc744": [19, 49], "\uacc4\uc0b0\ub420": 19, "ambient": 19, "l_a": 19, "\ub0b4\uc6a9": [19, 26, 35], "white": [19, 20], "textureless": 19, "\ucc98\ub9ac": [19, 55], "\ud1f4\ud654\ub41c": 19, "\uc194\ub8e8\uc158\uc744": 19, "\uc720\uc775": 19, "sphere": 19, "query\ub97c": [19, 43], "\ub0b4\uc5d0\uc11c\ub9cc": 19, "\uc218\ud589\ud558\uc5ec": 19, "\ubc00\ub3c4\uac00": 
[19, 20], "\uadfc\ucc98\uc5d0": 19, "\ucc44\uc6cc\uc9c0\uc9c0": 19, "\uc54a\ub3c4\ub85d": [19, 21, 29], "\ud658\uacbd": [19, 43], "\ub9f5": 19, "\uacc4\uc0b0\ud558\uace0": [19, 21, 35], "\ub204\uc801\ub41c": [19, 34], "\ubc30\uacbd\uacfc": 19, "\uad11\uc120": [19, 34], "\uc790\uc5f0\uc2a4\ub7fd\uac8c": [19, 23, 32, 40, 54], "geometri": [19, 36, 47, 55], "opacity\uc5d0": 19, "penalti": 19, "\uacf5\uac04\uc5d0": [19, 38], "\ubd88\ud544\uc694\ud55c": [8, 19, 29], "\ucc44\uc6c0\uc744": 19, "orient": [19, 61], "\ubc84\uc804\uc744": [19, 28, 35], "field\uc5d0\uc11c": [19, 39], "camera\ub85c\ubd80\ud130": 19, "\uba40\uc5b4\uc9c0\ub294": 19, "\ubc29\uc9c0\ub97c": 19, "dreamfusion\uc758": 19, "spheric": [19, 20], "position\uc740": 19, "coordinate\uc5d0\uc11c": 19, "sample\ub428": 19, "elev": [19, 20, 61], "phi_": [19, 22, 27, 54], "cam": 19, "90": [19, 45, 52, 54], "azimuth": [19, 20, 61], "origin\uc73c\ub85c\ubd80\ud130": 19, "focal": 19, "length": [19, 34, 61], "multipli": 19, "35": [19, 34], "\uc8fc\ubcc0": [19, 31, 54], "\ubd84\ud3ec\uc5d0\uc11c": [19, 37, 43], "pose\uc640": 19, "position\uc774": 19, "\ud574\uc0c1\ub3c4\uc758": [19, 21, 33, 35, 38], "model\ub97c": [19, 38, 43], "\uc635\uc158": 19, "\ud558\ub098\ub97c": [19, 26, 41, 52], "\uc870\uba85\uc774": 19, "\uc801\uc6a9\ub41c": [19, 21, 24, 41, 42, 47], "\uc0c1\ud0dc\uc5d0\uc11c\uc758": 19, "\ud14d\uc2a4\ucc98": [8, 19, 21], "\uc54c\ubca0\ub3c4": 19, "\uc0c9\uc0c1\ub9cc\uc744": 19, "\uace0\ub3c4": 19, "\uac01\ub3c4": [19, 39], "60": [19, 39], "circ": [19, 34], "overhead": 19, "02": [19, 27, 33, 36, 47], "\ub192\uac70\ub098": 19, "weight\uac00": [19, 42], "\uc90c": [19, 32, 40], "chip": [19, 33], "5h": 19, "chamfer": 19, "distance\uc640": 19, "psnr\uc740": 19, "\ubcf4\uc720\ub41c": 19, "\uc0ac\uc9c4\uacfc": [19, 47, 55, 56, 61], "\ubcf4\uae30\uc758": 19, "gt\uac00": 19, "\ub300\uc548\uc801": 19, "precision\uc740": [19, 49], "\ucea1\uc158\uacfc": 19, "\uc77c\uce58\ud558\ub294": [19, 21, 23, 35], 
"\ubb38\uc7a5\uc744": [19, 31, 43], "\uc138\ud2b8": [19, 47], "\uc911\uc5d0\uc11c": [19, 41, 48], "\ucc3e\ub294\uc9c0": 19, "centric": [19, 36], "subset\uc5d0\uc11c": 19, "153\uac1c": 19, "geo": 19, "render\uc5d0": 19, "viewaug": 19, "\uc2dc\uc57c\uac01": 19, "\uc2dc\uc57c\uac01\uc744": 19, "\uace0\ub824\ud558\ub294": 19, "viewdep": 19, "\uc758\uc874\uc801": 19, "\ubb34\ucc44\uc0c9": 19, "\ub9e4\ub044\ub7ec\uc6b4": 19, "\ud45c\uba74\uc744": [19, 20, 39], "\ub9cc\ub4e6": [19, 21], "sds\uc758": [19, 36], "\uc138\ubc00\ud55c": [8, 19, 21, 41, 59], "\ub514\ud14c\uc77c": [8, 19, 34], "\ubcf5\uc6d0\uc774": [19, 33], "\uadfc\ubcf8\uc801\uc73c\ub85c": 19, "creat": [], "decemb": [], "57": [], "pm": [], "gaussiansplat": [], "image2imag": [], "16gb": 20, "8gb": 20, "statu": 50, "done": [], "journal": [], "releas": [], "29": [], "2309": 20, "16653": 20, "achiev": 20, "\uc801\uc6a9\ud558\uace0": [20, 30], "\ucd94\ucd9c\ud558\ub294": [20, 32, 45], "just": 20, "\ubb3c\uccb4\uc5d0": 20, "\ud45c\ud604\ud588\ub2e4": [20, 39], "\uc2e0\uacbd\ub9dd\uc740": 20, "\uce74\uba54\ub77c\uc758": [20, 34], "\uc704\uce58\uc640": 20, "\uc9c0\uc810\uc758": [20, 52], "density\uc640": [20, 34], "color\ub97c": [20, 39], "\ub9ac\ud134\ud55c\ub2e4": 20, "\uc628\uc804\ud55c": 20, "\uc774\ub860\uc0c1": [20, 57], "\uc801\ubd84\uc774": 20, "\ucc28\uc774\uc810": 20, "tracing\uc774": 20, "cone": 20, "trace": 20, "\ud5a5\uc0c1\uc2dc\ud0b4": 20, "contract": 20, "euclidean": [20, 36], "\ub178\ub780": 20, "\ub9f5\ud551\ub41c": 20, "\uc601\uc5ed\uc774\ub2e4": 20, "\uc774\uc678\uc5d0\ub3c4": [20, 27, 46], "coars": [8, 20, 26, 34, 35, 39, 41], "\uc2e0\uacbd\ub9dd\uc744": [20, 34, 54], "\uc0ac\uc6a9\ud588\uc73c\uba70": [8, 20, 36, 38, 39, 58], "\ucd9c\ub825\uc2dc": 20, "\uc54a\uc558\ub2e4": [20, 23, 26, 35, 39, 45], "instant": [20, 36, 55], "ngp": [20, 36, 55], "voxel\uae30\ubc18\uc758": 20, "multiresolut": 20, "hash": [20, 36, 55], "cos\uc744": 20, "\ubc1c\uc804\uc2dc\ud0b4": 20, 
"\ucc44\ud0dd\ud568\uc73c\ub85c\uc368": 20, "\uc18d\ub3c4\ub3c4": [20, 29], "\ube68\ub77c\uc84c\uc73c\uba70": 20, "\uc2a4\ucf00\uc77c": 20, "\uc774\uc6a9\ud568\uc73c\ub85c\uc368": 20, "\uc2e0\uacbd\ub9dd\ubcf4\ub2e4": 20, "\uc624\ud788\ub824": [20, 28, 42], "\ucc44\ud0dd\ud560": 20, "cuda\ub97c": 20, "\ucc98\ub9ac\uc18d\ub3c4\ub97c": 20, "\ud5a5\uc0c1\uc2dc\ud0b4\uc73c\ub85c\uc368": 20, "\ub0bc": [20, 25, 36, 49, 57], "\ubaa8\ub378\ub9c1\ud558\uae30": [20, 29, 41], "\uad6c\uc131\ud558\ub294": 20, "wild": 20, "dataset\uc5d0\uc11c": [20, 27, 40, 43, 48, 54], "\uc7ac\uad6c\uc131\uc744": [8, 20], "transient": 20, "colmap\uacfc": 20, "keypoint\ub97c": 20, "\uc9c0\uc810\uc73c\ub85c": 20, "\ud569\uce58\uac70\ub098": 20, "\ucabc\uac1c\uba70": 20, "projection\ud558\uc5ec": [20, 23, 26], "\ub9cc\ub4e0\ub2e4": [20, 21, 35, 39, 45, 49], "\ucc38\uace01": 20, "\ucc38\uace02": 20, "gaussians\uc740": 20, "opac": 20, "\uc5ec\uae30\uc5d0": [20, 26, 28, 39], "harmon": 20, "coefficients\ub97c": 20, "\ubd84\ub9ac\uac00\ub2a5": 20, "matrix\ub294": 20, "imagineri": 20, "4\ucc28\uc6d0\uc758": 20, "quaternion\uc73c\ub85c": 20, "\ud45c\ud604\uac00\ub2a5": [20, 34], "3\ucc28\uc6d0\uc758": 20, "\ucc44\ub110": [20, 31, 34, 38, 44], "\ub2f9": [20, 34, 47], "9\uac1c\uc758": 20, "27\uac1c\uc758": 20, "\uacc4\uc218\ub97c": [20, 42], "\ub73b\uc774\uba70": 20, "sh\ub97c": 20, "\ube5b\uc774": [20, 34], "\ud37c\uc9c0\ub294": 20, "\ub9e4\ud2b8\ud55c": 20, "\uc785\uc0ac\uac01": 20, "\ubcf4\ub294": [20, 35, 50], "\ubc29\ud5a5": [20, 34, 57], "effects\ub3c4": 20, "y_l": 20, "harmonics\ub294": 20, "\ubc88\uc5ed\ud558\uba74": 20, "\uad6c\uba74\uc870\ud654\ud568\uc218\ub85c": 20, "\uad6c\uc758": [20, 39], "\ud45c\uba74\uc5d0\uc11c": [20, 34], "\uc815\uc758\ub418\ub294": 20, "\uad6c\uba74\uc88c\ud45c\uacc4": 20, "\uace0\uc815\ud558\uace0": [20, 30, 48], "\ud568\uc218\uc774\ub2e4": 20, "\uc218\ud559\uc801\uc73c\ub85c\ub294": 20, "\ub77c\ud50c\ub77c\uc2a4": 20, "\ubbf8\ubd84\ubc29\uc815\uc2dd\uc758": 20, 
"\uc2dc\uac01\ud654": [20, 48, 49], "fourier": [20, 38], "seri": [20, 36], "\uc0bc\uac01\ud568\uc218\ub4e4\uc744": 20, "\uc8fc\uae30\ud568\uc218\ub97c": 20, "\ud478\ub9ac\uc5d0": 20, "\ud655\uc7a5\ud310": 20, "sh\ub294": 20, "\uad6c": [20, 34], "\ud45c\uba74\uc5d0\uc11c\uc758": 20, "basis\uc5d0": 20, "reflect": 20, "reflection\uc740": 20, "\ubcf4\ub4e0": 20, "\ube5b\uc744": 20, "\uad00\ucc30": [20, 31], "\uc774\uc0c1\uc801\uc778": 20, "\ubc18\uc0ac\uad11": 20, "prune": [20, 27, 55], "densif": 20, "nerfstudio": 20, "frustrum": 20, "culling\uc744": 20, "sorting\ud558\uace0": 20, "\ud53d\uc140\uc758": [20, 36, 38], "ray\uc5d0": [20, 39], "\uacb9\uce58\ub294": [20, 45], "gaussian\ub4e4\ub9cc": 20, "\ubc18\uc601\ud55c\ub2e4": 20, "nativ": 20, "lift": 20, "asset": [8, 20, 39], "form": [20, 46], "also": [20, 34, 55], "One": [20, 40, 43, 46, 61], "45": [20, 61], "parametr": [20, 36, 60], "generator\ub85c\uc11c": 20, "\uc81c\uc548\ud568": 20, "\uac00\ub2a5\ud558\ub3c4\ub85d": [20, 23, 24, 32, 60], "g_": [20, 22, 56, 57, 60], "function\uc73c\ub85c": [20, 29, 31, 39, 48, 49], "paramter\ub85c": 20, "triangledown_": [20, 36], "formulation\uc740": 20, "\uc2dd\uc774\ub2e4": [20, 36], "description\uc5d0": 20, "shape\uc744": [20, 39], "\ud45c\uba74\uc740": 20, "\ub118\ub294": 20, "\uc9c0\uc810\uc744": 20, "\ud050\ube0c\uc758": 20, "8\uac1c": [20, 34], "\ucf54\ub108": 20, "\ubc00\ub3c4\uac12": 20, "8\uac1c\uc758": [20, 31, 34], "\ucf54\ub108\uc758": 20, "\ubc00\ub3c4\uac12\uc5d0": 20, "\uac00\uc9c0\uac8c": [20, 21, 29, 41], "\ub418\ub294\uc9c0\ub294": 20, "\uc815\ud574\ub454": 20, "\ud050\ube0c\ub85c": 20, "\ubd84\ud560": 20, "\ud310\ub2e8": 20, "\uc9c0\uc810\uc774": 20, "\ubb3c\uccb4": [8, 20, 21, 34, 39], "\ub0b4\ubd80\uc5d0": [20, 35], "\uc18d\ud558\ub294\uc9c0": [20, 23], "\uc678\ubd80\uc5d0": 20, "\ud310\ub2e8\ud568": 20, "\uaf2d\uc9d3\uc810": 20, "\ud45c\uba74\uc774": [20, 39], "\uc0dd\uae38\uc9c0\uc5d0": 20, "\uaddc\uce59\uc744": 20, "\uc815\uc758\ud574\ub450\uace0": 20, 
"wikipedia": 20, "\uc624\ub80c\uc9c0": 20, "\ubb3c\uccb4\uc758": [20, 34, 35, 39], "\uac00\uc7a5\uc790\ub9ac\uc5d0": 20, "\uc704\uce58\ud55c": 20, "\uc758\ubbf8\ud568": [20, 29, 42], "acmtog": 20, "\ubc29\ubc95\ub860\uc73c\ub85c": 20, "\uac00\uc18d\ud654\ud55c": 20, "\uc0bc\uac01\ud615": 20, "\uba54\uc2dc\ub97c": 20, "\ub80c\ub354\ub9c1\ud558\uace0": 20, "\uadf8\ub798\ub514\uc5b8\ud2b8\ub97c": 20, "flame": 20, "3dmm": 20, "smpl\ub4f1\uc740": 20, "\ubaa8\ub378\uc73c\ub85c": [20, 26], "\ubc29\ubc95\ub860\uc740": [8, 20, 27], "morphabl": 20, "1999": 20, "smpl": 20, "articul": [20, 21, 50], "perspect": [20, 34], "enabl": [20, 28, 41, 43], "rel": [20, 61], "gaussians\uc5d0\uc11c": 20, "refinement\ub97c": 20, "\ud615\ud0dc\ub294": [20, 26], "\ud45c\ud604\ub418\uba70": 20, "\uc800\uc7a5\ud558\uc5ec": 20, "rendering\uc2dc": [20, 39], "theta_i": 20, "x_i": [20, 30, 60], "s_i": [20, 21, 39, 55], "q_i": 20, "alpha_i": [20, 34], "c_i": [8, 20, 21, 26, 34], "splatting\uc5d0\uc11c\ub294": 20, "\ud45c\ud604\ud558\uc9c0\ub9cc": 20, "\uac04\ub7b5\ud654": [20, 39], "rotation\uc73c\ub85c": 20, "r_": 20, "r_a": 20, "xl\uc744": 20, "triangl": [20, 36, 39], "function\uc774\uace0": 20, "camera\uc774\ub2e4": 20, "view\uc640": 20, "transpar": 20, "align\ud588\ub2e4": 20, "i_": 20, "lambda_a": 20, "prompt\uc758": [20, 26, 28, 43], "ambigu": 20, "\ud559\uc2b5\ud558\ub354\ub77c\ub3c4": 20, "gaussians\uc774": 20, "blurry\ud558\uace0": 20, "\ub514\ud14c\uc77c\uc774": [20, 41, 56], "\ubd80\uc871\ud558\ub2e4\uace0": 20, "\uac1c\uc120\ud558\uae30": [20, 50], "\ub2e8\uacc4\uc778": 20, "extraction\uacfc": 20, "wise": [20, 44, 57, 61], "query\uc640": 20, "mesh\ub97c": [20, 36, 39], "algorithm\uc744": 20, "grid\uac00": 20, "\uc54c\uace0\ub9ac\uc998\uc758": [20, 36], "\ud2b9\uc9d5\uc740": [20, 53], "split": [20, 45], "pruning\ub41c\ub2e4\ub294": 20, "\uc810\uc774\ub2e4": [20, 26, 29, 38, 49, 57], "rasterization\uc744": [20, 36], "cull": 20, "technique\uc744": 20, "\uc810\uc740": [20, 24, 26, 42, 43], 
"queries\ub97c": 20, "perform": [20, 28, 30, 32, 44, 48, 52, 59], "\ub54c\ub3c4": [20, 36], "\ub9f5\ud551\ud55c\ub2e4": [20, 39], "\ucd5c\uc18c\uac12\uc744": 20, "overlap": 20, "multiscal": 20, "\ub098\ub208\ub2e4": [20, 35, 39, 52], "\ube14\ub85d\uc758": [20, 38], "gaussian\ub4e4\uc740": 20, "\uc81c\uc678\ud55c\ub2e4": 20, "\uacc4\uc0b0\ud574\uc57c": 20, "\uc904\uc77c": [20, 21, 26, 27, 30, 31, 36, 43, 44, 54], "grid\ub97c": [20, 39], "\ub9cc\ub4e4\uc5b4": [20, 21, 27, 36, 39, 52], "\ucd5c\uc885\uc801\uc73c\ub85c\ub294": [20, 36, 39], "grid": [8, 20, 26, 32, 34, 35, 36, 55], "query\ub294": [20, 43], "\ub0a8\uc544\uc788\ub294": 20, "opacity\uc758": [20, 39], "sum\uc73c\ub85c": [20, 39], "\uc5bb\ub294\ub2e4": [20, 35, 36, 39], "sum_i": 20, "sigma_i": [20, 34], "matrix\ub85c": 20, "\uc774\ub8e8\uc5b4\uc838": [20, 23, 28, 47, 57, 60], "\uc774\ud6c4\uc5d0\ub294": [20, 31, 36, 39], "empir": [20, 46, 54], "threshold\ub97c": [20, 28], "surface\ub97c": 20, "decimation\uacfc": 20, "remesh": 20, "\ud6c4\ucc98\ub9ac\ud558\uc5ec": 20, "\uc790\uc5f0\uc2a4\ub7fd\uace0": [20, 37, 40], "smoother": 20, "\uac04\uacb0\ud55c": 20, "compact": 20, "\ub9cc\ub4e4\uc5c8\ub2e4": [20, 39, 54], "\uc55e\uc120": [20, 26, 28, 32, 36], "\uc5bb\uc5c8\uae30": 20, "surface\ub85c": 20, "project\ud558\uc5ec": 20, "map\uc73c\ub85c": 20, "mesh\uc758": [20, 36, 39], "coordinate\ub97c": [20, 39], "unwrap\ud558\uace0": 20, "elevation\uc744": 20, "view\uae4c\uc9c0": 20, "\ud3ec\ud568\ud558\uc5ec": [20, 29], "\ud53d\uc140\uc740": 20, "\ub9f5\ud551\ud560": 20, "project\ub41c": 20, "image\ub294": 20, "\ub2e4\uc74c\uc758": [20, 31, 35, 45], "\uc124\uc815\uc73c\ub85c": [20, 55], "projection\ub41c": 20, "texture\ub97c": [20, 36, 39], "\uc62c\ub9ac\uace0\uc790": 20, "artifact\uac00": 20, "\ubc1c\uc0dd\ud558\uac8c\ub41c\ub2e4": 20, "rasterization\uc2dc": 20, "mipmap": 20, "\uae30\ubc95\ub54c\ubb38\uc774\ub2e4": 20, "\ubaa8\ud638\ud55c": 20, "level\uc5d0": 20, "saturation\ub41c": 20, "\uc804\ud30c": 20, "raster": 
[20, 55], "widipedia": 20, "uniti": 20, "document": 20, "\ub80c\ub354\ub9c1\uc640": 20, "\uc18d\ub3c4": [20, 22, 24, 29, 30, 31], "mipmap\uc774\ub77c\ub294": 20, "nvdiffrast\ub3c4": 20, "mipmap\uc744": 20, "\ud65c\uc6a9\ud558\uace0": [20, 21, 38], "mipmap\uc740": 20, "\ud654\uc9c8\uc73c\ub85c": 20, "\uc800\uc7a5\ud558\ub294": 20, "level\uc740": 20, "\ud654\uc9c8\uc758": 20, "version\uc744": [20, 29], "\uce74\uba54\ub77c\ub85c": 20, "\uba40\ub9ac": [20, 43], "\ub5a8\uc5b4\uc9c4": 20, "object\ub294": 20, "\uc800\ub808\ubca8\uc758": 20, "\uc815\ubcf4\uc190\uc2e4\uc774": 20, "\uc77c\uc5b4\ub09c": 20, "\uc0c1\ud0dc\uc774\uace0": 20, "\ub418\uc5c8\ub2e4\uba74": 20, "\ud750\ub974\ub294": 20, "\ub420\uc218": 20, "f_": [8, 20, 27, 34, 39, 46, 54, 59], "synthesis\uc640": 20, "texture\uac00": [20, 39], "sdedit": [20, 56], "condition\uc778": 20, "3d\uc758": [20, 36], "\uac15\ub3c4\ub97c": [20, 21], "\uc81c\ud55c\ud558\uae30": 20, "\uc2e0\uc911\ud788": 20, "\uc120\ud0dd\ub418\uc5b4\uc57c": 20, "\uc720\uc9c0\ud558\uba70": [20, 42], "\uc2dc\ud0ac\uc218": 20, "loss\uc5d0": [20, 22, 29, 36], "task\uc5d0\uc11c\ub294": 20, "rgba": [20, 39], "\uacb0\uacfc\uc5d0": [20, 33, 35, 42, 45], "\ub530\ub974\uba74": 20, "\ub9cc\uc5d0": [20, 55], "detail\uc774": [20, 29], "\uc88b\uc544\uc84c\ub2e4\uace0": 20, "\ubc18\ubcf5\ud69f\uc218\ub97c": 20, "\ub298\ub9b4\uc218\ub85d": 20, "texture\uc758": [20, 39], "\ud5a5\uc0c1\ub418\uc5c8\ub2e4\uace0": 20, "5000": [20, 55], "grei": 20, "radiu": [20, 36], "1024": [20, 22, 27, 34, 39, 41, 44, 48, 51, 52, 55, 58], "transper": 20, "0\uc5d0\uc11c": 20, "1000\ub85c": 20, "axi": [20, 61], "fov": 20, "49": 20, "degre": [8, 20, 34], "180": 20, "\ucd9c\ucc98": [20, 26, 43, 47], "run": [20, 46], "1min": 20, "2min": 20, "threshold": [20, 35], "\ubc29\ubc95\ub860\ub4e4\uacfc": 20, "\ube44\uad50\ud574\ub3c4": [20, 37], "\ubcf4\uc600\ub2e4\uace0": 20, "\ubf51\uc544\ub0b8": [20, 22, 26, 32, 36], "mesh\ub294": [20, 36], "blender\uc640": 20, "\ub9ac\uae45": 20, 
"\ud234\uc744": 20, "\uc560\ub2c8\uba54\uc774\ud305": 20, "\ud30c\uc774\ud504\ub77c\uc778\uc758": 20, "period": 20, "densificaiton": 20, "anneal": [20, 36, 58, 61], "dreamgaussian\uc744": 20, "\uc99d\ub300": 20, "stage\ub97c": [20, 38], "image\ub098": 20, "polygon": [8, 20, 39], "mesh\uc0dd\uc131": 20, "\ubc29\ubc95\ub860\ub4e4\uc758": 20, "\ubb38\uc81c\uc810\ub4e4\uc744": [20, 26, 28], "debias": 20, "brdf": 20, "\ub3c4\uc785\ud558\uba74": 20, "\uac1c\uc120\uac00\ub2a5\ud560": 20, "\uae30\ub300\ud568": [20, 43], "prob": 20, "bake": 20, "2\uc5d0\uc11c": [20, 25, 35, 38], "blurry\ud55c": 20, "\uac1c\uc120\ub41c\ub2e4\uace0": 20, "2304": [21, 42, 44], "06025": 21, "grail": 21, "cs": 21, "washington": 21, "edu": 21, "jeonghwa": [21, 31, 35, 42, 62], "yoo": [21, 31, 35, 42, 62], "08": [21, 47], "\uc785\ucd9c\ub825": [21, 39], "\uc2dc\ud000\uc2a4": [21, 35, 44], "\uc0ac\ub78c\uc774": [21, 29, 47, 52, 53], "\uc6c0\uc9c1\uc774\ub294": [21, 31], "\ub4ef\ud55c": 21, "\uc2dc\ud000\uc2a4\uac00": 21, "\uc0ac\ub78c\uacfc": 21, "\uc637\uac10\uc758": 21, "\ud569\uc131\ud558\ub294": [21, 47], "dreampose\ub97c": 21, "\uc81c\uc548\ud558\uc600\ub2e4": [21, 25, 27, 54], "\uc2a4\ud14c\uc774\ube14": 21, "\ub514\ud4e8\uc804\uc744": [21, 35], "\ud0dc\uc2a4\ud06c\ub97c": 21, "\uc778\ucf54\ub354\uc640": 21, "\uc778\ucf54\ub354\ub97c": [21, 44], "\ub3c4\uc785\ud558\uc600\uace0": 21, "\ub3c4\uc785\ud558\uc5ec": [21, 35, 42, 61], "\ub514\ud4e8\uc804\uc758": 21, "\ub123\uc5b4\uc92c\ub2e4": 21, "\ubc18\uc601\ud558\uae30": 21, "concat\ud558\uc5ec": 21, "\ub514\ub178\uc774\uc9d5": 21, "unet\uc5d0": [21, 26], "\uc8fc\uc5c8\ub2e4": [21, 38, 43], "\ub4c0\uc5bc": 21, "\ucda9\uc2e4\ub3c4\uc758": 21, "\uc870\uc815\ud55c\ub2e4": 21, "\ud328\uc158": 21, "\ud0dc\uc2a4\ud06c\uc5d0": 21, "dreampose\uac00": 21, "\uc628\ub77c\uc778\uc5d0": 21, "\ud37c\uc838": 21, "\uc804\ub2ec\ud560": 21, "\uc81c\ud55c\uc801\uc774\uba70": 21, "\uc785\uc5c8\uc744": 21, "\ub298\uc5b4\uc9c4": 21, "\ubaa8\uc591\uc774\ub098": 21, 
"\ud750\ub984": 21, "\ub258\uc559\uc2a4\ub97c": 21, "\ud3ec\ucc29\ud558\uc9c0": [21, 43], "\ub3d9\uc601\uc0c1\uc740": [21, 44], "\ubcf4\uc5ec\uc8fc\uae30\uc5d0": 21, "\uc18c\ube44\uc790\uc758": 21, "\uc758\uc0ac": [21, 43], "\uacb0\uc815\uc5d0": 21, "\uc81c\uacf5\ud558\uc9c0\ub9cc": [21, 43], "\ub3d9\uc601\uc0c1\uc774": 21, "\uc0c1\ud488\uc740": 21, "\ub4dc\ubb3c\ub2e4": 21, "\ud558\ub098": [21, 30, 48], "\ubaa8\ub378\ub4e4\uc758": [21, 49], "\ubcf4\uc5ec\uc8fc\uc5c8\uc9c0\ub9cc": 21, "\uc5bb\uc9c0": 21, "\ubabb\ud588\uc73c\uba70": 21, "\uc6c0\uc9c1\uc784\uc774\ub098": 21, "\ubaa8\uc591\uc73c\ub85c": 21, "jitter\uac00": 21, "\uc0ac\uc2e4\uc131": 21, "realism": [21, 47, 56], "\ube44\ub514\uc624\uc758": [21, 31], "\uc81c\uc5b4\ud560": [8, 21, 31], "dreampose\uc758": 21, "\uc811\uadfc\ubc95": [21, 35], "\ubaa8\ub378\ub9c1\ud558\ub294": [21, 34], "\ud29c\ub2dd\ud558\uc600\ub2e4": 21, "\ucee8\ub514\uc154\ub2dd": [21, 31, 35], "\uc2e0\ud638\uc640": 21, "\ub2e8\uc21c\ud654": 21, "\uba54\ucee4\ub2c8\uc998\uc744": 21, "\uc7ac\uc124\uacc4\ud558\uc600\ub2e4": 21, "\uc2a4\ud14c\uc774\uc9c0": [21, 39], "vae\ub97c": [21, 48], "\ud3ec\uc988\ub97c": 21, "\uac04\ub2e8\ud558\uc9c0\ub9cc": 21, "fidelity\ub97c": [21, 38], "\ub192\uc5ec\uc8fc\ub294": 21, "\ud3ec\uc988\uc5d0": 21, "\uc77c\ubc18\ud654": [21, 26, 27, 48], "\uade0\ud615\uc744": [21, 22], "\ub9de\ucd94\ub294": [21, 36], "\uc778\uc0c1\uc801\uc778": [21, 43, 47, 59], "\ubcf4\uc5ec\uc8fc\uace0": [21, 22, 23, 24, 31, 35, 49, 50, 52, 61], "\ub4e4\uace0": [21, 35, 43], "\ub514\ud4e8\uc804\uacfc": 21, "\uc694\uad6c": [21, 32, 34, 37], "\uc0ac\ud56d\uacfc": 21, "\ub300\ud3ed": [21, 38, 54], "\uccb4\ud06c\ud3ec\uc778\ud2b8\ub294": 21, "\ucd9c\uc2dc": 21, "\uc0ac\uc6a9\ub418\uc5c8\ub2e4": [21, 54], "\ub17c\ubb38\uc5d0\uc11c\ub3c4": [21, 39], "\ud2b9\ud654\ub41c": [21, 38], "\ub3d9\uc601\uc0c1\uc744": [21, 31], "\ud0dc\uc2a4\ud06c": [21, 35], "\uae30\ubc18\uc774": [21, 34, 59], "\ub124\ud2b8\uc6cc\ud06c\ub85c": [21, 34, 60], 
"\uad6c\uc131\ub418\ub294": [21, 28], "\ub9ce\uc558\ub2e4": [21, 25], "\ub2e8\uacc4\ub9c8\ub2e4": 21, "\ubcc4\ub3c4\uc758": [21, 23, 35, 38], "\ubaa8\uc158\uc774\ub098": 21, "depth\ub4f1\uc758": 21, "\uc5c6\uac70\ub098": [21, 33, 41], "\ubd88\uc644\uc804\ud560": 21, "\ubaa8\uc158\uc774": [21, 31], "\ud06c\uace0": [21, 28, 56, 59], "\ubcf5\uc7a1\ud560": 21, "groud": 21, "truth\uc5d0": 21, "\uc608\uce21\uc740": [21, 24], "\ub3c4\ucd9c\ud558\uae30": 21, "\uc624\ub958\uac00": [21, 24], "\ubc1c\uc0dd\ud558\uae30": 21, "\uc27d\ub2e4": 21, "\uc5d4\ub4dc": 21, "\ud22c": 21, "\uc2f1\uae00": 21, "optic": 21, "warp": 21, "\ud734\uba3c": 21, "\ud588\uace0": [21, 32, 44], "flow\uc5d0": 21, "\uc758\uc874\ud574": 21, "\ubcc0\ud654": [21, 34], "\uac00\ub824\uc9c4": [21, 34, 35], "\uc601\uc5ed": [21, 28, 47], "\uc758\uc0c1": 21, "\ucd5c\uadfc\uc5d4": 21, "\uc5b4\ud150\uc158": 21, "\uc140\ud504": 21, "\ud06c\ub85c\uc2a4": 21, "\uc5b4\ud150\uc158\uc744": 21, "\ud504\ub808\uc784\uc5d0": [21, 31, 44], "\ub9de\ucd94\ub824\uace0": 21, "difffashion": 21, "\ub808\ud37c\ub7f0\uc2a4": 21, "\ud2b8\ub79c\uc2a4\ud37c\ud558\uc5ec": 21, "\uc758\ub958": 21, "\uc544\uc774\ud15c\uc744": 21, "\ud3b8\uc9d1\ud558\ub294": 21, "\ub123\uc5b4": [8, 21, 26, 32, 34, 35, 39], "\uc801\uc6a9\ud55c\ub2e4": [21, 35, 38, 44], "\uae30\ub300\ud560\ub9cc": 21, "\ub098\uc624\uc9c0": 21, "\ubaa8\uc158\uc744": 21, "\uc2a4\ud06c\ub798\uce58\ubd80\ud130": 21, "\uac12\ube44\uc2fc": 21, "\ub9ac\uc18c\uc2a4": [21, 26, 28], "\ubc29\ub300\ud559": 21, "\uc624\ub79c": [21, 34], "turn": 21, "video\ub294": 21, "\ud30c\uc778\ud29c\ub2dd\ud55c\ub2e4": 21, "\ubc29\ubc95\ub4e4\uacfc": [21, 35], "\ub9c8\ucc2c\uac00\uc9c0\ub85c": [21, 28, 40, 42, 43, 54], "\uae5c\ube61\uac70\ub9bc": 21, "flicker": 21, "\uad6c\uc870\uc801\uc778": [21, 26, 56, 61], "\ubd88\uc77c\uce58\uac00": [8, 21, 52], "\ud574\uacb0\ud558\uc5ec": 21, "\uc12c\uc720\uc758": 21, "\uc6c0\uc9c1\uc784\uc758": 21, "\uc2f1\ud06c\ub97c": 21, "\uc0ac\uc6a9\ub418\uc5b4": 21, 
"\ucee8\ub514\uc154\ub2dd\uc740": 21, "\ub370\ub294": 21, "\ud6a8\uacfc\uc801\uc774\uc9c0\ub9cc": [21, 26], "\uc758\uc0c1\uc758": 21, "identity\ub098": 21, "\ud48d\ubd80\ud558\uace0": 21, "\uc0c1\uc138\ud55c": [21, 47], "\uc5f0\uad6c\uc5d0\uc11c": [21, 26, 30], "\ub2e4\ub8e8\uace0": 21, "\ubaa8\ub378\uc5d0\ub294": 21, "\uc784\ubca0\ub529\uc774": 21, "\ud53c\uc0ac\uccb4\ubcc4": 21, "\ub3d9\uc601\uc0c1\uc758": 21, "\ud3b8\uc9d1\ud558\uae30": [21, 43], "\ud1b5\ud569\ud558\uae30\ub3c4": 21, "pidm\uc740": 21, "\ud14d\uc2a4\ucc98\ub97c": [8, 21], "\uc778\ucf54\ub529\ud558\uace0": [21, 33], "\uc5f0\uacb0\ud55c\ub2e4": 21, "dreampose\ub294": 21, "\uc678\ud615\ubfd0\ub9cc": 21, "\uc6c0\uc9c1\uc784\uae4c\uc9c0": 21, "pidm\uacfc": 21, "unet\uc758": [21, 24, 26, 38], "\ub808\uc774\uc5b4\uc5d0": [21, 24], "\ud1b5\ud569\ud558\uc9c0\ub9cc": 21, "\ub178\uc774\uc988\uc5d0": 21, "\uc5f0\uacb0\ub41c": 21, "concaten": [21, 26, 48, 53, 61], "\ubd80\ub4dc\ub7fd\uace0": [21, 36], "\uad6c\ud604\ud560": 21, "\ud488\uc9c8": [21, 31, 35, 36, 42], "\ub2e4\uc591\uc131": [21, 26, 33, 39, 42], "\uc548\uc815\uc131": 21, "\ub2a5\uac00\ud558\ub294": [21, 34], "\uc815\uaddc": 21, "\ubd84\ud3ec\ub41c": 21, "\ub178\uc774\uc988\uc5d0\uc11c": [21, 35], "\ubcf5\uc6d0\ud558\ub294": [21, 59], "\uc791\ub3d9\ud558\ubbc0\ub85c": 21, "\ud76c\uc0dd\ud558\uba74\uc11c": 21, "\uc808\uc57d\ud55c\ub2e4": 21, "vae\uc640": [21, 26], "\uc624\ud1a0\uc778\ucf54\ub354": 21, "\ucef4\ud329\ud2b8\ud55c": 21, "\ud45c\ud604\uc5d0\uc11c": 21, "\uacb0\uc815\ub860\uc801": [21, 34], "\ud504\ub85c\uc138\uc2a4\uc5d0": 21, "\ud0c0\uc784": [21, 35], "\uc2a4\ud0ec\ud504": 21, "\ub514\ud4e8\uc988\ub418\uc5b4": 21, "\ub178\uc774\uc9c0": 21, "\ubcf5\uad6c\ud558\uae30": 21, "\ud0c0\uc784\uc2a4\ud0ec\ud504\uc5d0": 21, "feature\uc758": [21, 41], "\uc2dc\uac04\uc73c\ub85c": [21, 29], "\ucee8\ub514\uc154\ub2dd\ub41c": 21, "unet\uc774": 21, "cal": 21, "\uc138\uadf8\uba58\ud14c\uc774\uc158": 21, "\ub9c8\uc2a4\ud06c\ub4f1": 21, 
"\ub514\ud4e8\uc804\uc5d0\uc11c\ub294": 21, "\uc778\ucf54\ub354\ub85c\ubd80\ud130": 21, "\uc5bb\uc5b4\uc9d0": 21, "\ubcf5\uad6c\ud558\ub3c4\ub85d": 21, "\ubd84\ud3ec\ub85c": [21, 29], "\ubc00\uc5b4\ubd99\uc774\ub294": 21, "\uba54\ucee4\ub2c8\uc998\uc774\ub2e4": 21, "\uc785\ub825\uc744": [21, 26, 31, 34, 42], "\ub110": 21, "\ub300\uccb4\ud558\ub294": [21, 23], "\ub4dc\ub86d\uc544\uc6c3\uc744": 21, "\ub2ec\uc131\ub41c\ub2e4": [21, 35], "\uc778\ud37c\ub7f0\uc2a4\ud558\ub294": 21, "s\ub97c": 21, "\uc870\uac74\ubd80\ub85c": 21, "\uac00\uc774\ub4dc\ud558\ub294": 21, "emptyset": [21, 54], "null\ub85c": 21, "\uc92c\uc744": 21, "\uc608\uce21\uac12\uacfc": [21, 27, 54], "\uc608\uce21\uac12\uc744": [21, 46, 54], "\ubcf4\uac04\ud55c\ub2e4": 21, "\uc2dc\ud000\uc2a4\ub85c\ubd80\ud130": 21, "\uceec\ub809\uc158\uc5d0\uc11c": 21, "\uceec\ub809\uc158\uc5d0": 21, "\uc2e0\ud638": 21, "\ubc1b\uace0": [21, 24, 31, 52, 58], "\ub3d9\uc601\uc0c1\uc73c\ub85c": [21, 31], "\ucf58\ud150\uce20\ub97c": [21, 31, 34], "\ucd9c\ub825\ud558\uae30": 21, "\uc870\uc815\ud558\ub294": 21, "\uc791\uc5c5\uc774": [21, 46], "p_1": [21, 36], "p_n": 21, "_n": 21, "p_i": 21, "\uc808\ucc28\ub97c": [21, 33, 34], "\ub178\uc774\uc988\ub85c": 21, "\uc2dc\uc791\ud558\uc5ec": [21, 35, 36], "\uc2e0\ud638\ub85c": 21, "\ucffc\ub9ac\ud558\uc5ec": 21, "latent\uc758": 21, "\uc81c\uac70\ud55c\ub2e4": 21, "\ub514\ub178\uc774\uc988\ub41c": 21, "\uc6d0\ub798\uc758": [21, 35, 38], "\uc218\uc815\ud558\uace0": [21, 42], "\uc2dc\uac04\uc801\uc778": [21, 44], "\uad6c\uc131\ud558\uc600\ub2e4": 21, "\uc5b4\ub311\ud130": 21, "\ud544\uc694\uc131": 21, "\ub123\uae30": 21, "\ub4e4\uc5b4\uc624\ub294": 21, "\uc2e0\ud638\ub97c": 21, "net\uc5d0": 21, "concat\ud55c\ub2e4": 21, "\uc2e0\ud638\uc5d0": [21, 35], "\uc870\uac74\ud654\uc5d0": 21, "\ub124\ud2b8\uc6cc\ud06c\uac00": [21, 24], "\uc815\ub82c\ub418\uc9c0": 21, "\ud0dc\uc2a4\ud06c\uc5d0\ub294": 21, "\ub9de\ucda4\ud615": 21, "\uc5b4\ub311\ud130\ub97c": [21, 30], 
"\uad6c\ud604\ud558\uc600\ub2e4": 21, "\uc5b4\ub311\ud130\ub294": 21, "\uc870\uac74\ud654\ub97c": 21, "\uacb0\ud569\ud55c\ub2e4": 21, "\uc6d0\ub798": [21, 35, 42, 47, 49, 56], "\uae30\uc6b8\uae30\ub97c": 21, "\uc774\uc720\ub85c": 21, "\uccb4\uacc4\ub294": 21, "\uc0c1\ud638": [21, 31], "\uc791\uc6a9\ud558\ub294": 21, "\uac00\uc911\uce58\ub97c": [21, 24, 27, 33, 38, 42, 56], "\ub514\ud4e8\uc804\uc774": 21, "clip\uc774": 21, "\uc2a4\ud398\uc774\uc2a4": 21, "share": [21, 32], "\uc778\ucf54\ub529\ud55c\ub2e4\ub294": 21, "\uac10\uc548\ud560": 21, "\ucee8\ub514\uc154\ub2dd\uc744": 21, "\uac04\ub2e8\ud788": [21, 57], "\uc790\uc5f0\uc2a4\ub7ec\uc6cc": [21, 39], "\ubcf4\uc77c": [21, 42], "\uc784\ubca0\ub529\ub9cc\uc73c\ub85c\ub294": 21, "\ucea1\ucc98\ud558\uae30\uc5d0": 21, "vae\uc5d0\uc11c": 21, "\uc785\ub825\ud55c\ub2e4": [21, 38], "\ub3c4\uba54\uc778\uacfc": 21, "\uc7a5\uc810\uc744": [21, 35], "\uc544\ud0a4\ud14d\ucc98\ub294": 21, "latent\ub97c": [21, 28, 38], "\uc9c0\uc6d0\ud558\uc9c0": [21, 26, 56], "\uc54a\uae30": [21, 24, 28, 39, 42, 58], "\ub124\ud2b8\uc6cc\ud06c\uc758": [21, 34], "\ud63c\ud569\ud558\uace0": 21, "\ubaa8\ub4c8\uc5d0\uc11c": 21, "\uc608\uc0c1\ud558\ub294": 21, "\ubcc0\ud658\ud55c\ub2e4": [21, 29, 35, 44, 54], "\ud30c\uc778\ud29c\ub2dd\uc5d0\uc11c": 21, "\uc5b8\uae09\ud588": 21, "\ub4ef\uc774": 21, "\ucda9\uaca9\uc744": 21, "\uc644\ud654\ud558\uae30": [21, 39, 41], "\uac00\uc911\uce58\ub294": [21, 24], "\uc124\uc815\ub418\uc5b4": 21, "\uc784\ubca0\ub529\uc73c\ub85c\ub9cc": 21, "\uc2dc\uc791\ud55c\ub2e4": 21, "\ucee8\ub514\uc154\ub2dd\uacfc": 21, "c_p": 21, "\ube44\ub514\uc624\uc5d0\uc11c": [21, 31], "\ucd94\uc815\ub41c": 21, "\ud3ec\uc988\uc758": 21, "\ud504\ub808\uc784\uc5d0\uc11c\uc758": 21, "\uadf9\ub300\ud654\ud558\uae30": 21, "\ub2e4\uc12f": [21, 28], "pi": [21, 34, 46, 58], "\ud3ec\uc988\ub85c": 21, "\ubd80\ub4dc\ub7ec\uc6c0\uacfc": 21, "\uc99d\uac00\ud55c\ub2e4": 21, "\uad6c\uc870\uc801\uc73c\ub85c": 21, "\ucd08\uae30\ud654\ub41c": [21, 24, 31, 
39], "10\uac1c\uc758": 21, "\ucc44\ub110\uc744": [21, 31, 35], "\ubc1b\uc544\ub4e4\uc774\ub3c4\ub85d": 21, "\ucc44\ub110\uc740": 21, "\uac00\uc911\uce58\uc5d0\uc11c": 21, "\uc218\uc815\ub418\uc9c0": 21, "\ub808\uc774\uc5b4": [21, 31, 35], "\uccb4\ud06c\ud3ec\uc778\ud2b8\ub85c": 21, "\ucd08\uae30\ud654\ub41c\ub2e4": 21, "\uc778\ucf54\ub354\ub294": [21, 44], "\uccb4\ud06c\ud3ec\uc778\ud2b8\uc5d0\uc11c": 21, "\ub85c\ub4dc\ub41c\ub2e4": 21, "\ucd08\uae30\uc5d0": [21, 46], "\uc2e0\ud638\uac00": 21, "\uae30\uc5ec\ud558\uc9c0": [21, 34], "\ud30c\uc778\ud29c\ub2dd\ub41c\ub2e4": 21, "phase": [21, 61], "\ud29c\ub2dd\ud558\uc5ec": 21, "\ud569\uc131\ud55c\ub2e4": 21, "\ub514\ucf54\ub354\ub97c": [21, 31], "\uac1c\uc120\ud558\uc5ec": 21, "\ucd94\ub860\uc5d0": 21, "identity\ub97c": [21, 24], "\ubcf4\uc874\ud558\uace0": [21, 43], "\uc720\uc9c0\ud558\ub824\uba74": 21, "\uc0d8\ud50c\ubcc4": 21, "\ud30c\uc778\ud29c\ub2dd\uc774": [21, 42], "\ud544\uc218\uc801\uc774\uc5c8\ub2e4": 21, "\ud504\ub808\uc784\uacfc": [21, 31], "\ud6c8\ub828\ud558\uba74": 21, "\uace0\ucc29": 21, "stick": 21, "\uc544\ud2f0\ud329\ud2b8\uac00": [8, 21, 38], "\ube44\ub514\uc624\uc5d0": [21, 31], "\ud06c\ub86d\uc744": 21, "\ud3ec\uc988\uc30d\uc744": 21, "\uc99d\uac15\ud55c\ub2e4": 21, "\uc120\uba85\ud558\uace0": 21, "\ubcf5\uad6c\ud558\ub294": 21, "\ud30c\uc778\ud29c\ub2dd\uc758": 21, "\uc911\uc694\uc131": 21, "\ucd94\ub860\uc2dc": [21, 31], "\uc77c\ub828\uc758": [21, 29, 43], "\ud3ec\uc988\uc5d0\uc11c": 21, "\ud504\ub808\uc784\ubcc4\ub85c": 21, "\uc774\uc911": [21, 54], "dual": [21, 32], "\uc2dc\uc5d0": [21, 32, 33, 55, 61], "\uc218\uc815\ub41c\ub2e4": 21, "s_p": 21, "\uac00\uc774\ub358\uc2a4": [21, 35], "\uc6e8\uc774\ud2b8": 21, "\ucee8\ub514\uc154\ub2dd\uc774": 21, "\uacbd\uc6b0\uc640": [21, 28], "\uc6e8\uc774\ud2b8\ub97c": 21, "\uc870\uc815\ud574\uc11c": 21, "\ucda9\uc2e4\ub3c4\ub97c": [21, 24, 35], "\ubcf4\uc7a5\ud558\uace0": 21, "\uc815\ub82c\uc744": [8, 21], "\ubcf4\uc7a5\ud55c\ub2e4": [21, 36], 
"\uac00\uc774\ub4dc\ub97c": 21, "\uac15\ud654\ud558\ub294": 21, "\uc5d0\uc678\ub3c4": 21, "\ubc29\uc9c0\ud55c\ub2e4": 21, "\ubc30\uce58\uc0ac\uc774\uc988": [21, 38], "accumul": [21, 50], "1500": 21, "pndm": [21, 26], "\uc0d8\ud50c\ub7ec": 21, "100step": 21, "339\uac1c\uc758": 21, "\uc18d\ub3c4\ub294": [21, 42], "\ucd08\ub2f9": [21, 31], "30\ud504\ub808\uc784\uc774\uba70": 21, "\uae38\uc774\ub294": 21, "12\ucd08": 21, "\uc911\uc5d0\ub294": [21, 24, 32], "\ube44\ub514\uc624\ub85c\ubd80\ud130": [21, 32], "densepose\ub97c": 21, "\uc774\uc6a9\ud574\uc11c": [8, 21, 28, 32, 42, 43], "\uacc4\uc0b0\ud558\uc600\ub2e4": 21, "\uacf5\uac1c\uc801\uc73c\ub85c": [21, 43], "mraa": 21, "thin": 21, "plate": 21, "spline": 21, "mothion": 21, "tpsmm": 21, "\uc218\uce58\uc801": 21, "\uc815\uc131\uc801\uc778": [21, 56], "\uc2a4\ud06c\ub9bd\ud2b8\uc640": 21, "\uad8c\uc7a5": [21, 34], "\uc5d0\ud3ed": 21, "\ud559\uc2b5\ud558\uc600\ub2e4": 21, "avd": 21, "\ubaa8\ub4dc\uc5d0\uc11c": 21, "\ud14c\uc2a4\ud2b8": [21, 31, 35, 47], "\uc2a4\ud06c\ub9bd\ud2b8\ub97c": 21, "pidm\uacfc\ub3c4": 21, "pidm\uc758": 21, "deepfashion": 21, "\uccb4\ud06c\ud3ec\uc778\ud2b8\ub97c": 21, "\uc2a4\ud15d\uc744": 21, "\uc2e4\ud589\ud558\uc600\ub2e4": 21, "\ub3d9\uc601\uc0c1\uc5d0": [21, 31], "\ud504\ub808\uc784\uc5d0\uc11c": [21, 44], "50\ud504\ub808\uc784": 21, "\uc774\uc0c1": 21, "\ub5a8\uc5b4\uc838": [21, 54], "50\uac1c\uc758": 21, "\ucd94\ucd9c\ud558\uc5ec": [21, 26, 33], "\ud14c\uc2a4\ud2b8\ud558\uc600\ub2e4": 21, "mraa\uc640": 21, "tpsmm\uc740": 21, "drive": [21, 56], "video\uc5d0\uc11c": 21, "feautre\uc5d0": 21, "\uc758\uc874\ud558\ub294": [21, 35], "uv": [21, 28], "\uc2dc\ud000\uc2a4\uc5d0\ub9cc": 21, "\uc758\uc874\ud55c\ub2e4\ub294": 21, "\uc720\uc758\ud558\ub77c": 21, "\ub124": [21, 35, 57], "\ubaa8\ub450\uc5d0\uc11c": [21, 26, 34], "\ucde8\ud560": 21, "\uc637\uac10": 21, "\uc8fc\ub984": 21, "\ubbf8\uc138\ud55c": [21, 43], "\ud328\ud134\uc774": 21, "\uc190\uc2e4\ub418\ub294": [21, 43], 
"\ubcc0\uacbd\ud558\ub294": [21, 55], "mraa\ub294": 21, "\ud314": 21, "\ub2e4\ub9ac\uac00": 21, "\ubd84\ub9ac": [21, 40], "pidm\uacfc\uc758": 21, "\uc5bc\uad74\uc758": 21, "\uc5bc\uad74\uc744": 21, "\ud569\uc131\ud558\uc9c0\ub9cc": 21, "\uc77c\uce58\ud558\uc9c0": 21, "\uc637\ucc28\ub9bc\uc774": 21, "\ud504\ub808\uc784\ub9c8\ub2e4": 21, "\ub2ec\ub790\ub2e4": 21, "pidm\uc774": 21, "\ud569\uc131\uc5d0\uc11c\ub294": 21, "\ube44\uad50\ud55c\ub2e4": [21, 28, 31, 54], "NO": 21, "\ud30c\uc778\ud29c\ub2dd\ud558\uc9c0": 21, "\ubc84\uc804": 21, "\ud3ec\uc988\ub9cc": 21, "\uc5f0\uacb0\ud55c": 21, "identity\uc5d0": 21, "\uc5c6\uc5c8\ub2e4": [21, 43], "\uc778\ucf54\ub354\ub85c": 21, "\uad50\uccb4\ud55c": 21, "\ub514\ud14c\uc77c\uc740": 21, "\ucea1\ucc98\ud560": 21, "\uc678\ud615\uc5d0": 21, "\ud30c\uc778\ud29c\ub2dd\ud558\uba74": 21, "\ub514\ud14c\uc77c\uc758": 21, "\uc120\uba85\ub3c4\uac00": 21, "\ud5a5\uc0c1\ub418\uace0": [21, 35], "\uc624\ubc84\ud53c\ud305\uc774": 21, "\ubc1c\uc0dd\ud558\uc9c0": 21, "\uc785\ub825\ud558\uba74": [21, 36], "\ud314\uacfc": 21, "\uba38\ub9ac\uce74\ub77d": 21, "\uc8fc\ubcc0\uc5d0\uc11c\uc758": 21, "\ud615\ud0dc\uac00": [8, 21, 36, 39], "\uae5c\ubc15\uc774\ub294": [21, 31], "\ub098\ud0c0\ub0ac\ub2e4": 21, "\uc7a5": [21, 38], "\ub123\uc5b4\uc11c": 21, "\ud30c\uc778\ud29c\ub2dd\ud560": 21, "\ucd94\uac00\ud558\uba74": [21, 22, 30, 42], "\ud5a5\uc0c1\ub41c\ub2e4": 21, "\uc0ac\ub840": 21, "\ub4dc\ubb38": 21, "\uacbd\uc6b0\uc9c0\ub9cc": 21, "\ud314\ub2e4\ub9ac\uac00": 21, "\uc637": 21, "\uc18d\uc73c\ub85c": 21, "\uc0ac\ub77c\uc9c0\uace0": 21, "hallucin": [21, 31], "feature\uac00": 21, "\ud3ec\uc988\uac00": 21, "\ub4a4\ub97c": 21, "\ud5a5\ud560": 21, "\uad00\ucc30\ub41c\ub2e4": 21, "\ud328\ud134\uc758": 21, "\uc637\uc5d0\uc11c": 21, "\ud328\ud134\uc5d0\uc11c": 21, "\uae5c\ubc15\uc784": 21, "\ub3d9\uc791\uc744": 21, "vae\uc5d0": 21, "\ub290\ub9ac\ub2e4": [21, 49], "\ud30c\uc778\ud29c\ub2dd\uc740": 21, "\ud504\ub808\uc784\ub2f9": [21, 31], "18\ucd08\uc758": 
21, "\uc678\uc758": 21, "10\ubd84": 21, "\ub514\ucf54\ub354\uc758": 21, "20\ubd84\uc774": 21, "\uc18c\uc694\ub41c\ub2e4": 21, "\uc2a4\ud2f8": 21, "\uc12c\uc720": 21, "05511": 22, "task\uc758": [22, 30], "\uafb8\uc900\ud788": 22, "\ubc1c\uc804\uc911": 22, "\uae30\uc874\uc5d0\ub294": [22, 27, 44, 53], "stylegan\uacfc": 22, "\uc8fc\ub97c": [22, 40], "\uc774\ub918\uc9c0\ub9cc": 22, "\ubca0\uc774\uc2a4\ub85c": [22, 40], "\ucd94\uc138\uac00": 22, "\uae09\uaca9\ud558\uac8c": [22, 25], "\ubc14\ub00c\uc5b4\ubc84\ub9bc": 22, "\uc7a1\ub294\uac83\uc740": 22, "\ubb34\ub9ac\uc77c\uae4c": 22, "gigagan\uc740": 22, "\uc18d\ub3c4\uc810": 22, "\uc18d\ub3c4\uc801": 22, "512px\uc758": 22, "13\ucd08\ub9cc\uc5d0": 22, "megapixel": [22, 44], "1600\ub9cc": 22, "4k": [22, 35, 39], "66\ucd08\ub9cc\uc5d0": 22, "\ud65c\uc6a9\uc131": 22, "\uc0c1": [22, 28], "iteration\uc774": [22, 33], "\ub4e4\uc5b4\uac00\ub294\ub370": 22, "iteration\uc740": 22, "\uc548\uc815\uc131\uc744": 22, "\ud0a4\uc6cc\uc8fc\ub294": 22, "cost\uac00": [22, 26, 27, 30], "\ud55c\ub2e4\ub294": [22, 33, 47, 61], "\ub2e8\uc810\uc774": [22, 23, 26, 37, 40, 46, 56, 61], "pass\ub9cc": 22, "\ud544\uc694\ud558\ubbc0\ub85c": 22, "\uc54a\ub2e4\ub294": [22, 26], "object\uc758": [22, 28, 43], "class\uac00": 22, "\uba85\ud655\ud788": [22, 51], "\uc815\uc758\ub418\uc9c0\uc54a\uc740": 22, "develop\ud55c\ub2e4\uba74": 22, "\ub118\uc5b4\uc124": 22, "\uc788\uc744\uae4c": [22, 42], "img": [22, 38, 51], "66": [22, 39], "space\uc0c1\uc5d0\uc11c\uc758": 22, "stylegan2": 22, "stylegan2\ub85c": 22, "\uc120\uc815": 22, "distribution\uc5d0\uc11c\uc758": 22, "disentangle\ub41c": 22, "gigagan\uc5d0\uc11c\ub294": 22, "network\uc758": [22, 26, 39], "z\uc640": 22, "\ud53c\ub77c\ubbf8\ub4dc": 22, "block\ub4e4\ub85c": 22, "\uac12\uc73c\ub85c\ubd80\ud130": 22, "layer\ub9c8\ub2e4": 22, "\ub4e4\uc5b4\uac00\uc11c": [22, 41, 43], "scaling\ud568\uc73c\ub85c\uc368": 22, "demodul": 22, "select": [22, 32], "size\ub9cc": 22, "up\uc744": 22, "\uc548\ub418\ub294": [8, 
22, 47], "\uc624\ud508": 22, "\uad6c\uc870\uc0c1": 22, "\ub808\uc774\uc5b4\uc0c1\uc5d0": 22, "filter\uac00": 22, "\uc8fc\uc785\ubd80\ud130": 22, "\uc0dd\uc131\uae4c\uc9c0": 22, "\ucc38\uc5ec": [22, 62], "\ud45c\ud604\ub825\uc744": 22, "\ub5a8\uc5b4\ud2b8\ub9b4": 22, "\ub808\uc774\uc5b4\ub9c8\ub2e4": 22, "k_": 22, "set\uc744": 22, "w\uc758": 22, "\uac70\uce5c": [22, 23, 44, 48, 53], "kernel\uac12\uc5d0": 22, "summation\ud55c": 22, "filter\ub85c": 22, "\uacc4\uc0b0\uc5d0": 22, "softmax\ub97c": 22, "\uc598\ub294": 22, "differentiable\ud558\ubbc0\ub85c": 22, "kernel\uc744": 22, "\ub54c\ubcf4\ub2e4": [22, 35, 42, 46], "\uc808\uc57d\ub41c\ub2e4\ub294": 22, "interleav": 22, "filter\ub294": 22, "recept": 22, "\ub0b4\ubd80\uc758": [22, 28], "\ucea1\ucc98\uc5d0\ub294": 22, "\ud0c1\uc6d4\ud558\uc9c0\ub9cc": 22, "\uc678\ubd80\uc758": 22, "\ud55c\uacc4\uc810\uc744": [22, 46, 56], "\uadf9\ubcf5\ud558\uae30\uc704\ud574": 22, "stylegan2\uc5d0": 22, "\uc774\uc0c1\uc774": 22, "\uc0dd\uae40": [8, 22], "\uc6d0\uc778\uc740": 22, "product\uac00": 22, "lipschitz\ud568\uc218\uac00": 22, "\uc544\ub2c8\uae30": 22, "lipschitz": 22, "\ud568\uc218\ub780": 22, "\ube44": 22, "\uc774\uc0c1\uc73c\ub85c": 22, "\uc99d\uac00\uc2dc\ud0a4\uc9c0": 22, "\ub9cc\uc871\ud558\uc9c0": 22, "\ubabb\ud568\uc73c\ub85c\uc368": 22, "unstabl": [22, 51, 58], "\uc2e4\ud328\ud55c\ub2e4": 22, "\ub9cc\uc871\uc2dc\ud0a4\uae30": 22, "attention\uc758": [22, 26, 29], "product\ub97c": 22, "l2": [22, 39, 45, 46], "distance\ub85c": 22, "advanc": [22, 53], "stylegan2\uacfc": 22, "\ucd08\uae30\uac12\uc744": [22, 39, 55], "attentnion": 22, "\uc790\uc2e0\uc758": [22, 31], "\ud559\uc2b5\uc6a9": 22, "\uc8fc\uc785\uc6a9": 22, "gigagan": 22, "\uc720\uc5f0\uc131\uc744": [22, 43], "output\uc740": [22, 48], "size\uc640": 22, "\ub098\ub220": [22, 28, 44], "index\uc758": 22, "catch": 22, "word\ub97c": [22, 43], "global\ud558\uac8c": 22, "embedding\ud558\ub294": 22, "generator\uc758": [22, 57], "discrimin": [22, 41, 44, 45, 47, 51, 56], 
"branch\uc758": 22, "conditioning\uc744": [22, 23, 38], "generating\uc744": 22, "c\ub85c\ubd80\ud130": 22, "stylegan\uc5d0\uc11c\ub294": 22, "res\uc758": 22, "\uc54a\uc558\uc9c0\ub9cc": [22, 45], "\ud45c\ud604\uc758": [22, 34], "extractor": 22, "rightarrow": [22, 34, 36, 39, 48, 56, 58], "conv": [22, 44, 56], "level\uc5d0\uc11c\ub294": [22, 38], "level\uc5d0\uc11c\uc758": 22, "resolution\uc5d0\uc11c": [22, 42], "\uacc4\uc0b0\uc774": [22, 51, 54, 60], "\uc77c\uc5b4\ub098\uae30\ub54c\ubb38\uc5d0": 22, "ij": 22, "psi": [22, 27, 48, 54], "conv_": [22, 31], "\uc2ec\uc740": 22, "\ud569": 22, "\uc55e\ubd80\ubd84\uc740": 22, "\ub4b7\ubd80\ubd84\uc740": 22, "awar": [22, 44], "\uc55e\uc758": 22, "\ub9ac\uc5bc\ud55c\uc9c0": 22, "\uac00\uae4c\uc6b4\uc9c0\uc5d0": 22, "\ucd08\ubc18\uc5d0\ub294": [22, 25, 33], "\uc0c1\uad00\uc5c6\uc774": [22, 34], "\ud004\ub9ac\ud2f0\ub85c\ub9cc": 22, "\ud574\ubc84\ub9bc": 22, "\uac15\uc81c\ub85c": 22, "\ud558\uae30\uc704\ud574": 22, "fake": [22, 51, 57], "pair\ub85c": 22, "\uc9c0\uc815": [22, 27], "constrast": 22, "\uba40\uac8c": 22, "\ubca1\ud130\uc640\ub294": 22, "condition\uc758": [22, 26, 37], "vector\uc640\ub294": 22, "\ud559\uc2b5\ub418\uc5b4\uc57c\ud55c\ub2e4": 22, "aid": 22, "2112": [22, 23, 29], "09130": 22, "stylegan\uc5d0\uc11c": 22, "discriminator\ub294": 22, "overfitting\ub418\ub294": 22, "\uc774\ubd80\ubd84\uc744": 22, "\ud574\uacb0\ud558\uae30\uc704\ud574": 22, "sota\uc758": [22, 54], "discriminator\uc5d0": 22, "fake\ub97c": 22, "\ubd84\ub958": [22, 35, 42, 47], "\uc801\uc6a9\uc774": [22, 24, 30, 41], "64x64\uc758": 22, "3\ubc88": 22, "6\ubc88": 22, "1024x1024\uc758": [22, 42], "gigagan\uc758": 22, "\uc801\uc6a9\ud560\ub54c\uc5d0\ub294": 22, "\uc0dd\uc131\uacfc\uc815\uc911\uc5d0": 22, "real\ud568\uc744": 22, "laion2d": 22, "en": 22, "coyo": [22, 26], "700m": [22, 26], "adob": 22, "stock": 22, "machin": [22, 42, 51], "method\uac00": 22, "\ud6a8\uacfc\uac00": [22, 28, 36, 41, 59], "\uc788\ub294\uac00": 22, "\ub2e8\uc21c": 22, 
"up\ubcf4\ub2e4": 22, "method\ub4e4\uc744": 22, "\uc218\uce58\ub97c": [22, 25, 37, 49], "text2imag": 22, "time\uc744": 22, "\uc5b4\ub290\uc815\ub3c4": [22, 25], "\uc774\ub8e8\uba70": 22, "\uacbd\uc7c1\ub825\uc744": 22, "diffusion\uacfc": [22, 27, 28, 38, 54], "diffutsion\uc758": 22, "\uac1c\uc120\uc744": [22, 36, 46], "distilation\ud55c": 22, "\uc218\uce58\uc801\uc73c\ub85c\ub3c4": 22, "\uc6b0\uc704\uc5d0": 22, "time\ub3c4": 22, "\ube60\ub974\ub2e4": [22, 35, 36], "upscal": [22, 38, 44], "md": [22, 33], "table4": 22, "src": [22, 40], "pic": 22, "img14": 22, "png": [22, 51], "alt": 22, "bg": 22, "primari": 22, "mb": 22, "700px": 22, "stylegan\uc5d0": 22, "\uc5f0\uad6c\ub41c": 22, "runcat": 22, "trick": [22, 40], "\uc0c1\uc73c\ub85c\ub294": 22, "imagen\uacfc": [22, 26], "\ube44\uad50\ud558\uba74": [22, 26, 38], "develop\uc774": 22, "\ud544\uc694\ud568": [22, 49], "failur": [22, 53], "toward": 23, "icml": [23, 46, 48], "10741": 23, "e\ubcf4\ub2e4": [23, 39], "\ud3c9\uac00\uac00": 23, "\uc6b0\uc218\ud558\ub2e4\uace0": [23, 56], "powerful\ud55c": 23, "natur": [23, 36, 46], "language\ub85c": 23, "realistic\ud55c": 23, "\ubc29\ubc95\ub4e4\uc774": [23, 32, 41], "\uc0dd\uaca8\ub098\uace0": 23, "\ub300\uc751\ud558\ub294": [8, 23, 28, 47], "\uc0dd\uc131\ud558\uae30\uc5d0\ub294": 23, "\uc0dd\uc131\ubaa8\ub378\uc758": [23, 26, 42, 57], "\ub5a0\uc624\ub974\uba70": 23, "sota\ub97c": [23, 31, 37, 40, 42, 48, 49], "\ucc0d\uc5c8\ub2e4\uace0": 23, "conditional\ud55c": 23, "\uc774\ub8e8\uc5b4\uc84c\ub294\ub370": 23, "beat": 23, "synthesis\ub77c\ub294": 23, "noise\ud55c": 23, "class\ub97c": 23, "sampling\uacfc\uc815\uc5d0\uc11c": 23, "label\uc5d0": 23, "control\uc2dc\ud0a4\ub294": 23, "classifier\uc5c6\uc774": 23, "\uc18c\uac1c\ub418\uc5c8\ub2e4": 23, "guidance\ub77c\ub294": 23, "\uc81c\uc2dc\ud558\uba70": 23, "guidance\uc640": 23, "\uacb0\uacfc\uc801\uc73c\ub85c\ub294": [23, 26], "guidance\uac00": [23, 26, 28, 39, 52], "\ubcf4\uc778\ub2e4\uace0": [23, 50, 56], "shot\uc73c\ub85c": [23, 
28], "\uc0dd\uc131\ud558\ub294\ub370\uc5d0": [23, 39], "\ubcf4\uc600\uc73c\ub098": 23, "photorealistc\ud55c": 23, "\uc0dd\uc131\ud558\ub294\ub370\ub294": [23, 51, 61], "\uacaa\uc744": 23, "generation\ubfd0\ub9cc": 23, "\ud3b8\uc9d1\ud560": 23, "impainting\uae30\ub2a5\ub3c4": 23, "impaint": [23, 53], "\ubc29\ud5a5\uc131\uc744": 23, "\ub764\ub2e4\ub77c\uace0": 23, "\uc8fc\uc7a5\ud55c\ub2e4": 23, "proport": 23, "find": 23, "improv": [23, 28, 52], "constant\uac12\uc73c\ub85c": 23, "\uace0\uc815\uc2dc\ud0a8": [23, 56, 59, 61], "learnabl": [23, 26, 41, 50, 57, 59], "\uc124\uc815\ud558\uc5ec": [23, 28, 53], "step\ub9cc\uc73c\ub85c": 23, "sample\uc744": [8, 23, 26, 36, 39, 49], "dharwial": 23, "image\uc0dd\uc131\uc744": 23, "\ub17c\ubb38\uc5d0\uc11c\uc758": 23, "guidance\uc774\ub2e4": 23, "\uc720\uc9c0\ud558\ub418": 23, "classifier\uc758": [23, 26, 49, 52], "\uacfc\uc815\uc758": 23, "score\uc5d0\uac8c": 23, "guide\ub97c": [23, 37], "\uc18c\uac1c\ub418\uc5c8\ub294\ub370": 23, "classifiy\ub97c": 23, "\ud574\uc57c\ud558\ubbc0\ub85c": 23, "\uaddc\ubaa8\uac00": [23, 42], "heavy\ud574\uc9c0\ub294": 23, "\ubb38\uc81c\uc810\uc744": [23, 28, 37, 40, 48, 50], "\uac1c\uc120\uc810\uc744": [23, 34], "\uae30\ubc95\uc73c\ub85c": [23, 50, 58, 59], "\uc2dd\uc5d0\uc11c": 23, "\ubcc0\ud615\uc744": [23, 43], "model\ub9cc\uc73c\ub85c": 23, "\uc30d\uc73c\ub85c": [23, 31, 53], "\uc774\ub8e8\uc5b4\uc9c4": [23, 31], "learning\uc744": [23, 48], "\uc9c4\ud589\uc2dc\ud0a8": 23, "\uc758\ubbf8\ub97c": [23, 33, 40, 42], "pair\uc5d0": 23, "\ucee4\uc9c0\ub3c4\ub85d": 23, "\uc791\uc544\uc9c0\ub3c4\ub85d": 23, "guidance\uc5d0\uc11c\ub294": 23, "guidance\uc5d0\uc11c": 23, "classifier\ub300\uc2e0\uc5d0": 23, "clip\ubaa8\ub378\uc744": 23, "classifier\ub300\uc2e0": 23, "\uad6c\ud55c": [23, 28], "x\uc640": [23, 24], "billion": 23, "resolution\uc744": [23, 31, 49, 54], "\ub610\ub2e4\ub978": [23, 27, 37], "256x256\uc73c\ub85c": [23, 42], "\uc99d\uac00\uc2dc\ud0a4\ub294\ub370": 23, 
"\uc0ac\uc6a9\ud558\uc600\ub2e4\uace0": [23, 39, 42], "base\ub85c": 23, "\uc9c4\ud589\ud558\uc600\ub2e4": [23, 28, 29, 31, 39], "\uc218\ud589\ud574\uc57c\ud55c\ub2e4": 23, "condition\uc73c\ub85c": [8, 23, 26, 33, 38, 39, 49], "\uc8fc\uae30": [23, 38], "k\uac1c\uc758": [23, 35], "token\uc73c\ub85c": [23, 43], "encoding\ud55c": 23, "input\uac12\uc73c\ub85c": 23, "\ub123\uc5b4\uc900\ub2e4": [23, 26], "output\uc758": 23, "token\uacfc": [23, 48], "token\uc744": [23, 26, 28, 39, 48], "\uc5f0\uc0b0\ud558\uace0\uc790": 23, "adain\uae30\ubc95\uc744": 23, "block\uc758": 23, "\ub3c4\ucd9c\ud55c\ub2e4": 23, "block\ub4a4\uc5d0": 23, "\ubd99\ub294": 23, "e\uc640": [23, 39], "\uc0ac\uc6a9\ud558\uc600\uace0": [23, 45, 46, 59, 61], "architecture\ub85c\ub294": 23, "up\ub41c": 23, "2b": [23, 26], "paremeters\ub97c": 23, "transformer\ub97c": [23, 26], "upsampling\ud558\ub294": 23, "model\ub3c4": 23, "upsampler\uc640": 23, "\ube44\uc2b7\ud558\ub2e4\uace0": 23, "\uc9c4\ud589\ud588\uc744\ub54c\ub294": 23, "\uc900": [23, 41, 49], "condition\uc5d0": [8, 23], "sequence\ub97c": [23, 39], "impainting\uc744": 23, "\uac70\uce58\uc9c0": 23, "sampling\uc744": [23, 25, 29, 39, 48], "\uc54c\ub824\uc9c4": 23, "\uc601\uc5ed\uc5d0": [23, 34], "\uc0ac\uc6a9\ud588\uae30\uc5d0": 23, "\ucc38\uc870\ud560": [23, 53], "tuning\uacfc\uc815\uc5d0\uc11c": 23, "example\uc758": 23, "\uc9c0\uc6b4\ub2e4\uc74c": 23, "\uc815\ubcf4\ub85c\uc11c": 23, "\ucc44\ub110\uacfc": [23, 31, 34], "\uc785\ub825\ub418\ub3c4\ub85d": 23, "\uc124\uacc4\ud558\uc600\ub2e4": 23, "guidance\uc5d0": 23, "\uc801\ud569\ud558\uac8c": 23, "\ud6c8\ub828\uc2dc\ud0a4\uae30": [23, 24], "\ube44\uad50\ud588\uc74c\uc744": 23, "\uc5b8\uae09\ud588\ub2e4": 23, "\uc0ac\uc6a9\ud558\uae30": [23, 26, 30, 35, 39, 54], "models\ub97c": 23, "\uc0ac\uc6a9\ud588\uc74c\uc744": 23, "\ubc1d\ud78c\ub2e4": 23, "\uc5b8\uae09\ud588\ub4ef\uc774": [23, 28], "\uc88b\uc558\ub2e4\uace0": 23, "precision\uacfc": [23, 29], "recal": [23, 25, 29, 46], "score\uc640": [23, 35, 42], 
"trade": [23, 27, 29, 33, 46, 49, 54], "off": [23, 28, 29, 33, 46, 49, 54], "\uad00\ucc30\ud558\uace0": 23, "\uc5b8\uae09\ud55c\ub2e4": 23, "\ucd5c\uc801\uc73c\ub85c": 23, "\uc218\ud589\ub418\uc5c8\uc73c\uba70": 23, "\ubc29\ubc95\uc784\uc744": 23, "\ud5a5\uc0c1\uc2dc\ud0ac": 23, "\uc911\uc810\uc744": [23, 35, 48], "caption\uacfc": [23, 26, 28], "\uc77c\uce58\uc2dc\ud0a4\ub294": [23, 57, 58], "\ub6f0\uc5b4\ub098\uc9c0": 23, "\uc54a\uc744": [23, 35, 56], "\uac00\uc124\uc744": 23, "\ud3c9\uac00\uc790\ub97c": 23, "\uc9c4\ud589\ud558\uc600\uace0": 23, "\uc778\uac04\ub4e4\uc774": 23, "\uc810\uc218\uc640": [23, 29], "\uc758\uacac\uc744": 23, "guida": 23, "nce\uac00": 23, "\uc0dd\uc131\ud55c\ub2e4\uace0": [23, 47], "\ud310\ub2e8\ud588\ub2e4": 23, "table1\uc740": 23, "unguid": 23, "evaluation\uc744": [23, 31], "\ud56d\ubaa9\uc5d0": [23, 28], "\uc555\ub3c4\uc801\uc778": [23, 48], "table2\ub294": 23, "glide\uc640": 23, "model\ub4e4\uc744": 23, "\ud45c\uc774\ub2e4": 23, "\uad6c\ud558\uc600\ub2e4": 23, "coco\uc5d0": 23, "\uacbd\ud5d8\uc774": 23, "\ub5a0\uc624\ub974\uace0": 24, "\uc8fc\uc81c\uc785\ub2c8\ub2e4": 24, "\ub9e5\ub77d\uacfc": 24, "\uc9c4\ud589\ub418\uc5c8\uae30": 24, "\uc77d\uc5b4": 24, "\ubcf4\uc2dc\uae30\ub97c": 24, "\ucd94\ucc9c\ub4dc\ub9bd\ub2c8\ub2e4": 24, "contribution\uc740": [24, 28, 41], "3\uac00\uc9c0\ub85c": 24, "lighweight": 24, "dreambooth\uc758": 24, "\uc720\uc9c0\ud558\uba74\uc11c": [24, 31, 32, 38, 41], "\ud06c\uae30\ub97c": [24, 38, 49, 59], "\uc904\uc774\uace0": 24, "hyperdreambooth\ub97c": 24, "\uad6c\ud604\ud588\uc9c0\ub9cc": 24, "fidelity\uac00": [24, 36, 40, 43, 49, 52], "\ub5a8\uc5b4\uc9c0\uac70\ub098": 24, "hypernetwork\ub97c": 24, "via": [24, 33, 62], "finetuning\uc5d0": 24, "svdiff": 24, "styledrop": 24, "dreamartist": 24, "\uc608\uc2dc\uac00": 24, "\ub290\ub9ac\ub2e4\ub294": [24, 58], "\ub2e8\uc810\uc744": [24, 26, 36, 51, 54, 55, 58, 60], "\uc5f0\uad6c\ub4e4\uc744": [24, 26], "hyperdreambooth\ub294": 24, "\uc774\ub8e8\uc5c8\ub2e4\uace0": 
24, "\uc774\uc804\uc5d0": [24, 26, 43, 49, 57], "dreambooth\ub294": 24, "hyperdreambooth\uc758": 24, "\uc601\uac10\uc6d0": 24, "\ud558\ub098\ub85c": [24, 30, 35, 39, 43, 59, 60], "\ud65c\uc6a9\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 24, "\ub7ad\ud06c\uc758": 24, "\uadfc\uc0ac\ud654\ud558\uc5ec": 24, "\ud06c\uae30\uc640": [24, 25], "\ubc29\ubc95\uc785\ub2c8\ub2e4": [24, 41, 47], "personalization\uc774": 24, "\uc0b4\ud3b4": 24, "contribution\uc758": 24, "\uc0b4\ud3b4\ubcf4\ub3c4\ub85d": [24, 60], "\uae30\uc220": [24, 34, 35, 38, 42], "\ud558\ub098\uc778": [24, 42], "\uc904\uc5ec\uc11c": 24, "lidb\uc5d0": 24, "\uc124\uba85\ub4dc\ub9ac\uaca0\uc2b5\ub2c8\ub2e4": 24, "lidb\ub294": 24, "residuals\uc758": 24, "\uc138\ubd84\ud654\ud558\ub294": 24, "\uc544\uc774\ub514\uc5b4\uc785\ub2c8\ub2e4": 24, "orthogon": [24, 36], "basis\ub97c": 24, "a\uc640": 24, "\ud589\ub82c\uc744": 24, "\ubd84\ud574\ud558\ub294": 24, "\uac83\uc73c\ub85c\ub3c4": 24, "\uc774\ud574\ud560": [24, 33], "\uad6c\uccb4\uc801\uc73c\ub85c": 24, "a_": [24, 28], "aux": [24, 40], "\ubd84\ud574\ub418\uba70": 24, "b_": [24, 25], "\ubd84\ud574\ud560": 24, "\ud589\ubcc4\ub85c": 24, "\uc9c1\uad50\ud558\ub294": 24, "\ubca1\ud130\ub85c": [24, 43], "\ucd08\uae30\ud654\ub418\uace0": [24, 31], "\ud559\uc2b5\ub418\ub294": 24, "\uac00\uc911\uce58\uc785\ub2c8\ub2e4": [24, 56], "\uc120\ud615": [24, 27, 35, 54], "residual\uc740": 24, "w_x": 24, "experiment": [24, 46, 51, 60], "\ub418\uc5c8\uc73c\uba70": [24, 31], "\uac1c\uc218\ub294": 24, "30k\uac1c": 24, "\uc0ac\uc774\uc988\ub294": 24, "120kb\ub85c": 24, "\uacbd\ub7c9\ud654": [24, 27], "\ubcc0\uc218\ub9cc\uc73c\ub85c": 24, "\ud3ec\uc778\ud2b8\uc785\ub2c8\ub2e4": 24, "\ub2e4\uc74c\uc740": 24, "\ub098\ud0c0\ub0b4\uba70": [24, 34, 35], "\uc544\uc774\ub514\uc5b4\ub294": 24, "x\ub97c": 24, "lidb\uc758": 24, "residual\uc778": 24, "h_": [24, 38], "\ub3cc\uc785\ud558\ub294": 24, "hypernetwork\ub294": 24, "\ud6c8\ub828\ub418\uba70": 24, "paramters\uc785\ub2c8\ub2e4": 24, "\uad00\ub828\ub41c": 
[24, 28, 29, 43], "\uc870\uc815\ub429\ub2c8\ub2e4": 24, "\ub098\ud0c0\ub0c5\ub2c8\ub2e4": [24, 47], "supervisori": 24, "\uc124\uc815\ub41c": 24, "\uac1c\uc778\ud654\uc5d0": 24, "\uc0c1\ub300\uc801\uc778": [24, 36, 61], "\ud56d\ubaa9\uc758": 24, "\uc9c0\uc6d0\ud558\uae30": 24, "\uc785\ub825\uc785\ub2c8\ub2e4": 24, "\uc9c0\uc2dc\uc0ac\ud56d": 24, "hyperdreambooth\uc5d0\uc11c\ub294": 24, "\ub4dc\ubb3c\uc9c0\ub9cc": 24, "\uc0bd\uc785\ud560": [24, 43], "hyperdreambooth\uc5d0\uc11c": 24, "\uad6c\uc131\ub418\uba70": [24, 38], "\ud558\ub098\uc785\ub2c8\ub2e4": 24, "\uac00\uc911\uce58\uc5d0": 24, "\ub354\ud558\uc5ec": [24, 39, 61], "\uac1c\uc778\ud654\ub97c": 24, "\uc2e4\ud589\ud569\ub2c8\ub2e4": 24, "\ubc18\ubcf5\uc801": 24, "\uc218\ud589\ud569\ub2c8\ub2e4": 24, "hypernetwork\uac00": 24, "\ubc18\ubcf5\uc801\uc778": [24, 27, 44, 54], "\uac1c\uc120\ud558\ub824\uace0": 24, "\uc2dc\ub3c4\ud558\ub294": 24, "\ubc29\ud5a5\uc131\uc774": 24, "\uc62c\ubc14\ub974\uace0": 24, "\uc5bc\uad74\uacfc": [24, 38], "\ubbf8\uc138\ub9cc": 24, "\uc7a1\uc544\ub0b4\uc9c0": 24, "tuning\ud558\uace0": 24, "\ub54c\uc5d0": [24, 27], "encoding\uc740": 24, "\uc218\ud589\ub418\uba70": 24, "f\ub294": 24, "\uc2e4\ud589\ud558\uace0": 24, "\uc18d\uc131\uacfc": 24, "\ubc29\ud5a5\uc131\uc5d0": 24, "\uc62c\ubc14\ub974\uac8c": 24, "\ub418\uc9c0\ub9cc": [24, 54], "detail\uc740": 24, "dreambooth\ubcf4\ub2e4": 24, "\uc9c0\uc2dc\uc5b4": 24, "c\uc5d0": 24, "\uc870\uc815\ud569\ub2c8\ub2e4": [24, 56], "\uac1c\ub150\uc785\ub2c8\ub2e4": 24, "\uc644\ud654\ud558\uc5ec": 24, "rank\ub85c": [24, 27], "hypernetwork\uc758": 24, "\uc8fc\uccb4\uc758": 24, "\uace0\uc8fc\ud30c\uc218": 24, "\uadfc\uc0ac\ud654\ud560": 24, "\uc5c5\ub370\uc774\ud2b8\ubcf4\ub2e4": 24, "\ub2ec\uc131\ud560": [24, 29], "relaxed\uc758": 24, "\uac1c\ub150\uc740": 24, "\ubc29\uc2dd\ubcf4\ub2e4": 24, "\uc694\uc778\uc785\ub2c8\ub2e4": 24, "\uc5ec\uae30\uc11c\ub3c4": 24, "\uc9c0\uc6d0\ud558\uba70": [24, 35], "\uc5bc\uad74\uc5d0": 24, "\ud2b9\uc131\uacfc": 24, 
"\ucea1\ucc98\ud558\ub294": 24, "\uace0\ub824\ud560": 24, "40\ubc88\uc758": 24, "\ubc18\ubcf5\uc73c\ub85c": 24, "\uc644\ub8cc\ud560": 24, "\ube44\uad50\ud588\uc744": [24, 29, 37, 42], "25\ubc30": 24, "\uc18d\ub3c4\ub77c\ub294": 24, "\uad6c\ud604\ud588\uc2b5\ub2c8\ub2e4": 24, "5\uc758": [24, 28, 36, 39], "\uc778\ucf54\ub354\ub3c4": 24, "\uac1c\uc778\ud654\ud558\uae30": 24, "\uc2dc\uac01\ud654\uc5d0": 24, "sfhq": 24, "synthet": [24, 34, 45, 46, 62], "headquart": 24, "000\uac1c\uc758": 24, "galleri": 24, "\uc544\ub798\ub85c": [24, 41, 59], "\uc778\uc2a4\ud0c0\uadf8\ub7a8": 24, "\uc140\uce74": 24, "bark": 24, "skin\uc758": 24, "\ub85d": 24, "\uc2a4\ud0c0": 24, "\uc804\ubb38\uc801\uc778": 24, "inversion\uc758": 24, "\ud45c\uc785\ub2c8\ub2e4": 24, "dino\uc640": 24, "\uc9c0\ud45c\ub97c": [24, 35, 42, 47, 61], "\ud45c\ub294": [24, 29], "\ubd80\ubd84\uc785\ub2c8\ub2e4": [24, 41, 42], "hyperparameter\ub97c": 24, "\ube44\uad50\ud588\uc2b5\ub2c8\ub2e4": [24, 47, 59], "\ud559\uc2b5\ub960\uc744": 24, "\uc99d\uac00\uc2dc\ud0a4\uace0": 24, "\uac10\uc18c\uc2dc\ud0a4\uba74": 24, "agg": 24, "1\uc740": [24, 29, 49], "400\ubc88\uc758": 24, "\ubc18\ubcf5\uc744": 24, "\uc2dc\ud589\ud558\uace0": 24, "2\ub294": [24, 49], "1200\ubc88": 24, "\uc694\uc18c\ub85c": 24, "\ud558\uc774\ud37c\ub124\ud2b8\uc6cc\ud06c\ub97c": 24, "\ud558\uc774\ud37c\ub124\ud2b8\uc6cc\ud06c": 24, "\uc608\uce21\ub9cc": 24, "1\ubc88\ub9cc": 24, "\ube44\uad50\ud569\ub2c8\ub2e4": [24, 46, 55, 58, 60, 61], "\uc9c0\ud45c\uc5d0\uc11c": 24, "\ub2ec\uc131\ud55c\ub2e4\ub294": 24, "\uba54\ud2b8\ub9ad": 24, "\uc2dc\ub098\ub9ac\uc624\uc5d0\uc11c": 24, "\uc57d\ud558\ub2e4\uace0": 24, "\uc774\ubbf8\uc9c0\uc5d0\ub9cc": 24, "\uc2a4\ud0c0\uc77c\uc5d0\uc11c": 24, "\uc0ac\ub78c\uc744": [24, 47], "\uc778\uc2dd\ud558\ub3c4\ub85d": 24, "\ub54c\ubb38\uc774\ub77c\uace0": [24, 46, 56, 61], "\uc8fc\uc7a5\ud558\uba70": 24, "\ubcf4\uc644\ud558\uae30": [24, 56], "study\ub97c": [24, 36], "\ube44\uad50\ud558\uace0": 24, 
"\uc0ac\uc6a9\uc790\ub4e4\uc758": 24, "\ubc1b\uc558\uc2b5\ub2c8\ub2e4": 24, "ups\uac00": 24, "\uc874\uc7ac\ud569\ub2c8\ub2e4": [24, 46, 47, 50, 51, 58], "direct": [24, 34, 39, 46], "\uc608\uce21\uc5d0\uc11c": 24, "\uc798\ubabb\ub41c": 24, "\uc2dc\ub9e8\ud2f1": 24, "\ub098\uc62c": 24, "\uc5d0\ub7ec\uc785\ub2c8\ub2e4": 24, "\ub208": [24, 47], "\uc0c9\uae54\uc774\ub098": 24, "\ud5e4\uc5b4": 24, "\ud0c0\uc785": 24, "\uc131\ubcc4": [24, 43], "\ub4f1\uc774": [24, 27, 42, 51, 59], "underfit": 24, "identity\ub294": 24, "\uc9c0\ucf1c\uc9c0\ub354\ub77c\ub3c4": 24, "\uc0d8\ud50c\uc774": [24, 35, 47], "\uc0dd\uc131\ub420": 24, "hypernetwork\uc640": 24, "\uc2a4\ud0c0\uc77c\uc5d0": 24, "\ubb38\uc81c\uc810\uc740": [24, 57], "\ube5b": [24, 34], "ood\uc778": 24, "\uc0d8\ud50c\uc5d0\uc11c": 24, "\ub098\ud0c0\ub0a0": [24, 33], "hyperdreambooth\ub77c\ub294": 24, "\uac1c\uc778\ud654\ud558\ub294": 24, "hypernetwork\ub77c\ub294": 24, "\ud30c\ub77c\ubbf8\ud130\uc778": 24, "\uc774\uc5b4\uc11c": 24, "\uae30\ud0c0": [24, 31, 35], "\uc904\uc774\uba74\uc11c": [24, 27, 29, 57, 58], "\ubb34\uacb0\uc131\uc744": 24, "\uc2a4\ud0c0\uc77c\uacfc": [24, 43, 47], "\uc758\ubbf8\uc801": [24, 43], "\uc218\uc815\uc774": [8, 24, 43], "\uc785\uc99d\ud558\uc600\uc2b5\ub2c8\ub2e4": 24, "2102": [25, 48], "09672": 25, "likelihood\uc218\uce58\ub3c4": 25, "sampling\uc2dc": 25, "step\uc73c\ub85c": [25, 38, 49, 54], "scale\uacfc": [25, 41], "quailty\uc640": 25, "\uc218\uce58\uac04\uc758": 25, "quality\uc5d0": 25, "\ubaa8\ub378\uc5d0\ube44\ud574": 25, "\ub5a8\uc5b4\uc84c\ub2e4": 25, "diversity\uac00": [25, 36, 49], "cifar": [25, 42, 46, 51, 58], "\ub3d9\uc791\ud588\uc9c0\ub9cc": 25, "dataset\uc5d0\uc11c\uc758": 25, "\ub3d9\uc791\uc740": 25, "imagenet\uac19\uc740": 25, "dataset\uc5d0\uc11c\ub3c4": 25, "process\uc5d0\uc11c\uc758": 25, "\ub0b4\ub294": [25, 30, 43], "\uc5f0\uad6c\ub4e4\uc5d0\uc11c": 25, "loglikelihood": 25, "\uc218\uce58\uc640": 25, "sample\uc758": 25, "quality\uac04\uc758": 25, 
"\uc5f0\uad00\uc131\uc744": 25, "distribution\uc5d0": [25, 36], "\uc218\uce58\ud654\ud55c": 25, "\ub290\ub08c": 25, "\uc88b\uc544\uc9c0\uba74": 25, "quality\ub3c4": 25, "\uc99d\uac00\ud558\ub294": [25, 35, 57], "ddpm\uc5d0\uc11c\ub3c4": 25, "\uac1c\uc120\ud55c\ub2e4\uba74": 25, "\uc99d\uac00\ud560": 25, "\uc54a\uc744\uae4c": 25, "angeloyeo": 25, "mle": [25, 51], "html": [25, 47], "\uc785\ud78c": [25, 40], "\ud615\ud0dc": [8, 25, 36, 44, 57], "denoising\uc5d0": [25, 29], "noising\ud560": 25, "\uc544\ub798\uc640\uac19\uc774": 25, "\uc0ac\uc6a9\ud574\ub3c4": [25, 27, 36, 54, 57, 59], "\ubcf4\uc5ec\uc11c": 25, "\ubb38\uc7a5": 25, "\uc758\ubb38\uc810": 25, "\uc815": 25, "\ubc18\ub300\uc758": 25, "parameter\uc778\ub370": 25, "fix\ub97c": 25, "\ud558\ub294\uac8c": 25, "\ub9de\uc744\uae4c": 25, "step\uac04": 25, "step\uc774": [25, 49], "\ub450\uac1c\uc758": [25, 31, 34, 39, 57], "\ub3d9\uc77c\ud574\uc9c4\ub2e4": 25, "2\ub97c": [25, 38, 42], "\uacb0\uc815\ub418\ub294\ub370": 25, "\uacb0\uc815\ub418\ub294": 25, "\ub450\ub294\uac83\uc740": 25, "\uc124\uacc4\uc758": 25, "\ud559\uc2b5\ud558\uae30\uc5d0\ub294": 25, "\ubc94\uc704\uac00": 25, "\uc791\uc544\uc11c": 25, "predict\ud558\ub3c4\ub85d": 25, "hybrid": [25, 49], "hyprid": 25, "\u03bbl_": 25, "vlb": 25, "\uc774\ubbf8\uc9c0\uc5d0\ub300\ud574": 25, "\ub3d9\uc791\ud558\uc9c0\ub9cc": 25, "32x32": [25, 49, 58], "\uc54a\ub294\uac83\uc744": 25, "scheduling\uc5d0\uc11c": 25, "mode\uc758": 25, "limitation\uc774": 25, "\uc9c0\uc801": 25, "\uac70\ub4ed\ub0a0\uc218\ub85d": 25, "\uc0c1\ub2e8": [25, 38], "noisy\ud574\uc9d0": 25, "skip\ud574\ub3c4": 25, "\uc131\ub2a5\uc5d0": [25, 42, 54, 57], "\uc601\ud5a5\uc774": 25, "mode\ub97c": 25, "\uc758\ubbf8\uc788\ub294": [25, 31], "\ubbf8\uce58\uc9c0": 25, "equation\uc744": 25, "\uc0c8\ub85c": [25, 31, 53, 56], "\ub2e8\uacc4\uc5d0\uc11c\ub294": [25, 28, 35, 38, 39], "\uac15\ud558\uac8c": [25, 40], "\uc785\ud600\uc9c0\uc9c0\ub9cc": 25, "0\uacfc": 25, "\ubd80\uadfc\uc5d0\uc11c\ub294": 25, "\ub35c": 
[25, 49], "direct\ub85c": 25, "\ucd5c\uc801\ud654\ud558\ub3c4\ub85d": 25, "\uc124\uacc4\ud558\uba74": 25, "\uc774\ubbf8\uc9c0\uc640\uac19\uc774": 25, "unstable\ud574\uc11c": 25, "\ucd5c\uc801\ud654\uc5d0\ub294": 25, "\uc904\uc774\uae30\uc704\ud574": 25, "\ub9d0\uae30\ub294": 25, "\ubcc0\ud654\uc5d0": 25, "\ud655\ub960\uc801\uc73c\ub85c": [25, 41], "\ucd08\ubc18\uc758": 25, "sampling\ud574\uc11c": 25, "\ud559\uc2b5\ud558\ub3c4\ub85d": 25, "\uc801\uc6a9\ud574\ubcf8": 25, "\uc801\uc6a9\ud558\uba74": [25, 58, 60], "\uc804\ubcf4\ub2e4": 25, "\ub2e4\uc18c": [25, 48, 55], "\ucde8\uc57d\ud588\ub358": 25, "64x64\uc640": 25, "cidar": 25, "\ubaa8\ub378\uc774\ub098": 25, "\ubaa8\ub378\uc911\uc5d0\uc11c\ub294": 25, "\ube44\ud574\uc11c\ub294": 25, "\uba74\uc774": [25, 28], "speed\ub97c": 25, "step\ub9cc": 25, "\uac00\ub3c4": 25, "fid\uac12\uc744": 25, "metric\uc73c\ub85c": 25, "biggan": [25, 49, 58], "\ud0c0\uac9f\uc5d0": 25, "\uc218\uce58\ub098": 25, "metric\uc5d0\uc11c": 25, "capacity\ub97c": 25, "nll": [25, 47], "\ud559\uc2b5\ub7c9": 25, "\ube44\ub840\ud568": 25, "compat": [26, 31], "06721": 26, "tencent": [26, 59], "ailab": 26, "21": 26, "\ubb38\uc81c\uc0c1\ud669": 26, "\ud6cc\ub96d\ud558\uc9c0\ub9cc": 26, "\uc2dc\ub3c4\ud558\uac70\ub098": 26, "tuning\ud558\uac8c": 26, "\ub9ac\uc18c\uc2a4\uac00": [26, 27], "\ud544\uc694\ud560": [26, 42, 47, 54], "\ubc94\uc6a9\uc131": 26, "\ud638\ud658\uc131\ub3c4": 26, "\ud574\uacb0\ubc29\uc548": 26, "features\uc640": 26, "features\ub85c": 26, "decoupling\ud55c\ub2e4": 26, "\uc0c1\ud0dc\uc774\ubbc0\ub85c": 26, "layer\uc5d0": [26, 31, 41], "\ub123\uac8c": 26, "feature\uc640": [26, 33], "\uc218\ud589\ud558\uac8c": 26, "\uacb0\ud569\ud558\ub294": 26, "\uc801\uc808\ud558\uc9c0": [26, 32], "\uac15\uc810": 26, "\uad6c\uc870\uc5d0\ub3c4": 26, "\ud65c\uc6a9\uac00\ub2a5\ud558\ub2e4": 26, "22m": 26, "\uac00\ubccd\ub2e4": 26, "tools\uc5d0": 26, "\ud544\uc694\uc131\uacfc": 26, "\uc0dd\uc131\ud558\ub824\ub294": [26, 36], "\uc2dc\ub3c4\uc758": 26, 
"\uc885\ub958\uc640": 26, "\uc7a5\ub2e8\uc810\uc744": 26, "scene\uc774\ub098": 26, "\uc785\ub825\ud560\ub54c": 26, "\uc785\ub825\ud558\ub294": 26, "\uac04\ud3b8\ud558\uace0": 26, "\ud6a8\uacfc\uc801\uc774\ub2e4": 26, "worth": [26, 43], "thousand": 26, "\ub0b4\uce04\ub7f4": 26, "\ud48d\uc73c\ub85c": 26, "\uce74\ud398\ub97c": 26, "\uafb8\ubbf8\uace0": 26, "\uc2dd\ubb3c\uc744": 26, "\uc7a5\uc2dd\ud558\uace0": 26, "\uc2f6\uc5b4": 26, "\ub0b4\uac00": 26, "\uc88b\uc544\ud558\ub294": 26, "\uc2dd\ubb3c\uc740": 26, "\uc2a4\ub178\uc6b0": 26, "\uc0ac\ud30c\uc774\uc5b4": 26, "\ud638\uc57c": 26, "\uc790\ubbf8\uc624\ucfe8\uce74\uc2a4\ub4f1": 26, "\uc758\uc790\uc640": 26, "\ud14c\uc774\ube14\uc740": 26, "\uc6d0\ubaa9\uc744": 26, "\uc120\ud638\ud574": 26, "e2\ub294": 26, "\uc9c0\uc6d0\ud55c": 26, "\ubc29\uc2dd\uc774\uc5c8\ub2e4": 26, "encoder\ub85c": [26, 38], "\ubf51\uc544\ub0b4": 26, "variations\uc640": 26, "unclip\uacfc": 26, "tuning\ud558\ub824\ub294": 26, "\uc2dc\ub3c4\ub97c": 26, "\ucef4\ud4e8\ud130": [26, 29], "\uc0ac\uc6a9\uacfc": 26, "\uc800\ud558": [26, 42], "\uc7ac\uc0ac\uc6a9\uc131": 26, "\uc800\ud558\ub77c\ub294": 26, "tools\uacfc": 26, "\ud638\ud658\ub418\uc9c0": 26, "\uce58\uba85\uc801\uc774\ub2e4": 26, "\uad50\uccb4\ud558\ub294": 26, "\uc788\uc5c8\uc9c0\ub9cc": 26, "\uc9c0\uc6d0\ud560": 26, "\uc5c6\uac8c": [26, 47], "\ucd5c\uadfc\uc5d0\ub294": [26, 42, 56], "\uac74\ub4dc\ub9ac\uc9c0": 26, "\uc9c0\uc6d0\ud558\ub294": 26, "segmen": 26, "\ud65c\uc6a9\ud588\ub2e4": [26, 44], "adapter\ub098": 26, "uni": 26, "\uac19\uc774refer": 26, "\uc785\ub825\ud574": 26, "\uc774\ub098": [26, 27, 30, 32, 36, 54, 56, 58], "\uc804\ub2ec\ud558\ub824\ub294": 26, "\uc2dc\ub3c4\ub3c4": 26, "\ud750\ub984\uc758": 26, "encoder\uc5d0\uc11c": [26, 39, 48], "network\uc5d0": 26, "feature\ub4e4\uc744": 26, "mapping\ud558\uc5ec": 26, "\uc735\ud569\ud558\uace0\uc790": 26, "feature\ub300\uc2e0": 26, "\uad6c\uc870\uc5d0": [26, 47], "\ub123\uc740": [26, 47], "faith": [26, 31, 37], 
"\ucda9\uc2e4\ub3c4\uac00": 26, "tuning\ub41c": [26, 42], "\ub098\ube74\ub2e4": 26, "addit": [26, 32, 45, 49], "\ubb38\uc81c\uc810\uc758": 26, "\uc6d0\uc778\uc744": [26, 61], "model\ub0b4\uc758": 26, "attention\uc774\ub77c\uace0": 26, "attention\uc5d0\uc11c": 26, "weights\uc740": 26, "\uc787\ub294": 26, "\uc0c1\ud0dc\uc774\ub2e4": 26, "\ud569\uccd0\uc9c0\ub294\ub370": 26, "\ubb34\uc2dc\ub418\uc5b4": 26, "grain": [26, 32, 33, 40], "\uadf9\ubcf5\ud55c": 26, "decoupl": 26, "\ubd84\ub9ac\ud55c\ub2e4": 26, "model\ub0b4": 26, "\ud6c8\ub828\ub2e8\uacc4\uc5d0\uc11c\ub294": 26, "\ud6c8\ub828\ud55c\ub2e4": 26, "\uac00\ubccd\uace0": 26, "prompt\uc640\ub3c4": 26, "\uc5b4\uc6b8\ub9b0\ub2e4": 26, "adapter\uc5d0\uc11c": 26, "reusabl": 26, "controlnet\uac00\ub2a5": 26, "\ubd80\ub958\ub85c": 26, "cogview": 26, "scene\uacfc": 26, "\uc774\uc5c8\ub2e4": 26, "vq": [26, 29, 58], "image\ub4e4\uc744": 26, "token\ud654": 26, "transformer\uc5d0": [26, 33], "\uc608\uce21\ud558\uac8c": [26, 39], "\ub9ce\uace0": [26, 38, 42], "\ud544\uc694\ud588\ub2e4": [26, 35], "dm": [26, 29, 44, 49], "\ub4f1\uc7a5\ud558\uc5ec": 26, "art\ub97c": 26, "glide\ub294": 26, "\uc0dd\uc131\ud588\ub2e4": [26, 44, 54], "e2\uc758": 26, "\uc774\ud574\ub3c4\ub97c": 26, "imagen\uc740": 26, "model\uc778": [26, 28, 43], "t5\ub97c": 26, "\ub3c4\uc785\ud588\ub2e4": 26, "imagen\uc758": [26, 31, 42], "\ub4dc\ubb3c\uac70\ub098": 26, "\ud559\uc2b5\ud55c\uc801\uc5c6\ub294": 26, "entity\uc5d0": 26, "\ucda9\uc131\ub3c4\ub97c": 26, "\uac1c\uc120\ud588\ub2e4": 26, "sd\ub294": [26, 38], "\ub3d9\uc791\ud558\uac8c": 26, "\uc77c\uce58\ub3c4": 26, "i\uc758": 26, "\ub514\uc790\uc778\uc744": 26, "\ucc44\ud0dd\ud558\uc5ec": [26, 49], "embedding\ub4f1": 26, "versatil": 26, "framework\ub97c": 26, "i2t": 26, "\uc0dd\uc131\ubc29\uc2dd\uc744": 26, "\uba74\uc5d0\uc11c\ub294": 26, "composer\uac00": 26, "\uc2dc\ub3c4\ud588\uc5c8\ub2e4": 26, "raphael\uc740": 26, "mixtur": 26, "moe": 26, "\ud48d\uc758": 26, "\uc810\uc5d0\uc11c": [26, 35], 
"\ub9e4\ub825\uc801\uc774\ub2e4": 26, "\uc9c0\uc6d0\ud558\uace0\uc790": 26, "variant": 26, "\ubcc0\uacbd\ud55c": 26, "embedding\uc73c\ub85c": [26, 43], "\uad50\uccb4\ud560": 26, "\ucd94\uac00\ud588\ub2e4": [26, 38, 39, 44], "tuning\ud558\ub294": [26, 27], "\uc7a5\uc810\uc774": [26, 30, 51, 55, 59], "\ub192\uc73c\uba70": 26, "tool": 26, "\uc54a\ub294\ub2e4\ub294": 26, "\ube44\ud6a8\uc728\uc801\uc774\ub2e4": 26, "\ub5a0\uc624\ub974\ub294": 26, "freeze\uc2dc\ucf1c": 26, "nlp\uc5d0\uc11c": 26, "\uc624\ub7ab\ub3d9\uc548": 26, "\uc0ac\uc6a9\ub418\ub358": 26, "llm\uc758": [26, 28], "\uc778\uae30\ub85c": 26, "adapter\ub4e4\ub3c4": 26, "control\uc744": [26, 28], "specific\ud55c": 26, "\ub123\uae30\uc704\ud574": 26, "\uc2dc\uae30\uc5d0": 26, "\ub4f1\uc7a5\ud588\ub294\ub370": 26, "\uac04\ub2e8\ud558\uace0": [8, 26, 43], "\uc0c9\uc774\ub098": 26, "\uba74\uc5d0\uc11c": 26, "\uc8fc\uace0\uc790": 26, "tuning\uc5d0": 26, "\uc904\uc774\uae30": [26, 44, 49], "injection\uc744": 26, "control\uc678\uc5d0": 26, "content\ub098": 26, "\uc870\uc808\ud558\uace0\uc790": 26, "\uc5f0\uad6c\ub3c4": 26, "shuffle\uc758": 26, "recompose\ud558\ub3c4\ub85d": 26, "only\uc758": 26, "sd\uc5d0": [26, 28], "\ubcc0\ud615\ud588\ub2e4": 26, "\ubc84\uc804\uc758": 26, "\ub354\ud574\uc90c\uc73c\ub85c\uc11c": 26, "adapter\ub85c\uc11c\uc758": 26, "\uc5ed\ud560\ub3c4": 26, "projection\ud558": 26, "seecod": 26, "variants\ub97c": 26, "\uc791\ub3d9": 26, "preprocessor": 26, "contentshuffledetector": 26, "pixelunshuffl": [26, 59], "\uce35\uc758": 26, "\ub354\ud574\uc9d0": 26, "\uc77c\uc885\uc778": 26, "\uc0dd\uc131\ub2e8\uacc4": 26, "chain\uc744": 26, "\ud1b5\ud574\ub370\uc774\ud130\uc5d0": 26, "noise\ub85c": 26, "\ud45c\ud604\ub41c\ub2e4": [26, 29, 38], "\ucd94\uac00\uc870\uac74": 26, "step\uc744": [26, 27, 29, 33, 38, 43, 54], "\ub9d0\ud558\uba70": 26, "\ub0b4\uc5d0": [8, 26, 35], "\uc18d\ud55c\ub2e4": [26, 35], "\ub9d0\ud558\uace0": 26, "diffusino": 26, "predefin": 26, "function\uc774\ub2e4": [26, 36], 
"\ud559\uc2b5\ub418\uace0": 26, "\ub178\uc774\uc988\ub85c\ubd80\ud130": 26, "dpm": [26, 27, 36, 54], "solver\uc640": 26, "fast": [8, 26, 27], "sampler\ub97c": [26, 29], "\ubc38\ub7f0\uc2f1\ud560": 26, "\ud65c\uc6a9\ud558\ub294\ub370": [26, 32], "\ubc88\uac70\ub85c\uc6c0\uc744": 26, "\uc9c0\uc6b0\uae30": 26, "\uc0ac\uc6a9\ud558\uae30\ub3c4": 26, "\uc811\uadfc\uc5d0\uc11c": 26, "models\ub294": [26, 54], "\ubc30\uc81c\ud558\uc5ec": 26, "sampling\ub2e8\uacc4": 26, "weight\ub85c": [26, 37], "\ubd88\ub9ac\ub294\ub370": 26, "\uc870\uc808\ud558\uae30": 26, "\uc0c1\uc218\uac12\uc774\ub2e4": 26, "\uc77c\uce58\uc131\uc744": 26, "\ub192\uc774\ub294\ub370": 26, "layer\uac00": 26, "\ud615\ud0dc\uc774\ub2e4": 26, "\ube44\uad50\ud574": [26, 54], "\ub3d9\uc791\ud558\ubbc0\ub85c": 26, "\ud6a8\uc728\uc801\uc774\ub2e4": [26, 27], "\ub3d9\uc791\ubc29\uc2dd": 26, "pretain": 26, "\ubf51\uc544\ub0c8\ub2e4": 26, "learning\uc2dc\ud0a8": 26, "\ud48d\ubd80\ud55c": [26, 31, 33], "\uc870\uc815\ub418\uc5b4": 26, "\ud559\uc2b5\ub2e8\uacc4\uc5d0\uc11c": 26, "frozen\ub418\uc5b4": 26, "\ud559\uc2b5\ub418\uc9c0": 26, "\uacb0\ud569\ub41c\ub2e4": [26, 44], "\ub123\uc5c8\ub2e4": [26, 38], "zw_q": 26, "w_k": [26, 30, 59], "w_v": [26, 30, 59], "\ud589\ub82c\uc774\ub2e4": 26, "w_q": [26, 30, 59], "layers\uc758": 26, "\ud559\uc2b5\uac00\ub2a5\ud55c": 26, "weigth": 26, "matrices\ub2e4": 26, "attention\uc2dc": 26, "feature\ub85c": 26, "\ubc1c\uacac\ud558\uc5ec": 26, "\uad6c\uccb4\uc801\uc73c\ub85c\ub294": [26, 39], "\uacf3\uc5d0": 26, "\ucc98\ub9ac\ud558\ub3c4\ub85d": 26, "\uc8fc\uc5b4\uc9c8\ub54c": [26, 36], "_k": [26, 29], "qeury\ub97c": 26, "\uc0ac\uc6a9\ud588\ub2e4\ub294": [26, 38, 49], "\ucd94\uac00\ud558\uac8c": 26, "\uc218\ub834\uc18d\ub3c4\ub97c": [26, 54], "\ucd08\uae30\ud654\ud588\ub2e4": 26, "\ub354\ud568\uc73c\ub85c\uc368": 26, "\ucd5c\uc885\uc801\uc778": 26, "zw": 26, "freeze\uc2dc\ud0a4\uace0": 26, "\uc9c4\ud589\ud558\ubbc0\ub85c": 26, "adapter\ub9cc": 26, "\ucd5c\uc801\ud654\ud558\uace0": [26, 
36], "\uace0\uc815\ud55c\ub2e4": 26, "dataset\uc73c\ub85c": [26, 54], "sd\uc640": [26, 38], "random\ud558\uac8c": 26, "drop\ud558\uc5ec": 26, "drop\ub418\uba74": 26, "embedding\uc740": 26, "\ucc98\ub9ac\ud588\ub2e4": 26, "detach\ub418\uba70": 26, "\uac00\uc911\uce58\ub3c4": 26, "\ud56d\ubaa9": 26, "0001": 26, "01": [26, 30], "librari": 26, "deepspe": 26, "sero": 26, "1m": [26, 33], "50step": 26, "\uc2e4\ud5d8\uacb0\uacfc": [20, 26, 36], "reference\uc640": 26, "\uc9c0\ub098\uce58\uac8c": [26, 29], "\uc720\uc0ac\ud558\ub2e4\ub294": 26, "\ub4e4\uc5c8\uc2b5\ub2c8\ub2e4": [26, 42], "\uba87\uba87\uc740": 26, "\uc88c\uc6b0\ubc18\uc804\uc744": 26, "\ud55c\uac83\ucc98\ub7fc": 26, "\ub290\uaef4\uc84c\uc2b5\ub2c8\ub2e4": 26, "\ud754\ud788": [26, 47], "collapse\uc640": 26, "\uc544\ub2cc\uac00": 26, "\ub0ae\uc544\ubcf4\uc774\ub294": 26, "\uc758\uc544\ud588\uc73c\ub098": 26, "conclusion\uc5d0\uc11c": 26, "generaliz": [26, 59], "embedding\ub97c": 26, "\uc783\uc5b4\ubc84\ub9b4": 26, "\ub514\uc790\uc778\ud588\ub2e4": 26, "\uccab\ubc88\uc9f8\ub85c": [26, 46, 56, 60, 61], "penultim": 26, "\ubf51\uc544\ub0b8\ub2e4": [26, 49], "\ubf51\uc544\ub0b4\uae30": [26, 32], "token\ub4e4\uc744": 26, "finer": 26, "information\uc744": [26, 31], "\ucd08\ub798\ud560": 26, "\ud65c\uc6a9\ud558\uba74": [26, 39], "pose\ub97c": 26, "\ub514\uc790\uc778\uc740": 26, "attention\uc73c\ub85c": 26, "\uace0\uc791": 26, "\ube44\ub4f1\ud558\uac70\ub098": 26, "\ud655\uc7a5\uc131\uc774": 26, "\uc88b\uc544": 26, "\ub354\uc560": 26, "\uac00\ub2a5\ucf00\ud55c\ub2e4\ub294": 26, "style\uc774": [26, 41], "inversion\uc774\ub098": 26, "\uc0dd\uc131\ud558\uc9c0\ub294": 26, "\ubbf8\ub798\uc5d0": 26, "\ud5a5\uc0c1\uc2dc\ud0a8": [26, 38], "\uac1c\ubc1c\ud558\ub294": 26, "\ubaa9\ud45c\ub2e4": 26, "2403": [27, 56], "12036": [27, 56], "donghyun": [27, 54, 62], "han": [27, 54, 62], "\uc801\uc6a9\ud558\uc600\ub2e4": 27, "\uadf9\ub300\ud654": 27, "pf": [27, 46], "solver\ub97c": [27, 54], "\ubc29\ubc95\ub860\ub4e4\ubcf4\ub2e4": 27, 
"\uc0ac\uc6a9\ud558\uae30\uc5d0\ub294": 27, "\ubb34\ub9ac\uac00": 27, "ldms\uc744": 27, "\uac00\uc18d\ud654": [27, 31, 54], "\uae30\ubc95\ub4e4\uc774": 27, "\uc81c\uc548\ub418\uc5b4": 27, "\uc654\ub294\ub370": 27, "2\uac00\uc9c0\ub85c": 27, "solver": [27, 46, 47, 56, 57, 58], "ldm\uc744": [27, 38, 43, 54], "\uc801\uc73c\ub85c": 27, "overhead\uac00": [27, 30], "computation\uc801\uc73c\ub85c": 27, "stage\uc758": [27, 54], "cm": [27, 54], "\ub300\uc548\uc774\ub2e4": 27, "backward": [27, 47, 50, 51], "\uc811\uadfc\ud558\uc5ec": 27, "\ud68d\uae30\uc801\uc73c\ub85c": 27, "lcms\uc740": [27, 54], "step\ub9cc\uc73c\ub85c\ub3c4": [27, 54], "\uc0dd\uc131\ud574\ub0bc": [27, 40], "\ud544\uc694\ud558\uc9c0": [27, 28, 31, 37, 46, 54], "lcms\uc744": 27, "\ud544\uc694\ud558\uac70\ub098": 27, "\uc5f0\uad6c\ub294": [27, 39, 47], "\ub4f1\uc5d0": [27, 42], "solver\uc774\uba70": [27, 54], "cms\uc740": 27, "\uc904\uc774\uba74\uc11c\ub3c4": 27, "\ubc29\ubc95\ub860\uc774\ub2e4": 27, "cms\uc758": 27, "ode\uc758": [27, 54], "\uada4\uc801\uc758": 27, "points\uac00": 27, "solution\uc5d0": [27, 54], "mapsto": [27, 46, 54], "\ucd94\uc815\ud558\ub294": [27, 51], "\ub9d0\ud574": [27, 46, 47, 58], "\ub358\uc9c0": 27, "\uc815\ud655\ud788\ub294": 27, "\ucd94\uc815\ud55c\ub2e4": 27, "timestep\uc5d0": 27, "\uad00\ud55cfunction\uc758": 27, "\uacb0\uacfc\uac12\uc740": 27, "\ub9cc\uc871\ud574\uc57c": [27, 46], "foral": [27, 28, 54], "\ub9e4\uc6b0\uc791\uc740": 27, "\uc591\uc218": [27, 46], "\uac12\uc774\ub2e4": [27, 54], "\uc774\uae30": [27, 32, 54], "\ub9cc\uc871\ud55c\ub2e4": [27, 54], "\uc218\uc2dd\uc740": [27, 41, 58], "\uc99d\uba85\ud558\uae30": 27, "\uc218\uc2dd\uc774\ub2e4": 27, "\uc2ec\uce35\uc2e0\uacbd\ub9dd\uc744": 27, "\ub098\ub258\ub294\ub370": 27, "\ubcf4\ud3b8\uc801\uc73c\ub85c": 27, "distillation\uc774": 27, "\uc9c0\uc218\ud3c9\uade0\uc774\ub3d9": [27, 54], "t_n": [27, 34, 46, 54], "ema\ub97c": [27, 54], "\uc9c0\ud45c\uc774\ub2e4": [27, 54], "\ucd94\uc815\ud55c": [27, 54], "leftarrow": [27, 
28, 54], "numer": [27, 39, 46], "ode\ub97c": [27, 54], "ode\ub85c": 27, "\uc608\uce21\uac12\uc774": 27, "\uac19\ub3c4\ub85d": 27, "\ud575\uc2ec\uc774\ub2e4": 27, "cms\uc5d0": 27, "\ucd94\uac00\ud574\uc8fc\uace0": 27, "\uce58\ud658\ud55c\ub2e4": 27, "\uae30\ubc18\uc774\uae30": 27, "\ubcc0\uacbd\ud574\uc900\ub2e4": 27, "cd": [27, 46, 54], "psi_": [27, 54], "timestep\uc774\uc9c0\ub9cc": 27, "\ud558\uc704": [27, 43, 54], "\uac04\uaca9\uc774\ub2e4": 27, "t_i": [27, 34, 39, 46, 54], "lcms\ub294": [27, 54], "cms\uc640": 27, "distillation\ub3c4": 27, "\uc815\uc758\ub418\uc5b4\uc788\ub2e4": 27, "\uc218\ub834\uc774": [27, 60], "\ub2a6\uc5b4\uc9c0\uac8c": 27, "\uc81c\uc2dc\ud588\ub2e4": 27, "k\ub294": 27, "off\ub97c": 27, "20\uc73c\ub85c": 27, "distillation\ud560": 27, "ldms\ub97c": 27, "lcms\uc758": 27, "distillation\ub9cc": 27, "\ub0b4\uc6a9\uc740": [27, 42], "peft": [27, 30], "\uc774\ub780": [27, 52, 61], "knowledg": [27, 45, 46, 56, 59], "quantiz": [27, 29, 33, 39, 48], "perf": 27, "rola\ub97c": 27, "phi_0": [27, 30], "\ub300\ud558\uc5ec": [8, 27, 35, 48], "\uac00\uc911\uce58\uac00": 27, "\uc5c5\ub370\uc774\ud2b8": [27, 36, 57], "underset": [27, 48], "y_t": [27, 30], "llm\uc774\ub098": 27, "task\ub85c": 27, "\uc790\uc6d0\uc801": 27, "weight\uc758": 27, "\ucc28\uc6d0\uc740": [27, 35], "\uae30\ub85d\ud558\ub294": 27, "rank\ub97c": 27, "r\ub85c": 27, "\ub0ae\ucd94\uc5b4": 27, "finetuning\ud55c\ub2e4": 27, "a\ub294": 27, "gauissian\uc73c\ub85c": 27, "b\ub294": 27, "zero\ub85c": 27, "d\uc758": [27, 35], "r\ub9cc\ud07c": 27, "matrix\ub9cc": 27, "\ud6a8\uc728\uc801\uc774\uace0": 27, "\ubc29\ubc95\ub4e4\ubcf4\ub2e4": [27, 45], "\ubcf4\uc5ec\uc8fc\uae30\ub3c4": 27, "\uc774\uacf3": 27, "\ucc38\uace0\ud558\uc600\uc2b5\ub2c8\ub2e4": 27, "llm\uc744": [27, 28], "target\uc73c\ub85c": [27, 43], "\ub9cc\ub4e4\uc5b4\uc84c\uae30": 27, "value\uc5d0": [27, 29], "diffusion\uc774\ub098": 27, "finetuning\uc2dc\uc5d0\ub3c4": 27, "\uac04\ub2e8\ud558\uac8c": [27, 42, 47, 54], "arithmetic\uc740": 
27, "task\uc5d0\uc11c": [27, 42, 54], "vector\ub77c": 27, "\uc870\ud569\ud558\uc5ec": 27, "\uc815\uc758\ud560": [27, 55, 58], "\uc870\ud569\ud558\uace0": 27, "analogy\ub97c": 27, "distillation\uc5d0": 27, "\uc800\uc790\ub294": [27, 30, 43, 54, 61], "distillation\uc740": [27, 54], "ldms\uc5d0": 27, "\uc77c\uc885\uc758": [27, 47], "tuning\uc73c\ub85c": [27, 48], "\uc5c5\ub370\uc774\ud2b8\ub294": 27, "ba": [27, 30], "\uace0\uc815\ub418\uba70": 27, "pass\ub294": 27, "w_0x": [27, 30], "bax": [27, 30], "\uc704\uc640\uac19\uc774": 27, "lcms\uc5d0": [27, 54], "\ud6c8\ub828\uacfc": 27, "\uc2e4\uc0ac\uc6a9\uc774": 27, "5\ub098": 27, "\uc0ac\uc6a9\ud588\uc9c0\ub9cc": [27, 49], "sdxl\uacfc": [27, 38], "ssd": [27, 53], "1b": [27, 35], "segmind": 27, "model\uc5d0\uc11c\ub3c4": 27, "lcd\uc744": 27, "\uc801\uc751\ud558\ub294": 27, "custion": 27, "datasets\uc5d0": 27, "\uc4f0\uc774\ub294\ub370": 27, "\uc774\uac19\uc740": [27, 54], "style\uc5d0": [27, 41], "lora\uac00": 27, "\ud569\uccd0\uc838": 27, "\ubc1c\uacac\uc774": 27, "arithmetic\uc5d0": 27, "\uad00\uc810\uc73c\ub85c": [27, 42], "\ud574\uc11d\ud560": [27, 42, 58], "\uc8fc\uc7a5\ud558\uc600\ub2e4": 27, "\uc774\ub77c": [27, 30, 58], "lcms\ub97c": 27, "\uc870\ud569\ub41c\ub2e4": 27, "lambda_1": 27, "lambda_2": 27, "\uc774\ub8e8\uc5b4\uc9c0\uba70": 27, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\ub2e4": 27, "module\uc778": 27, "\uc608\uce21\ud558\uba70": 27, "lora\uc5d0": 27, "\uc81c\uacf5\ud568": [27, 31, 43], "enhanc": [28, 47], "2305": [28, 36, 39], "13655": 28, "tonylianlong": 28, "groundeddiffus": 28, "\uc774\ub8e8\uc5c8\ub2e4": [28, 40], "numeracy\uc640": 28, "reasoning\uc744": 28, "\ubb38\uc81c\ub4e4\uc774": 28, "\ub808\uc774\uc544\uc6c3": 28, "\ud5a5\uc0c1\uc2dc\ud0a4\ub294": [28, 54], "\ub4f1\uc7a5\uacfc": 28, "\ubc1c\uc804\uc5d0": 28, "\ubc1c\uc804\ub418\uc5b4\uc654\ub2e4": 28, "sdxl": [28, 61], "\ucde8\uc57d\ud55c": [28, 53], "figure\ub97c": 28, "negat": [28, 40], "numeraci": 28, "bind": 28, 
"relationships\uc5d0\uc11c": 28, "\ud504\ub86c\ud504\ud2b8\uac00": [28, 43], "\ub300\uaddc\ubaa8\uc758": 28, "\uc88b\uc9c0\ubabb\ud55c": 28, "\ub300\uaddc\ubaa8\ub85c": 28, "\ud655\ubcf4\ud558\ub294": 28, "\uc870\ucc28": 28, "\uc77c\uc774\ub2e4": [28, 36, 43], "\ud53c\ud558\uba74\uc11c": 28, "figure\uc758": 28, "\uc811\uadfc\ud55c\ub2e4": 28, "stage1": 28, "\ud45c\ud604\ub418\uc5b4\uc57c": 28, "layout\uc744": 28, "object\ub4e4\uc744": 28, "attribute\uacfc": 28, "parsing\uc744": 28, "\uc62c\ubc14\ub978": 28, "box": [28, 32], "coordinate\uc744": 28, "\ubaa9\ud45c\uc778": 28, "generator\ub85c\uc368": 28, "stage2": 28, "stage1\uc73c\ub85c": 28, "box\uc5d0": 28, "caption\uc774": 28, "inference\uacfc\uc815\uc5d0\uc11c": 28, "generation\uc744": [28, 43], "\uac00\ub2a5\ucf00\ud55c\ub2e4": 28, "introduc": 28, "abil": [28, 32, 45, 51, 59], "steer": 28, "shelf": 28, "instanc": [28, 41, 47, 50], "lmd": 28, "instruct": 28, "allow": [28, 34, 52], "broader": 28, "assess": 28, "demonstr": 28, "superior": 28, "recent": 28, "stage1\uacfc": 28, "\ub2e8\uacc4\ub85c\uc368": 28, "box\uc758": 28, "\ub2e8\uacc4\uc5d0\uc11c\uc758": 28, "\uc717\ub2e8\uc778": 28, "instruction\uc744": 28, "llm\uc5d0\uac8c": 28, "\ub9d0\ub4e4\ub85c": 28, "specification\uacfc": 28, "details\ub85c": 28, "\ud574\uc57c\ud560": 28, "\uc77c\ub4e4\uacfc": 28, "prompt\ub77c\uace0": [28, 36], "examples\uc5d0": 28, "output\uac12\uc744": 28, "few": [28, 42, 48, 54], "example\uc744": 28, "llm\uc73c\ub85c": 28, "\uc774\ub04c\uc5b4\ub0b4\ub294": 28, "\ucde8\ud55c": 28, "5\uac19\uc740": 28, "shot\uc744": [28, 48], "gpt4": 28, "\uacbd\uc6b0\ub294": [28, 42, 47], "shot\ub9cc\uc73c\ub85c\ub3c4": 28, "\ub098\uc640\uc788\ub2e4": 28, "caption\uac12\uacfc": 28, "coordinate\uac12\uc73c\ub85c": 28, "llm\uc774": 28, "\ub808\uc774\uc544\uc6c3\uc744": 28, "\ucee8\ud2b8\ub864\ub7ec\ub97c": 28, "\ub3c4\uc785\ud55c\ub2e4": 28, "\ud0dd\ud588\ub358": 28, "work\ub4e4\uc758": 28, "region": [28, 32, 34, 61], 
"inference\uacfc\uc815\uc5d0\uc11c\uc758": 28, "\uc801\uc6a9\ud558\uc9c0\ub9cc": 28, "\uc81c\uc5b4\ud558\ub294": 28, "\uc778\uc2a4\ud134\uc2a4\ub4e4\uac04\uc758": 28, "\uad6c\ubcc4\uc774": 28, "space\ub098": 28, "map\uc5d0\uc11c": 28, "\ubc1c\uc0dd\ud558\uba70": 28, "\uc778\uc2a4\ud134\uc2a4": 28, "\ud798\ub4e4\uac8c": 28, "\uc694\uc778\uc774\ub2e4": 28, "\uacbd\uacc4": [28, 35], "\uc0c1\uc790\uc5d0": 28, "\ub9c8\uc2a4\ud0b9\ub41c": [28, 33, 44], "\uc548\ub0b4\ud568\uc73c\ub85c\uc368": 28, "\uac00\ub2a5\ud1a0\ub85d": 28, "\uc774\ub97c\ud1b5\ud574": 28, "\uc778\uc2a4\ud134\uc2a4\uc758": 28, "\ubc30\uce58\uc640": 28, "\ud5c8\uc6a9\ud55c\ub2e4": 28, "stage2\ub294": 28, "step1": 28, "2\ub85c": 28, "\ub098\ub220\uc9c4\ub2e4": 28, "step1\uc758": 28, "stage1\uc5d0\uc11c": 28, "box\ub4e4\uc5d0": 28, "box\ub0b4\uc758": 28, "object\ub4e4\uc5d0": 28, "\uacfc\uc815\uc774\ub2e4": [28, 37], "compose\ud558\uc5ec": 28, "grai": 28, "cat\uc5d0": 28, "box\uac00": 28, "stage1\uc744": 28, "\uad6c\ud574\uc84c\ub2e4\uba74": 28, "box\uc548\uc5d0": 28, "cat\uc774": 28, "\uadf8\ub824\uc9c0\ub3c4\ub85d": 28, "\uc720\ub3c4\ub97c": 28, "\uadf8\ub7ec\ub294": 28, "\uc800": 28, "object\uac00": 28, "gt": [28, 32, 39], "\uac8c": [8, 28], "\uacfc\uc815\uc774\ub77c\uace0": 28, "\uc0dd\uac01\ud558\uba74": [28, 33], "lmd\uc758": 28, "step1\uc5d0\uc11c": 28, "\uc9c4\ud589\ub41c\ub2e4\uace0": 28, "\ubcf4\uc790\uba74": [28, 42], "\uac70\uce58\uba74\uc11c": 28, "box\uc548\uc5d0\uc11c": 28, "\ud558\ub294\uac83\uc774": 28, "\uc120\ud589\uc774": 28, "\ub418\uc5b4\uc57c": [28, 39], "\uc720\ub3c4\ud558\uae30": 28, "box\ub0b4\ubd80\uc5d0": 28, "q_u": 28, "tk_v": 28, "pixel\uac12\ub4e4\uacfc": 28, "prompt\ub0b4\uc5d0\uc11c\uc758": 28, "u\ub294": 28, "pixel\ub4e4\uc744": 28, "\uc758\ubbf8\ud558\uace0": [28, 39, 42, 55], "v\ub294": 28, "\uc608\uc2dc\ub97c": 28, "\uc815\ub9ac\ud558\uc790\uba74": [28, 54, 55, 57, 60, 61], "orang": 28, "grass": 28, "\uc600\ub2e4\uace0": 28, "indoor": [28, 53], 
"\uad6c\uc131\ud55c\ub2e4": [28, 35, 54], "box\ub97c": 28, "align\ud558\uae30": 28, "\ubcf4\uc774\ub4ef\uc774": [28, 46, 56], "pixel\uc758": 28, "token\uacfc\uc758": 28, "attention\uc740": 28, "\uc99d\uac00\ub418\uace0": 28, "\uac10\uc18c\ub418\uba74": 28, "token\uacfc\ub294": 28, "box\ub0b4\ubd80": 28, "pixel\ub4e4\uc774": 28, "\uc99d\uac00\ud558\uace0": [28, 46], "\uac10\uc18c\ud558\ub3c4\ub85d": 28, "\uc720\ub3c4\ub418\uba74": 28, "\uc720\ub3c4": [28, 40, 49], "energi": [28, 51], "a_i": 28, "topk": 28, "_u": 28, "omega": [28, 36], "i\uc5d0": 28, "v\uac00": 28, "\uc815\ud574\uc838": 28, "pixel\uc774": 28, "v\uc640\uc758": 28, "attention\uac12\uc774": 28, "\uc678\ubd80": [28, 42], "function\uac12\uc774": 28, "\ucd5c\uc18c\uac00": [28, 39], "\ub428\uc744": 28, "\uc791\uc544\uc9c0\uba74": [28, 54], "pixel\ub4e4\uc758": 28, "token\uc5d0": 28, "box\uc640": 28, "object\uac04\uc758": 28, "align\uc774": 28, "\uc774\ub8e8\uc5b4\uc9c0\uac8c": 28, "nabla": [28, 46, 54], "v_i": [28, 39, 55], "\ub418\ub3c4\ub85d": [28, 29, 39, 40, 47], "optimize\uacfc\uc815\uc744": 28, "\uac70\uce5c\ub2e4": 28, "\ub2e8\uc21c\ud558\ub2e4": 28, "descent\ub97c": 28, "\uc0dd\uac01\ud574\ubcf4\uba74": [28, 29, 41, 57], "optimize\ud558\uba74\uc11c": 28, "\ucd5c\uc18c\ub85c": 28, "\ubaa9\ud45c\uc774\uae30\uc5d0": 28, "function\uacfc": [28, 37], "function\uc758": [28, 39, 49, 54], "\uc2dd\ub4e4\uc740": 28, "\uac00\ub2a5\ud558\uae30\uc5d0": 28, "z_t\ub97c": 28, "optimize\ud558\uc5ec": 28, "\ucd5c\uc18c\ud654\uac00": 28, "\ucd5c\uc18c\ud654\ub294": 28, "timestep\ub9c8\ub2e4": 28, "5\ud68c": 28, "\ubc18\ubcf5\ub418\uba70": 28, "\ubc88": [28, 33, 35, 41, 45, 46], "\uc9c4\ud589\ub420": 28, "\ub54c\ub9c8\ub2e4": 28, "\uc120\ud615\uc801\uc73c\ub85c": [28, 33, 54], "\ud69f\uc218\uac00": 28, "1\ud68c\ub85c": 28, "\uc904\uc5b4\ub4e0\ub2e4": 28, "30step": 28, "\ud6c4\uc5d0\ub294": [28, 42], "\uc218\ud589\ud558\uc9c0": 28, "\uc9c4\ud589\ub418\uba74\uc11c": 28, "denoising\uc774": 28, "\ub05d\ub098\uace0": 28, 
"sam\uc744": 28, "segment\ub97c": 28, "\uc9c4\ud589\ud558\uac70\ub098": 28, "threshold\uac12\uc744": 28, "\uad6c\ud558\uac8c": [28, 51], "\uc774\ud6c4\uc5d0": [28, 32, 33], "map\uc5d0": 28, "mask\uc640": 28, "wise\uacf1\uc744": 28, "\ud574\uc8fc\uc5b4\uc11c": 28, "step2": 28, "step2\uc5d0\uc11c\ub294": 28, "\ucc98\uc74c\uc5d0": [28, 56], "work\uc5d0": 28, "\uc758\ud558\uba74": [28, 42], "denoising\uc758": 28, "\ub2e8\uacc4\uc5d0\uc11c\ubd80\ud130": 28, "detail\ud55c": [28, 29, 40], "compose\ub97c": 28, "compose\ud55c\ub2e4\uc74c": 28, "compose\ud558\uba74\uc11c": 28, "step1\uacfc": 28, "optimize\ub97c": 28, "\uc9c4\ud589\ud558\uba70": 28, "object\ub4e4\uc774": 28, "\uc704\uce58\uc5d0": [28, 34, 48], "\uc774\ud6c4\ubd80\ud130\ub294": 28, "compose\ub098": 28, "compose\uad00\ub828": 28, "step2\uc5d0\uc11c\uc758": 28, "comp": 28, "latentcompos": 28, "\uc9c4\ud589\ub41c": [28, 33], "decoder\ub97c": [28, 29, 39], "\ubcc0\ud658\ub418\uc5b4": 28, "\uc0dd\uc131\ub41c\ub2e4": 28, "qualitive\ud55c": 28, "quantitive\ud55c": 28, "\ub098\ub204\uc5b4\uc11c": 28, "introduction\uc5d0\uc11c": 28, "4\uac00\uc9c0\ub97c": 28, "\ud574\uacb0\ud558\uace0\uc790": [28, 32, 36, 52], "work\uc758": 28, "\ubaa9\ud45c\uc774\uc5c8\uae30\uc5d0": 28, "relationships\ub97c": 28, "100\uac1c\uc529\uc758": 28, "\uc218\uc9d1\ud574\uc11c": 28, "\uc9c4\ud589\uc744": 28, "\ud558\uc600\ub2e4\uace0": [28, 52], "\uc874\uc7ac\ud558\uc9c4": 28, "owl": 28, "detector\ub97c": 28, "box\uac12\uc744": 28, "\uc0dd\uc131\ub418\uc5b4\uc788\ub294\uc9c0\ub97c": 28, "\ud655\uc778\ud558\uc600\ub2e4\uace0": 28, "study\uc5d0": 28, "\uacb0\uacfc\ub4e4\ub3c4": 28, "\ud3ec\ud568\ub418\uc5b4": [28, 33], "qualitit": 28, "results1": 28, "results2": 28, "lmd\uac00": 28, "\uc0dd\uc131\ud558\ub294\uc9c0\ub97c": 28, "\ubcf4\uc5ec\uc8fc\ub294": [28, 47, 48, 50, 56, 58, 59], "sdxl\ubaa8\ub378\uc740": 28, "numarci": 28, "matching\ub4f1\uc744": 28, "\ub9cc\uc871\uc2dc\ud0a4\uc9c0": 28, "lmd\ub294": 28, "result\ub294": 28, 
"\uc81c\uc2dc\ud558\ub294": [28, 56, 60], "gligen\uc758": 28, "\uac00\uc838\uc640\uc11c": [28, 32], "\uc138\uac00\uc9c0\ub97c": 28, "\uc6d4\ub4f1\ud55c": 28, "\uc131\ub2a5\uc784\uc744": 28, "sdv1": 28, "sdv2": 28, "1\uc744": [28, 35, 38], "\uc368\ubcf8": 28, "\ub458\uc758": 28, "\uc5c6\uc5c8\ub2e4\uace0": [28, 47], "\uad6c\ud558\uae30": [28, 61], "\uacb0\uacfc\ub3c4": [28, 50, 58, 59, 60], "\ubcf4\uc5ec\uc8fc\uc5c8\ub294\ub370": 28, "lmd\uc77c\ub54c\uc640": 28, "\uc77c\ub54c\uac00": 28, "lmd\uc5d0\uc11c\ub294": 28, "attention\uae30\ubc18\uc758": 28, "\uad00\ub828\ud558\uc5ec": 28, "\uc815\ud655\ud558\uc9c0": [28, 59, 61], "sam\uc740": 28, "\ub9c8\uc2a4\ud06c\ub97c": 28, "\ub3c4\uc6c0\uc744": [28, 32, 39, 40], "\uc900\ub2e4": 28, "\uc81c\uac70\ud558\uba74": [28, 47], "lmd\uc5d0\uc11c": 28, "\ubc1c\uc0dd\ud558\uac8c": 28, "gligen": [28, 56], "\uac00\uc838\uc640": [28, 33, 38, 44], "\uc774\uc6a9\ud558\uae30": 28, "sam\uc774": 28, "\uc54a\uac8c\ub41c\ub2e4": 28, "\uc120\ud0dd\ud558\uc5ec": [28, 35, 48], "\ud63c\ub780\uc744": 28, "\uc77c\uc73c\ud0a4\uace0": 28, "\uc800\ud558\uc2dc\ud0a4\uac8c": 28, "stage2\uc5d0": 28, "\uac00\uc838\uc654\ub290\ub0d0\uc5d0": 28, "quantative\ud55c": 28, "lmd\uae30\ubc95\uc744": 28, "pretrain\ub41c": 28, "\uae30\ubc95\uc5d0": 28, "stage2\uc758": 28, "gligen\uc744": 28, "\uac00\uc838\uc628": 28, "gligen\uc5d0": 28, "gpt3": [28, 48], "5\ub300\uc2e0": 28, "gpt4\ub97c": 28, "\uacbd\uc6b0\ub85c": [28, 60], "gpt4\ub85c": 28, "\ubc14\uafbc": 28, "numeracy\uc5d0\uc11c\ub294": 28, "\uc0b4\uc9dd": 28, "\uac10\uc18c\ud558\uc9c0\ub9cc": [28, 46], "version": 28, "gpt\uac19\uc740": 28, "turbo\ub97c": 28, "gpt4\uc758": 28, "\ud574\ub3c4": 28, "7shots\uc73c\ub85c": 28, "\uc644\ubcbd\ud558\uac8c": [28, 32], "\ud574\ubcf8": 28, "gpt4\uac00": 28, "\ubcf4\uc600\uc74c\uc744": 28, "\ud06c\uae30\ub85c": [28, 39, 59], "\uc801\uc6a9\ud574\ubcf8\uacb0\uacfc": 28, "\uc798\uc774\ud574\ud560": 28, "intermedi": [8, 28, 45, 59], "representation\uc778": 28, 
"\ud2b9\uc9d5\uc774\ub77c\uace0": [28, 41], "stage\ub85c": [28, 40], "generation\uacfc": 28, "10752": 29, "compvi": 29, "namkyeong": [29, 62], "31": [29, 38, 43, 50], "\ud559\uc2b5\ud558\ub294\uc9c0": 29, "off\ub85c": [29, 49], "\ubd84\uc11d\ud560": 29, "analysi": 29, "2\ub2e8\uacc4\ub85c": 29, "autoencoder\ub97c": [29, 38], "\uc778\uac04\uc758": [29, 31], "\uc778\uc9c0\uc640": 29, "\ub4e4\uba74": 29, "\uace0\uc591\uc774": [29, 38, 47], "\uadc0": 29, "\uaf2c\ub9ac": 29, "\uc778\uc9c0\ud55c": 29, "\uc555\ucd95": [29, 33, 48], "\ub118\uc5b4\uac00\uba74": 29, "\ube44\ud2b8\ub97c": 29, "\uc368\ub3c4": 29, "\uc778\uc9c0\ud560": 29, "rate\uac00": 29, "\ub118\uc5b4\uac00\ub294": [29, 58], "\uc778\uc9c0": [29, 47], "\uc548\ub428": [29, 32], "\uc500": [29, 34], "1\ube44\ud2b8": 29, "\ud751": 29, "\ubc31": 29, "8\ube44\ud2b8": 29, "\uc0c9": 29, "\ud14d\uc2a4\uccd0": 29, "\uc0ac\uc774\uc988\uc758": [29, 31, 52], "\uc555\ucd95\ud558\uc5ec": [29, 44], "\uc904\uc784\uc73c\ub85c\uc368": 29, "\ud559\uc2b5\ud558\uc9c0": [29, 51], "\uc758\ubbf8\uc640": 29, "\ub9d0\ud568": 29, "\uc18d\uc758": 29, "\uac1c\uac00": 29, "\uc5b4\ub514\uc5d0": [29, 32, 34], "\uc704\uce58\ud558\ub294\uc9c0": 29, "\ubc30\uacbd\uc774": [29, 47], "\uc0c1\ud669\uc778\uc9c0": 29, "\ud559\uc2b5\ud568": [29, 32, 48], "\uc218\ud589\ud558\ub098": 29, "\ucd5c\uc801\ud654\ub098": 29, "\ucc3e\uae30": [29, 43], "\ubd88\uc548\uc815\uc131": 29, "\uc0dd\uc131\uc790": 29, "\ud310\ubcc4\uc790": 29, "\uc190\uc2e4\ud568\uc218": [29, 51, 60], "\uc720\ud615\uc758": [29, 35], "gan\ubcf4\ub2e4": [29, 48, 49], "quality\uac00": 29, "flow\ub294": 29, "\uac00\uc5ed\uc801\uc778": 29, "\ube44\uc120\ud615": 29, "\uc544\uc774\ub514\uc5b4": [29, 32, 36], "\ud568\uc218\ub4e4\uc744": 29, "\uc7a0\uc7ac\ubcc0\uc218\ub85c": 29, "\ubc18\ub300\ub85c": [29, 43], "\uc7a0\uc7ac\ubcc0\uc218\ub97c": [29, 60], "\uc5ed\ubcc0\ud658": [29, 34], "\uc591\ubc29\ud5a5": 29, "\ubcc0\ud658\uae30\ub97c": 29, "\uc7a0\uc7ac\ubcc0\uc218\ub85c\ubd80\ud130": 29, 
"\ucd94\uc815\uc5d0\uc11c": 29, "\ub2ec\uc131\ud558\ub098": 29, "\ube44\uc6a9\uacfc": 29, "\uc81c\uc57d\ub41c\ub2e4": 29, "\uc778\uc9c0\ud558\uae30": 29, "likelihood": [29, 48, 49, 51, 60], "\uc6a9\ub7c9\uacfc": 29, "\uc18c\ube44\ud55c\ub2e4": 29, "\ub4f1\uc5d0\uc11c": 29, "\ubcf4\uc774\ub098": [29, 39], "\ub2e8\uc704\uc5d0\uc11c\uc758": 29, "\ud3c9\uac00\uc640": 29, "speed\uc640": 29, "\uc720\ubc1c\ud55c\ub2e4": 29, "\uc591\uc790\ud654": 29, "quantis": 29, "\uc5f0\uc18d\ud615": 29, "\ubca1\ud130\ub97c": [29, 34, 35, 45], "\uc815\ud55c": 29, "codebook": [29, 33], "\ubca1\ud130\uc640\uc758": 29, "\uacc4\uc0b0\ud558\uc5ec": [29, 35], "\uc720\uc6a9\ud558\uba74\uc11c": 29, "vae\uc758": 29, "\uc6d0\ub9ac\ub97c": [29, 34], "\uc591\uc790\ud654\ud558\uc5ec": 29, "\ubcc0\ud658\ud558\uba74\uc11c": 29, "\uc624\ub298": [29, 41], "\uc54c\uc544\ubcfc": [29, 41, 53, 59], "model\uc774\ub2e4": [29, 40], "\ub2e4\ub918\ub358": [29, 41], "\uc18c\ubaa8\ub97c": 29, "\uc5bb\ub294\uac83\uc774": 29, "\ubaa9\ud45c\uc774\ub2e4": [29, 44], "\uc8fc\uc5b4\uc84c\uc744\ub54c": 29, "\ud1b5\ud574\uc11c": [29, 41, 45], "\ub514\ucf54\ub529\uc744": 29, "\ud14c\uc2a4\ud2b8\ub97c": 29, "\ucee4\uc9c0\uc9c0": 29, "divergence\uc640": 29, "\ud65c\uc6a9\ud558\uc600\ub2e4": [29, 39], "\uc774\ubbf8\uc9c0\uc678": 29, "\ud14d\uc2a4\ud2b8\ub098": 29, "semat": 29, "map\uacfc": 29, "\uc815\ubcf4\ub294": [29, 44, 55], "\uc804\ub2ec\uc744": 29, "\ud558\uc600\uace0": [29, 32, 42, 61], "phi_i": 29, "\uc815\uc758\ub418\uace0": [29, 58, 60], "\uc911\uac04\uc758": 29, "matrix\uc774\ub2e4": 29, "\ud574\ub2f9\ud558\uba70": 29, "\uc8fc\ubaa9\ud560\ub9cc\ud55c": [29, 54], "\uc9c4\ud589\uc2dc\ud0a4\ub294\ub370": 29, "\ubc14\uafb8\uba74\uc11c": 29, "\uc591\uc744": [29, 38], "\uc904\uc600\ub2e4\ub294": 29, "\uc5f0\uad00\uc9c0\uc73c\uba74": 29, "\uc0dd\uac01": [29, 43, 57], "varepsilon": [29, 54], "frequenc": [29, 34, 38, 48, 56, 61], "\uace0\ucc28\uc6d0": [29, 34, 56], "\uacf5\uac04\uacfc": 29, "\uc801\ud569\ud568": 29, 
"bit\uc5d0": 29, "\ucc28\uc6d0\uc5d0\uc11c": [29, 31, 44], "\ucc28\uc6d0": [29, 34, 61], "\uac1c\uc218\uc5d0": [29, 36, 48, 50], "resourc": [29, 44], "1\ub300\ub85c": 29, "\ucc28\uc6d0\uc5d0": [29, 44], "\uadf8\ub798\ud504\ub294": [29, 42], "\uc218\ub834\ud558\ub294\ub370": 29, "\uc18c\uc694\ub418\ub294": 29, "fidelti": 29, "\ub370\uc774\ud130\uc640": [29, 35, 42, 50, 51, 60], "\uc815\uccb4\ub41c": 29, "perceptual\uacfc": 29, "bit": 29, "ratio": [29, 38, 58, 61], "compression\uc740": 29, "\ubd80\ubd84\uc5d0\uc11c": [29, 50, 58], "\uc555\ucd95\uc774": 29, "\ubc1c\uc0dd\ud558\uc5ec": 29, "\uc81c\ud55c\ub428": 29, "hq\uc640": 29, "imagenet\uc5d0\uc11c": 29, "\ube44\uad50\ud568": 29, "imagenet\uc774": 29, "dataset\uc784": 29, "point\uac00": 29, "5\uac1c\uc778\ub370": 29, "\uc624\ub978\ucabd\ubd80\ud130": 29, "\uc67c\ucabd\uc73c\ub85c": 29, "outperform\uc744": 29, "\ubc1c\ud718\ud568": 29, "\ub290\ub9bc": 29, "ffhq": [29, 40, 41], "bedrooms\ub85c": 29, "manifold\uc758": 29, "coverag": 29, "\ucd5c\uace0": 29, "lsgm": 29, "adm\ubcf4\ub2e4": 29, "\uc808\ubc18\uc758": 29, "parameter\uc640": 29, "\uc5bb\uc74c": [29, 33, 36], "methods\uc744": 29, "\uac1c\uc120\ud574": 29, "cover": 29, "training\uc758": 29, "recall\uc744": 29, "45b": 29, "400m\uc73c\ub85c": 29, "bert": [29, 30], "layout\uc774": 29, "\uc5f0\uacb0\ud588\uc744": 29, "inpainting\uc5d0": 29, "landscap": 29, "384x384": 29, "openimages\ub85c": 29, "\ub4e4\uc5b4\uac10": [29, 32], "\ubb3c\uccb4\ub97c": [29, 39], "\ubcf5\uad6c\ub41c": 29, "effeci": 30, "\uace0\uc815\ud55c": 30, "\ucc44\ub85c": [30, 32], "fc": [30, 34], "\uc5f0\uc0b0\ub7c9\uc744": 30, "parameter\ub294": [30, 39, 42], "10000\ubc30": 30, "\uba54\ubaa8\ub9ac\ub294": 30, "3\ubc30\ub97c": 30, "latency\uac00": 30, "\ud29c\ub2dd\ud558\ub294": 30, "\ud30c\ub77c\ubbf8\ud130\ub9cc\uc744": 30, "\ud29c\ub2dd\ud568\uc73c\ub85c\uc368": 30, "\uc790\uc6d0\uc73c\ub85c\ub3c4": 30, "\ud558\ub294\uac83": 30, "upstream": 30, "\ud559\uc2b5\uc2dc\ud0a4\ub294\uac83": 30, 
"\uc694\uccad\uc758": 30, "\uc2dc\uc791\ubd80\ud130": 30, "\uc644\ub8cc\uae4c\uc9c0": 30, "\uac78\ub9ac\ub294": 30, "llm\uc740": 30, "\uc2dc\ud0b4": [30, 32, 42], "tuning\uc5d0\uc11c": 30, "\ud559\uc2b5\uc2dc\ud0a4\uba74": [30, 39, 45], "roberta": 30, "\ub2ec\uc774": 30, "\uac78\ub9bc": 30, "intrins": [30, 34], "\uae30\ubc18\ud558\uace0": 30, "\uc0ac\uc2e4\uc5d0": 30, "\uac16\uace0": 30, "\uac00\uc815\ud568": [30, 48], "matrices\ub97c": [30, 39], "\uc2dc\ud0a4\uae30\ub85c": 30, "decomposition\ub41c": 30, "\uc791\uc544": 30, "3\ubc30\uae4c\uc9c0": 30, "\ubc14\uafd4\uc8fc\uba74": 30, "switch": 30, "overhead\ub97c": 30, "\uae30\ubc95\ub4e4\uacfc": 30, "\uac00\ub2a5\ud558\ub2e4\ub294": [30, 54, 55], "w_o": 30, "accumulated\ub41c": 30, "convention\uc744": 30, "optimizer\ub294": 30, "adam\uc744": 30, "\uc774\uc6a9": [30, 31, 34], "feedforward": [30, 31], "ffn": 30, "agnostic\ud558\uc9c0\ub9cc": 30, "\uc9d1\uc911\ud568": [8, 30], "agnost": [30, 48], "\uad6c\uc560\ubc1b\uc9c0": 30, "\ud574\uc11d\uc774": 30, "parameterized\ub41c": 30, "y_i": 30, "target\uc30d\uc73c\ub85c": 30, "maximize\ud558\uae30": 30, "\uc5c5\ub370\uc774\ud2b8\ub428": 30, "\ud559\uc2b5\ud574": [30, 43, 44], "\uc5c4\uccad\ub09c": [30, 34, 58], "\uc804\uccb4\uac00": 30, "\uadf8\ubcf4\ub2e4": 30, "\ucc3e\uc544\ub0b4\ub294": 30, "\ubc14\ub00c\uae30": 30, "effecient\ud574\uc9d0": 30, "\uc791\uc544\uc9c8": 30, "\uae30\uc874\uc5d0\ub3c4": 30, "learning\uc5d0\uc11c": [30, 48], "effecient\ub97c": 30, "\uac00\uc9c0\uac00": 30, "hardwar": 30, "parellelism\uc774": 30, "\uc5c6\ub2e4\uba74": 30, "\ucd94\uac00\ud574\ub3c4": 30, "\uc99d\uac00\ud574": 30, "\uc5b4\ub824\uc6e0\uc74c": 30, "prefix": 30, "tuning\uc740": [30, 42, 43], "optimize\uac00": 30, "\uacf1\ud574\uc9c4": 30, "vector\ub07c\ub9ac": 30, "wise\ud558\uac8c": 30, "scaling\ub428": 30, "rate\ucc98\ub7fc": 30, "tuning\ud574\uc11c": 30, "r\uacfc": 30, "\uc0ac\uc6a9\ud55c\ub2e4\uace0": [30, 45, 55], "lora_a": 30, "new_zero": 30, "num_embed": 30, "lora_b": 30, 
"embedding_dim": 30, "lora_alpha": [30, 56], "requires_grad": [30, 51], "reset_paramet": 30, "hasattr": 30, "wai": [30, 56], "zeros_": 30, "normal_": [30, 60], "bool": 30, "merge_weight": 30, "sure": 30, "transpos": [30, 31], "mark": 30, "after_a": 30, "padding_idx": 30, "max_norm": 30, "norm_typ": 30, "scale_grad_by_freq": 30, "spars": [30, 48, 59], "\ud558\ub77d\uc774": 30, "\uacbd\uc6b0\uc5d4": 30, "\ucd5c\uc18c\ud654\ud558\uae30": [30, 34, 47], "\uace0\uc815\ud568": 30, "175b\ub97c": 30, "vram\uc740": 30, "2tb\uc5d0\uc11c": 30, "350gb": 30, "checkpoint": [30, 40, 56], "size\ub294": [30, 54], "350gb\uc5d0\uc11c": 30, "35mb\ub85c": 30, "\uc904\uc784": 30, "\ube68\ub77c\uc9d0": 30, "\uacbd\uc6b0\uc5d0\uc11c": 30, "accuraci": [30, 45, 57], "transformer\uc5d0\uc11c": [30, 48], "matrix\uc5d0": 30, "matrices\uc5d0": 30, "\uc88b\uc558\uc74c": 30, "\ub274\ub7f4\ub124\ud2b8\uc6cc\ud06c\uc758": 30, "activation\uc744": 30, "\uc904\uc774\uae30\ub3c4\ud558\uace0": 30, "\ub298\ub9ac\uae30\ub3c4\ud558\ub294": 30, "\uc911\uac04\uc5d0": 30, "\uc0bd\uc785\ud558\ub294": 30, "lora\ubcf4\ub2e4": 30, "\uc54c\ub824\uc838\uc788\uc73c\uba70": 30, "3\ub97c": 30, "\ud588\uc744\ub54c": 30, "\ubcf4\ub2e4\ub3c4": [30, 52], "\uc8fc\uc7a5\ud558\uace0": 30, "\ud559\uc2b5\uc2dc\uac04\ub3c4": 30, "\uc9e7\uc544": 30, "30\ubd84\ub9cc\uc5d0": 30, "\ud29c\ub2dd\ud560": [30, 31], "loralib": 30, "\uc124\uce58": 30, "pip": 30, "instal": 30, "altern": [30, 51], "git": 30, "microsoft": 30, "befor": 30, "in_featur": 30, "out_featur": 30, "after": 30, "add": [30, 36, 50, 56], "parameter\ub9cc": 30, "bigmodel": 30, "string": 30, "lora_": 30, "mark_only_lora_as_train": 30, "loop": [30, 59], "dataload": [30, 51], "\uc800\uc7a5\ud560": 30, "\ub54c\uc5d4": 30, "state_dict": 30, "\uc800\uc7a5\ud558\uac8c": 30, "save": 30, "checkpoint_path": 30, "lora_state_dict": 30, "\ubd88\ub7ec\uc62c": 30, "load_state_dict": 30, "strict": 30, "load": [30, 38, 50], "ckpt_pretrain": 30, "pt": [30, 48], "ckpt_lora": 30, 
"\ud29c\ub2dd": [30, 44], "gpu\ub85c": [30, 40], "\uac00\ub2a5\ud560\uae4c": [30, 42], "\uc18c\uac1c\ud569\ub2c8\ub2e4": [30, 42, 46, 50, 53, 55, 59, 60, 61], "da": 30, "nhctrrve": 30, "14792": 31, "26": [20, 31, 34, 36, 46, 60], "lucidrain": 31, "\ud558\uc600\uc74c": [31, 32], "\ubc29\ub300\ud558\ub2e4\ub294": 31, "\uc720\uc9c0\ud568": 31, "tensor\ub97c": 31, "\ubd84\ud574\ud558\uc5ec": 31, "\uadfc\uc0ac\ud654": 31, "\uc5b4\ud50c\ub9ac\ucf00\uc774\uc158\uc5d0": 31, "pipeline\uc744": 31, "\uc778\ud130\ub137\uc744": 31, "\ud655\ubcf4\ub420": 31, "\uc218\uc9d1\ud558\uae30\ub294": 31, "\uc874\uc7ac\ud558\ub294\ub370": 31, "\ub0ad\ube44\uc77c": 31, "\ube44\uc9c0\ub3c4": 31, "\ub808\uc774\ube14\uc774": [31, 35], "\uc9c0\uc815\ub418\uc9c0": 31, "\ud398\uc5b4\ub9c1\ub41c": 31, "\ube44\ub514\uc624\ub9cc\uc73c\ub85c": 31, "\uc138\uc0c1\uc758": 31, "\uac1c\uccb4\uac00": 31, "\uc6c0\uc9c1\uc774\uace0": 31, "\uc791\uc6a9\ud558\ub294\uc9c0": 31, "t2v\ub85c": 31, "\ud544\uc694\uc131\uc744": 31, "\uc6b0\ud68c\ud55c\ub2e4": 31, "\uace0\ud504\ub808\uc784\ub960": 31, "\uc2dc\uc2a4\ud15c\uacfc": [31, 35], "\ud3c9\uac00\ud55c\ub2e4": 31, "\uc81c\ub85c\uc0f7": 31, "300\uac1c\uc758": 31, "\uc218\uc9d1\ud558\uc5ec": 31, "\uacf5\uac1c\ud560": 31, "\uacc4\ud68d\uc774\ub2e4": 31, "\uc2e0\uacbd\ub9dd\uc758": 31, "\ucc28\uc6d0\uc73c\ub85c": 31, "\uc2dc\uacf5\uac04": 31, "\uc2e0\uacbd\ub9dd\uacfc": 31, "\uc218\uc2dd": [31, 39, 60], "sr_h": 31, "sr": [31, 52], "t_l": 31, "uparrow_": 31, "bpe": [31, 48], "c_x": [31, 56], "\uacf5\uc720\ud558\uc600\ub2e4": 31, "x_e": 31, "y_e": 31, "_l": 31, "sr_l": 31, "d\uc5d0\uc11c": 31, "768x768": [31, 54], "\ud53d\uc140\ub85c": 31, "\uc99d\uac00\uc2dc\ucf1c": 31, "\ubcc0\ud658\ub41c\ub2e4": 31, "fp": [31, 44], "2\ucc28\uc6d0": [31, 34], "\uc218\uc815\ud55c\ub2e4": 31, "layers\ub294": 31, "\uc815\ubcf4\ub9cc": 31, "\ucd94\uac00\ud574\uc8fc\uba74": 31, "\ub9cc\ub4e4\uac8c": 31, "16\uac1c\uc758": 31, "\uc0ac\uc774\ub97c": 31, "\ubcf4\uac04\ud558\uc5ec": 31, 
"\uc99d\uac00\uc2dc\ud0a8\ub2e4": 31, "\ub124\ud2b8\uc6cc\ud06c\uc5d0\ub294": 31, "\ud658\uac01": 31, "\ud3ec\ud568": [31, 33, 50], "\uc794\uc0c1\uc774": 31, "\uc0dd\uae30\uc9c0": [31, 47], "\uc54a\uc73c\ub824\uba74": 31, "\ud658\uac01\uc774": 31, "\uc804\uccb4\uc5d0": [31, 34, 60], "\uc720\uc9c0\ud574\uc57c": 31, "\uc815\ubcf4\ub098": 31, "\uac00\uc0c1\uc73c\ub85c": 31, "\uc218\ud589\ud558\ub294": 31, "\ubaa8\ub4c8\uc778": 31, "\ubaa8\ub4c8\ub85c": 31, "\ub9cc\ub4e4\uae30\uc5d4": 31, "\uba54\ubaa8\ub9ac": [31, 44, 54], "\ubd80\uc871\uc73c\ub85c": 31, "\uc5b4\ub824\uc6e0\ub2e4": [31, 39], "\ucd08\uae30\ud654\ub97c": 31, "\uc804\ubc18\uc5d0": [31, 35], "\ud658\uac01\uc744": 31, "\ucee8\ubc8c\ub8e8\uc158": 31, "\ucee8\ubc8c\ub8e8\uc158\uc744": 31, "\uc313\ub294\ub2e4": 31, "\ucee8\ubc8c\ub8e8\uc158\uc758": 31, "load\ub97c": 31, "\ucee8\ubcfc\ub8e8\uc158": 31, "\ub808\uc774\uc5b4\uc640": 31, "\uc0ac\uc774\uc5d0": [31, 54, 57], "\uacbd\uacc4\ub97c": 31, "\ucc44": [31, 50, 55, 57], "\ud150\uc11c": 31, "height": 31, "2_d": 31, "1_d": 31, "\ud568\uc218\ub85c": [31, 34, 46], "\ud655\uc7a5\ud558\uc600\ub2e4": 31, "layer\ucc98\ub7fc": 31, "attenion": 31, "\uc313\uc544": 31, "\uadfc\uc0ac\ud654\ud558\ub294": [31, 60], "flatten": 31, "\ucd95\uc5d0": [31, 38], "flatten\ud558\ub294": 31, "\uc5f0\uc0b0": [31, 48, 61], "hw": 31, "attn_": 31, "spatiotemporalattent": 31, "add_feed_forward": 31, "ff_mult": 31, "pos_bia": 31, "flash": 31, "causal_time_attn": 31, "assert": [31, 41, 45], "spatial_attn": 31, "spatial_rel_pos_bia": 31, "continuouspositionbia": 31, "num_dim": 31, "temporal_attn": 31, "causal": [31, 48], "temporal_rel_pos_bia": 31, "has_feed_forward": 31, "ff": [31, 56], "mult": 31, "enable_tim": 31, "is_video": 31, "ndim": 31, "bxf": 31, "hxw": 31, "space_rel_pos_bia": 31, "rel_pos_bia": 31, "bxhxw": 31, "time_rel_pos_bia": 31, "\ub780": [31, 44], "\ubd80\ub4dc\ub7fd\uac8c": 31, "\ub9cc\ub4e4\uace0": [31, 42, 43], "\uc5f0\uc7a5": 31, "\ubcf4\uac04\ud558\uace0": 31, 
"extrapolation\uc744": 31, "extrapol": 31, "\ubbf8\ub798\uc758": 31, "\uc608\uce21\ud558\uac70\ub098": 31, "spatialtempor": 31, "\uc81c\ub85c": 31, "\ud328\ub529\ud558\uace0": 31, "\uc5c5\uc0d8\ud50c\ub9c1\uc744": 31, "interpolation\uc744": 31, "\ud29c\ub2dd\ud55c\ub2e4": 31, "\uc785\ub825\uc5d0": 31, "\ub9c8\uc2a4\ud0b9": 31, "\ub9c8\uc2a4\ud0b9\ub418\ub294": 31, "\ubc14\uc774\ub108\ub9ac": 31, "skips\uacfc": 31, "rate\ub97c": [31, 43], "f\ub97c": 31, "5\ub85c": [31, 36], "16\ud504\ub808\uc784": 31, "76\ud504\ub808\uc784": 31, "x5": 31, "\uc5c5\uc0d8\ud50c\ub9c1": [31, 35], "\ub05d": 31, "\ub9c8\uc2a4\ud0b9\ud558\uc5ec": 31, "\uc560\ub2c8\uba54\uc774\uc158\uc5d0\ub3c4": 31, "\uc694\uc18c\ub4e4\uc740": [31, 36], "\ud29c\ub2dd\ud558\uc9c0": 31, "\ub9cc\uc73c\ub85c": [31, 46, 53, 54, 56], "decoder\ub294": 31, "\uc911\uc5d0": [31, 32, 35, 48], "\ub4e4\uc5b4\uc628": 31, "\ubc1b\ub294\ub2e4": [31, 35], "\ub05d\ub098\uba74": [31, 36], "\ucd08\uae30\ud654\ud558\uc5ec": [31, 35], "16\ud504\ub808\uc784\uc774": 31, "\ubc94\uc704": [31, 52, 53, 58], "\uc2dc\uc791\ud558\uace0": [31, 58], "\uc804\ud658\ud55c\ub2e4": [31, 44], "\ub124\ud2b8\uc6cc\ud06c\ub294": 31, "\ub514\ucf54\ub354\ub85c\ubd80\ud130": 31, "\ud29c\ub2dd\ub41c\ub2e4": 31, "5b": [31, 40, 45, 54], "3b\uc758": 31, "nsfw": 31, "\uc720\ud574\ud55c": [31, 32], "5\ubcf4\ub2e4": 31, "\ud544\ud130\ub9c1\ud558\uc600\ub2e4": 31, "Not": 31, "safe": [31, 45], "For": [31, 34], "\uc120\uc815\uc801\uc774\uac70\ub098": 31, "\uc74c\ub780\ud558\uac70\ub098": 31, "\ud3ed\ub825\uc801\uc778": 31, "\ucf58\ud150\uce20": 31, "10m\uacfc": 31, "hd": 31, "vila": 31, "100m": 31, "10m\uc744": 31, "100m\uc744": 31, "\uc561\uc158": 31, "\uace0\uc548\ub418\uc5c8\uc73c\uba70": 31, "\ud658\uacbd\uc5d0\uc11c": 31, "\ube44\ub514\uc624\uc640": 31, "\ud074\ub798\uc2a4\uc5d0": [31, 45], "\ud15c\ud50c\ub9bf": 31, "\uc791\uc131\ud558\uace0": 31, "fretchet": 31, "\uce21\uc815\ud55c\ub2e4": 31, "train\uc14b\uacfc": 31, "\uc138\ud2b8\uc758": 31, "59": 31, 
"794": 31, "\ucea1\uc158\uc5d0": 31, "clipsim": 31, "amazon": 31, "turk": 31, "amt": [31, 47], "\uc218\uc9d1\ud558\uc600\ub2e4": 31, "annotator\ub4e4\uc5d0\uac8c": 31, "\uc2dc\uc2a4\ud15c\uc774": 31, "\uc2f6\uc740\uc9c0": 31, "\ubb3c\uc5b4\ubd24\ub2e4": 31, "\ubd88\uc644\uc804\ud558\uac70\ub098": 31, "\ucd94\uc0c1\uc801\uc774\uac70\ub098": 31, "\ubd88\ucf8c\uac10\uc744": 31, "\ud544\ud130\ub9c1": [31, 32], "\uce74\ud14c\uace0\ub9ac": [31, 32, 35], "\ub3d9\ubb3c": [31, 45], "\ud310\ud0c0\uc9c0": 31, "\uc790\uc5f0": [31, 43], "\ud48d\uacbd": [31, 38], "\uc74c\uc2dd": 31, "\uc74c\ub8cc": 31, "\uc2dd\ubcc4\ud558\uace0": 31, "\uc120\ud0dd\ud558\uc600\ub2e4": 31, "\ub370\uc5d0": 31, "\uc0ac\uc6a9\ub418\uc9c0": 31, "\uc720\uc9c0\ud588\ub2e4": 31, "drawbench": 31, "\ud504\ub86c\ud504\ud2b8\ub3c4": 31, "vedio": 31, "faithfulness\ub97c": 31, "\ud3c9\uac00\ud558\uc600\ub2e4": [31, 35], "\uc21c\uc11c\ub85c": [8, 31], "\uc88b\uc740\uc9c0": 31, "annotator\uc5d0\uac8c": 31, "\ubb3c\uc5b4\ubcf8\ub2e4": 31, "vdeio": 31, "\ubcf4\uac04": 31, "film\uc758": 31, "\uc0ac\uc2e4\uac10\uc744": 31, "\ube44\uad50\ud558\uae30": [31, 43], "\ud3c9\uac00\ub3c4": 31, "5\uba85\uc758": 31, "\uac01\uae30": 31, "annotator\uc758": 31, "\ub4dd\ud45c\ub97c": 31, "vtt\uc5d0": 31, "\ubcf4\uace0\ud558\ub294": 31, "godiva": 31, "nuwa": 31, "\uc911\uad6d\uc5b4\uc640": 31, "\uc601\uc5b4\ub97c": 31, "cogvideo": 31, "\ucd94\ub860\uc744": [31, 45], "\uc218\ud589\ud558\uc600\ub2e4": [31, 35], "\uc0f7": 31, "\uc6b0\uc218\ud558\ub2e4": 31, "finetunning\uc744": 31, "\uacb0\uacfc\uc5d0\uc11c\ub3c4": 31, "\ub2ec\uc131\ud558\uc600\ub2e4": 31, "drawbench\uc640": 31, "\ud14c\uc2a4\ud2b8\uc14b\uc5d0": 31, "cogvideo\uc640": 31, "vdm\uc758": 31, "\uc6f9": 31, "\ud398\uc774\uc9c0\uc5d0": 31, "\ud45c\uc2dc\ub41c": [31, 42], "28\uac1c\uc758": 31, "8\ubc88": 31, "\ud3c9\uac00\ud558\uc5ec": 31, "76x256x256": 31, "\ud3c9\uac00\uc790\uac00": 31, "\ub0ab\ub2e4\uace0": 31, "\ud22c\ud45c\ud55c": 31, "\ud37c\uc13c\ud2b8": 31, 
"\ube44\uc728": [31, 38], "video\uac00": 31, "film\uc744": 31, "drawbench\uc758": 31, "\uc800\ud504\ub808\uc784\ub960": 31, "4fps\uae4c\uc9c0": 31, "\uc5c5\uc0d8\ud50c\ub9c1\ud55c\ub2e4": 31, "\ud3c9\uac00\uc790\ub4e4\uc740": 31, "62": 31, "drawbench\uc5d0": 31, "54": [31, 39], "\ucee4\uc11c": [31, 40, 47], "\ubb3c\uccb4\uac00": [31, 34, 36], "\uc6c0\uc9c1\uc774\ub294\uc9c0\uc5d0": 31, "\uc9c0\uc2dd\uc774": 31, "\ub9e8": 31, "vdm": 31, "\ubaa8\uc158\uc758": 31, "extrpol": 31, "\uac1c\uc778\ud654\ud558\uace0": 31, "film": 31, "\uc804\ud658\ud558\uae30\ub9cc": 31, "\uc758\ubbf8\ub860\uc801\uc73c\ub85c": 31, "\ub9cc\ub4ec": 31, "\uc138\uacc4\ub85c\ubd80\ud130": 31, "intelligence\ucc98\ub7fc": 31, "system\ub3c4": 31, "\ubaa8\ubc29\ud560": [31, 32], "\ucc3d\uc758\uc801\uc774\uace0": 31, "\uc720\uc6a9\ud560": 31, "\uc5f0\uad6c\uc790\ub4e4\uc740": 31, "\ub3d9\uc601\uc0c1\uc5d0\uc11c": 31, "\uc138\uacc4\uc758": 31, "dynamic\uc744": 31, "\uadf9\ubcf5\ud560": [31, 43], "2406": 32, "07547": 32, "chanyeong": 32, "shin": 32, "\ubcc0\ud574\uc57c": 32, "\ud560\uc9c0\uc5d0": 32, "\ubc14\ub00c\uc5b4\uc57c": 32, "\uac00\uc838\uc624\ub294": 32, "\uba85\uba85\ud55c": 32, "\uc55e\uc73c\ub85c\uc758": 32, "\ubd84\uc57c": [32, 36], "\uc2dc\ub098\ub9ac\uc624\ub97c": 32, "\ub9cc\uc871\ud558\uba74\uc11c": 32, "\uc218\ud589\ub418\uc5b4\uc57c": 32, "\ud588\uae30\uc5d0": 32, "\ud480\uace0": 32, "\ub123\uc5b4\uc8fc\uace0": 32, "\uc774\uac74": 32, "\uacf5\ud1b5": 32, "\ud558\ub098\ub85c\ub9cc": 32, "\ud588\ub294\ub370": [32, 54], "\ubf51\uc544\ub0b4\uae30\uc5d0\ub294": 32, "my": 32, "thought": 32, "refin": 32, "\ub9cc\ub4e4\uc5b4\uc8fc\ub294": 32, "\uc678\uc5d0\ub294": 32, "\uc2e4\ubb34\uc5d0\uc11c": 32, "\uc0ac\uc6a9\ud558\uc9c0\ub294": 32, "\uc54a\uc558\ub358": [32, 42], "\ud558\uc600\ub294\ub370": [32, 42], "\uc544\ubb34\ub798\ub3c4": [32, 33], "individu": [32, 56], "insert": 32, "\uc791\uc5c5\ucc98\ub7fc": 32, "\ub290\uaef4": 32, "\uc5b4\ub824\uc6cc": [32, 51], "shoe": 32, "sole": 32, 
"\ub85c\uace0\ub098": 32, "pattern": [32, 56, 57], "area": 32, "\ud544\uc694\ud588\uc74c": 32, "\uc5b4\uc6b0\ub7ec\uc9c0\uac8c": 32, "\uace0\ub824\ud574\uc57c": 32, "\ubaa8\uc591\ub3c4": 32, "\ub2ec\ub77c\uc11c": 32, "\ud480\uc5b4\uc57c": 32, "\ud574\ub2f9\ud558\ub294\uc9c0": 32, "\uc790\ub3d9\uc73c\ub85c": 32, "\ucc3e\uace0": 32, "supervis": [32, 38, 47, 55], "\ubf51\uc544\uc11c": 32, "\ub2f9\uc5f0\ud788": 32, "\ucd94\ucd9c\ud588\uae30": 32, "\ud1b5\uacfc": 32, "inject": [32, 46, 62], "\uad6c\ub3c4": 32, "\ub2ec\ub77c\ub3c4": 32, "\ubcc0\ud658\uc774": [32, 59], "\ubcf4\uc600\uc73c\uba70": [32, 42], "\ud655\uc778\ud558\uc600\uc74c": 32, "comprehens": 32, "part": [32, 50], "\uad6c\uc131\uc774": [32, 51], "\ub418\uc5c8\ub294\uac00": 32, "\ub290\ub08c\uc744": [32, 36], "\ubcc0\ud658\ub410\ub294\uac00": 32, "\uc2dc\ud0a4\uace0": [32, 42], "\uc8fc\ub3c4\ub85d": 32, "\ub123\uc74c": 32, "\uc704\uce58\ud558\ub294": 32, "repaint": 32, "\ucc44\uc6b0\ub294": 32, "blend": [32, 37], "\ud658\uacbd\uc758": 32, "\uc2dc\uc810\uc73c\ub85c": 32, "branch": 32, "\uc2e4\uc9c8\uc801\uc73c\ub85c\ub294": 32, "indic": 32, "\ub9cc\ub4e4\ub3c4\ub85d": 32, "\uc99d\uac00\uc2dc\ud0a4\uae30": 32, "\uc720\uc9c0\ub410\ub294\uc9c0": 32, "\ud65c\uc6a9\ud560\uc9c0": 32, "\ub9d0\uc9c0\ub97c": 32, "\uacb0\uc815\ud560": 32, "binari": [32, 51], "\ub098\uc640\uc788\uc9c0": 32, "\ub4e4\uc774": [32, 46, 50, 56], "\ucd5c\uadfc\uc5d0": [32, 50, 51, 52], "\uc99d\uba85\ud558\uc600\uc74c": 32, "upper": 32, "\uc644\uc131\uc2dc\ud0b4": 32, "anyth": 32, "unmask": [32, 33], "\ubf51\ub3c4\ub85d": 32, "\uc790\uccb4\ub294": [32, 35], "projector": 32, "drop": 32, "\uc544\ub9c8": [32, 43, 52], "\uc77c\ub4ef": 32, "\ubaa8\ubc29\ud574\uc624\ub294": 32, "\uc815\ub3c4\ub85c": 32, "\uc774\ud574\ud558\uba74": [32, 42], "\ubaa8\uc544\uc11c": 32, "\uad6c\ucd95\ud558\ub294": 32, "\uac00\uc9c0\uc758": [32, 50], "\ucca0\ud559\uc744": 32, "\uc9c0\ud0a4\ub824": 32, "\ub4e4": [32, 46], "\uc874\uc7ac\ud574\uc57c": 32, 
"\uae30\ub300\ud560": 32, "2\uac1c\ub97c": 32, "\ubf51\uc558\uc74c": 32, "\uce21\uc815\ud588\uace0": 32, "\ud06c\uac70\ub098": 32, "filter": [32, 45], "\uc2ec\uc9c0\uc5b4\ub294": 32, "\uac04\ub2e8\ud558\uac8c\ub294": 32, "\ubc29\ubc95\uc778\ub370": 32, "\uac00\uc838\uac00\uba74": 32, "easi": 32, "portion": 32, "\ucc28\uc9c0\ud55c\ub2e4\ub294": 32, "\uac83\ub4e4\uc740": 32, "\uacc4\uc18d": [32, 37, 58], "\ubc18\ubcf5\ub418\ub294": 32, "sift": 32, "\ud558\ub3c4\ub85d": [32, 58], "\uc26c\uc6e0\uae30": 32, "\uc7a5\uc744": 32, "\uc2dc\ud0a8": [32, 50], "seg": 32, "\ubc29\uc2dd\uc73c\ub85c\ub3c4": 32, "\uc99d\uac00\uc2dc\ud0a4\ub294": [32, 33, 42, 46], "\ubd88\ub7ec\uc77c\uc73c\ud0b4": 32, "id": [32, 43], "track": 32, "scenario": 32, "topic": 32, "\uc7a5\uc529": 32, "\ubaa8\uc558\uc74c": 32, "annot": 32, "\ub178\uac00\ub2e4": 32, "dino": [32, 50, 56], "\uacc4\uc0b0\ud558\ub3c4\ub85d": 32, "report": 32, "\ud574\ub193\uc74c": [32, 52], "pari": 32, "\ud588\ub294\uc9c0\uc5d0": 32, "\ucc3e\ub3c4\ub85d": 32, "\uac00\uc838\uc624\ub294\uc9c0\ub97c": 32, "\ud310\ub2e8\ud558\ub3c4\ub85d": 32, "\ub9de\ucd94\ub3c4\ub85d": 32, "adam": [32, 34, 40, 42, 47, 59], "lr": [32, 34], "75": [32, 42], "\uace0\ub974\ub3c4\ub85d": 32, "pexel": 32, "websit": 32, "70": 32, "But": 32, "\uc791\uac70\ub098": 32, "candid": [8, 32], "\uc2e4\ud328\ud558\ub294": [32, 39, 47], "\uc0dd\uae30\uae34": 32, "\uc774\ub7f4": 32, "\ucd94\ucc9c\ud55c\ub2e4\uace0": 32, "\uace0\uc548\ub418\uc5b4\uc57c": 32, "\uc0c9\uae54\uc774": 32, "\uc2e4\ubb34\uc5d0": 32, "\ud398\uc774\ud37c\ub2e4": 32, "\uc81c\uc548\ud558\uba74\uc11c": 32, "\uc5f4\ub824\uace0": 32, "\uc778\uc0c1": 32, "\uae4a\uc5c8\ub2e4": 32, "06": [32, 33, 53], "\ub530\ub048\ub530\ub048\ud55c\ub370": 32, "\uc37c\ub294\uc9c0": 32, "\uc774\ud574\uac00": 32, "\ub410\ub358": 32, "\uc368\ubd24\uc73c\uba74": 32, "\uc88b\uc558\uc744": [32, 42], "2301": 33, "00704": 33, "mar": 33, "\uc8fc\uc5b4\uc9c0\uace0": 33, "\uc790\uc138": 33, "900m": 33, "cc3m": 33, "3b": 33, 
"outpaint": 33, "22": [33, 34, 48], "maskgit": 33, "googl": [33, 53, 58], "\uc608\uce21\ud558\uc9c0\ub9cc": 33, "\uc2e0\ub8b0\ub3c4\uac00": 33, "\ud1a0\ud070\ub9cc": 33, "\ub514\ucf54\ub529\ub428": 33, "\uc904\uc5ec": [33, 54], "\uc778\ucf54\ub529\ub418\uace0": 33, "\ub514\ucf54\ub529\ub418\uc5b4": 33, "\ubcf5\uc6d0\ub418\ub294": [33, 47], "16x16": [33, 49], "\ub300\ubd80\ubd84\uc774": 33, "\ud30c\ub77c\ubbf8\ud130\ub85c": [33, 38, 54], "unmak": 33, "t5xxl": 33, "\ubc14\uafb8\ub294\ub370": [33, 43], "noun": [33, 50], "action": 33, "verb": 33, "adject": 33, "preposit": 33, "\uac83\uc774\ub77c\uace0": [33, 36, 42], "\uc120\ud589": 33, "4096": [33, 34, 35], "\uc785\ub825\ub418\uac8c": 33, "\ub9e4\ud551\uc744": [33, 47], "\ub514\ucf54\ub529\uc774": 33, "\uc778\ucf54\ub529\ud560": 33, "tame": 33, "\ud1a0\ud070\uc774": [33, 35], "\ubb34\uc2dc\ud558\uba74\uc11c": 33, "\ud568\uc744": [33, 34], "entropi": 33, "\uc788\uac8c\ub428": 33, "\ud1a0\ud070\uc740": [33, 43, 48], "\uad50\uccb4": 33, "hidden": 33, "\uc0ac\uc774\uc988\uc5d0": 33, "\ubcc0\uacbd\ud558\ub294\ub370": 33, "\uc624\ucc28\ub97c": [33, 34], "\uacc4\uc0b0\ud568": 33, "tokens\ub97c": 33, "\uc99d\uac00\ud558\uae30": 33, "\uad6c\uc131\ud588\uc744": 33, "\ud3ec\ucee4\uc2f1": 33, "\uc9c4\ud589\ub428": 33, "\uacc4\uce35\uc801\uc73c\ub85c": [8, 33], "\uc124\uacc4\ud588\uc74c": 33, "\uc644\ub8cc\ub418\uba74": 33, "\ub298\ub9bc": 33, "4\uac1c": [33, 55], "\ud45c\uc9c0\ud310\uc774": 33, "\ub410\uc74c": 33, "ell_g": 33, "ell_c": 33, "ell_u": 33, "l_c": 33, "l_u": 33, "\ud6c4\ubc18\uc5d0\ub294": 33, "\uc8fc\uac8c": [33, 42, 47], "\ub85c\ub3c4": [33, 60], "foward": 33, "\uc5f0\uc0b0\uc73c\ub85c": 33, "condition": 33, "independ": 33, "\uc218\ud589\ub428": 33, "\uc608\uce21\ub418\ub294": 33, "\uc120\ud0dd\ud574": 33, "\ud574\uc81c\ub418\ub294": 33, "rich": [33, 36, 53], "\uc218\ubc31\ubc88\uc758": 33, "460m": 33, "week": 33, "tpu": 33, "v4": 33, "adafactor": [33, 42], "cardin": 33, "\ud68c\uc804\ub41c": 33, 
"\ud004\ub9ac\ud2f0": [33, 36, 39, 44, 53], "prompt\uc640\uc758": 33, "\uce21\uc815\ud588\uc74c": 33, "\uac00\uc838\uc62c": 33, "\uc788\ub098\uc694": 33, "\ubaa8\ub378\uc774\ub77c": 33, "\ubaa8\ub378\uc778\uac00\uc694": 33, "\uae30\uc900\uc810\uc774": 33, "\uc5b4\ub5bb\ub0d0\uc5d0": 33, "\uc815\ud558\uae30": 33, "vqgan\uc744": 33, "gan\uc774\ub77c\uace0": 33, "\uc0dd\uac01\ud560": 33, "\ub2ec\ub77c\uc9c8": [33, 41], "\uad00\uc810\uc5d0\uc11c": [33, 35, 54], "\uc544\ub2c8\ub2e4\ub77c\uace0": 33, "\ub9d0\ud560": [33, 43], "\uac16\ub098\uc694": 33, "vqgan\uc5d0\uc11c": 33, "codebook\uc758": 33, "\uad6c\uc131\ud558\ub294\ub370\uc694": 33, "token\uc774\ub77c\uace0": 33, "\uc0dd\uac01\ud558\uc2dc\uba74": 33, "\ub123\uc5c8\uc744\ub54c": 33, "\uc774\ub904\uc9c0\ub098\uc694": 33, "inference\uc5d0\uc11c\ub294": 33, "\uc5c6\ub294\ub370": 33, "token\ub300\uc2e0": 33, "\ub4e4\uc5b4\uac00\uac8c": [33, 41, 53, 58], "\ub418\ub098\uc694": 33, "\ub9c8\uc2a4\ud06c\ub41c": 33, "\uc218\ud589\ub429\ub2c8\ub2e4": 33, "\uc218\uc2dd\uc5d0": 33, "\ub4e4\uc5b4\uac00\ub098\uc694": 33, "value\ub85c": 33, "\uc785\ub825\ub418\uc5b4": 33, "\uc218\ud589\ub418\uac8c": 33, "\uadf8\ub807\uac8c": [33, 42, 43], "gt\uc758": 33, "\ub07c\ub9ac": 33, "2003": 34, "08934": 34, "bmild": 34, "\uad6c\uc131\ud558\uc5ec": 34, "\uc800\uc7a5": 34, "\uc6a9\ub7c9\uc774": [34, 59], "\uad6c\uc131\ud558\uc9c0": 34, "synthes": [34, 39, 54, 55], "\uc88c\ud45c\ub97c": [34, 39], "\ubc00\ub3c4": [34, 37], "\uac01\ub3c4\ub85c": 34, "\ucc0d\uc740": [34, 47], "\uc77c\ubd80\uc758": 34, "\uac01\ub3c4\uc758": 34, "\uc720\ucd94\ud558\ub294": 34, "\ud55c\uc815\ub41c": 34, "contin": 34, "\ubc14\ub77c\ubcf4\ub294": 34, "\ubc29\ud5a5\uc5d0": [34, 47], "\uad11\uc120\uc744": 34, "\uace0\uc804\uc801": 34, "\uc678\ud615\uc744": 34, "keyword": 34, "08934v2": 34, "\ucea1\ucc98\ub41c": 34, "\ud568\uc218\uc758": [34, 37], "\ubd84\uc57c\uc758": 34, "\ud574\uacb0\ud568": [34, 48, 57], "\uc815\uc801": 34, "regress": [34, 35, 39], "coord": 34, 
"\uc0c1\uc758": [34, 47], "\uc9c0\uc810": [34, 39, 46], "\ubc29\ucd9c\ub41c": 34, "\ub204\uc801\uac12\uc744": 34, "\ud835\udc65": 34, "\ud835\udc66": 34, "\ud835\udc67": 34, "\ud1b5\uacfc\ud558\ub294": 34, "\uad11\uc120\uc5d0": 34, "\ub204\uc801\ub418\ub294\uc9c0\ub97c": 34, "\uc2dc\uc810\uc73c\ub85c\ubd80\ud130\uc758": 34, "\uc774\ub3d9\ud558\uc5ec": 34, "\ud3ec\uc778\ud2b8\ub4e4\uacfc": 34, "\uc2e0\uacbd\ub9dd\uc5d0": 34, "descent": [34, 46, 51], "\uc624\ucc28": 34, "\ucd5c\uc18c\ud654\ub97c": 34, "cotent": 34, "\ud560\ub2f9\ud558\uc5ec": [34, 35], "basic": [8, 34], "implementation\uc758": 34, "\ub300\uc548": 34, "\ud45c\ud604\uc73c\ub85c": [34, 36], "\uc218\ub834\ub418\uc9c0": 34, "mlp\uac00": 34, "\uc8fc\ud30c\uc218\uc758": 34, "\uad11\uc120\ub2f9": 34, "\uc694\uad6c\ub418\ub294": 34, "\ube44\ud6a8\uc728\uc801": [34, 48], "\uacc4\uce35\uc801": 34, "\uace0\uc8fc\ud30c\uc218\uc758": 34, "\uc801\uc808\ud558\uac8c": 34, "\uc0d8\ud50c\ub9c1\ud558\uae30": [34, 35], "\uac10\uc18c\uc2dc\ud0b4": 34, "\uc0c1\uc18d": 34, "\uc2e4\uc138\uacc4\uc758": 34, "\ud615\ud0dc\uc640": 34, "\ud22c\uc601\ub41c": 34, "\uc801\ud569": 34, "\uace0\ud574\uc0c1\ub3c4\uc5d0\uc11c": 34, "\ubaa8\ub378\ub9c1\ud560": 34, "\uc774\uc0b0\ud654\ub41c": 34, "\ubcf5\uc140": 34, "\uadf8\ub9ac\ub4dc\uc758": 34, "3\ucc28\uc6d0": [34, 36], "\uccb4\uc801\uc758": 34, "\ub2e8\uc704": 34, "2\ucc28\uc6d0\uc758": 34, "\uacbd\uc6b0\uc5d0\uc120": 34, "pix": 34, "\uc815\ubcf4\uc640": [34, 35, 40], "\ud22c\uacfc\uc131": 34, "volumn": 34, "\uccb4\uc801": 34, "ct": [34, 46], "mri": 34, "technic": 34, "\uae30\ud558\ud559\uacfc": 34, "\uc18c\uc7ac\ub97c": 34, "5\ucc28\uc6d0": 34, "\ub9e4\uac1c\ubcc0\uc218\ud654": 34, "\uace0\uc804\uc801\uc778": 34, "capac": [34, 56], "\uc2dc\uac01\uc801\uc778": [34, 38, 43], "\ub0b4\uc6a9\uc774": 34, "\uacf5\uac04\uc73c\ub85c": 34, "\ud560\ub2f9": [34, 35], "\uc9d1\uc911\uc801\uc73c\ub85c": 34, "\ub9e4\ud551\ud558\uae30": 34, "\uc131\uacf5\uc801\uc73c\ub85c": [34, 36, 54], 
"\uace0\uc8fc\ud30c\uc758": 34, "procedur": [34, 37, 58], "practic": [34, 57], "cartesian": 34, "\ud835\udc51_\ud835\udc65": 34, "\ud835\udc51_\ud835\udc66": 34, "\ud835\udc51_\ud835\udc67": 34, "\uae38\uc774\uac00": [34, 39], "emit": 34, "\uc0c9\uc0c1\uc740": [34, 35], "relu": [34, 59, 60], "256\uac1c": 34, "256\ucc28\uc6d0": 34, "\ubca1\ud130\ub294": [34, 43], "\uad11\uc120\uc758": 34, "\ubc29\ud5a5\uacfc": 34, "128\uac1c": 34, "\uc804\ub2ec\ub428": 34, "\uc608\uce21\ud574\uc57c": 34, "\uad11\uc120\uc774": 34, "\ubc18\uc0ac\ub418\ub294": 34, "\ud45c\uba74\uc758": [34, 36, 39], "\uac01\ub3c4\uc5d0": 34, "\uad11\ub7c9\uc774": 34, "\uc77c\uc815\ud558\ub2e4\ub294": 34, "\uc2dc\uc120": 34, "\ud45c\ud604\ud55c": [34, 39, 43], "\ubc18\uc0ac\uc131": 34, "specular": 34, "\ud45c\ud604\ud558\ub294\ub370": 34, "\ud3ec\uc778\ud2b8\uc5d0\uc11c": 34, "\ub80c\ub354\ub9c1\ud558\ub824\uba74": 34, "\ud53d\uc140\uc744": 34, "\ucd94\uc801\ub41c": 34, "\uc801\ubd84\uac12": [34, 39], "\ucd94\uc815\uc744": 34, "t_f": [34, 39], "\ud22c\uacfc\uc728": 34, "transmitt": 34, "quadratur": 34, "\uad6c\uc801\ubc95": 34, "\uc801\ubd84\uac12\uc744": 34, "\uc218\uce58\uc801\uc73c\ub85c": 34, "\uadf8\ub9ac\ub4dc\ub97c": [34, 35], "\uad6c\uc801\ubc95\uc740": 34, "\uc774\uc0b0": 34, "\ucffc\ub9ac\ub418\uae30": 34, "stratifi": 34, "\ud45c\uc9d1": 34, "\uc811\uadfc\ubc95\uc744": [34, 39], "bin\uc73c\ub85c": 34, "\ubd84\ud560\ud55c": 34, "partit": 34, "bin": 34, "\ud45c\ubcf8\ub4e4\uc744": 34, "\uc0ac\uc6a9\ud558\ub354\ub77c\ub3c4": 34, "\uac00\ub2a5\ud558\ubbc0\ub85c": [34, 36], "\uc704\uce58\ub4e4\uc5d0\uc11c": 34, "\ud3c9\uac00\ub418\ub3c4\ub85d": 34, "\ubf51\uc740": [34, 41], "\uc0d8\ud50c\ub4e4\ub85c": [34, 35], "\ub80c\ub354\ub9c1\uc5d0\uc11c": [34, 35], "\ub17c\uc758\ub41c": 34, "\uad6c\uc801\ubc95\uc73c\ub85c": 34, "\uc801\ubd84\uc744": 34, "delta_i": [34, 39], "sigma_j": 34, "delta_j": [34, 39], "adjac": 34, "dt": [34, 39, 54, 58], "\uc9d1\ud569\uc73c\ub85c\ubd80\ud130": 34, 
"\uac00\ub2a5\ud558\uba70": [34, 56], "\uc804\ud1b5\uc801\uc778": [34, 37, 39, 58], "\uc54c\ud30c": [34, 35], "\ud22c\uba85\ud55c": 34, "\uc720\ub9ac": 34, "\uadf8\ub9bc\uc790": 34, "\uacb9\uce60": 34, "\ucef4\ud3ec\uc9c0\ud305\uc744": 34, "remind": 34, "\uc694\uc18c\ub4e4\ub85c": [34, 58], "\ub2ec\uc131\ud558\uae30\uc5d0\ub294": 34, "assist": 34, "\ud615\ud0dc\uc5d0\uc11c": 34, "\uace0\uc8fc\ud30c": 34, "\ubcc0\ub3d9\uc744": 34, "\uc54a\uc558\uc74c": [34, 54], "On": [34, 54], "spectral": 34, "\uc2e0\uacbd\ub9dd\uc774": 34, "\uc800\uc8fc\ud30c": 34, "\ucabd\uc73c\ub85c": 34, "\ud3b8\ud5a5\ub418\uc5c8\uc74c\uc744": 34, "\ud1b5\uacfc\ud558\uae30": 34, "\ub9f5\ud551": 34, "\ubcc0\ub3d9\uc774": 34, "\uc138\uac1c\uc758": 34, "\uc88c\ud45c\uac12\uacfc": 34, "\uc131\ubd84\uc5d0": 34, "\ubd84\ub9ac\ub418\uc5b4": 34, "\uc801\uc6a9\ub428": 34, "\uc5ec\uc720": 34, "\ube44\uc5b4\uc788\ub294": 34, "\ub9c9\ud600\uc788\ub294": 34, "\uc0d8\ud50c\ub9c1\ub428": 34, "\uc608\uc0c1": [34, 45], "\ud6a8\uacfc\uc5d0": 34, "\ube44\ub840\ud558\uc5ec": 34, "\uc99d\uac00\uc2dc\ud0b4": 34, "\uacf3\uc744": 34, "\ubf51\uc790": 34, "n_c": 34, "\uc704\uce58\uc5d0\uc11c": 34, "\ub124\ud2b8\uc6cc\ud06c\uc5d0\uc11c\uc758": 34, "_c": [8, 34], "\uceec\ub7ec": [34, 35], "\ub4e4\uc758": [34, 47, 48, 58], "\uac00\uc911\ud569": 34, "w_i": [34, 39, 61], "piecewis": 34, "dfrac": 34, "w_j": 34, "\ubc00\ub3c4\ud568\uc218": 34, "2\ubc88\uc9f8": 34, "\uc0d8\ud50c\uc9d1\ud569\uc758": 34, "n_f": 34, "\ubc88\uc9f8\uc640": 34, "\uc9d1\ud569\uc758": 34, "\ud569\uc9d1\ud569\uc5d0\uc11c": 34, "_f": 34, "\uc808\ucc28\uc5d0\uc11c\ub294": 34, "\uad00\uce21": 34, "\ud3ec\ud568\ub420": 34, "\uc608\uc0c1\ub418\ub294": 34, "scene\uc774": 34, "extrins": 34, "\uacbd\uacc4\ub85c": 34, "\ub370\uc774\ud130\uc14b\uc774": [34, 35, 48, 53], "\uce74\uba54\ub77c\uac00": 34, "\uc5b4\ub514\ub97c": 34, "\ubc14\ub77c\ubcf4\uace0": 34, "\ub80c\uc988\uc640": 34, "\uc13c\uc11c": 34, "\uc758\ud574\uc11c": [34, 41], 
"\uacb0\uc815\ub418\uc5b4\uc9c0\ub294": 34, "\ud56d\ubaa9\uc73c\ub85c": 34, "\ud328\ub110\uc774": 34, "\ud655\ub300\ud558\uace0": 34, "\uae30\uc6b8\uc5b4\uc84c\ub294\uc9c0": 34, "shear": 34, "pramet": 34, "\uacf5\uac04\uc0c1\uc758": [34, 36], "\ud3c9\uba74\uc5d0": 34, "\ud22c\uc0ac": [34, 35], "iteration\uc5d0\uc11c": 34, "\uc9d1\ud569\uc5d0\uc11c": 34, "batch\ub97c": 34, "\uc0d8\ud50c\uacfc": [34, 47], "\ub450\uc0d8\ud50c": 34, "\uc81c\uacf1": 34, "\ub80c\ub354\ub9c1\uc740": 34, "\uc0d8\ud50c\ub9c1\uc758": [34, 42], "beta_2": 34, "30\ub9cc": 34, "1\uac1c\ub85c": 34, "2\uc77c": 34, "geometry\ub85c": 34, "\uc0c1\ubc18\uad6c\uc5d0": 34, "479": 34, "pathtrac": 34, "\ud604\uc2e4": [34, 38], "\uc55e\ucabd\uc5d0\uc11c": 34, "llff": 34, "1008": 34, "756": 34, "nv": 34, "srn": 34, "fusion": [34, 36], "\uc678\uc591": 34, "nonlambertian": 34, "\ubc18\uc0ac": 34, "ghost": 34, "ship": 34, "lego": 34, "blurri": 34, "\uae30\ud558\uc801": 34, "\ub80c\ub354\ub9c1\uc5d0\uc11c\uc758": 34, "\uacb9\uce68": 34, "\ubc88\uc9d0": 34, "\ubc1d\uae30\uc640": 34, "\uad11\ud0dd": 34, "\uac00\uc9d0": 34, "occlud": 34, "correctli": 34, "pe": 34, "\uc758\uc874\uc131": [34, 56], "vd": 34, "\uc8fc\ud30c\uc218": 34, "\ucd08\uacfc\ud560": 34, "\ud5a5\uc0c1\uc5d0": 34, "so": 34, "li": 34, "within": 34, "dure": [34, 59], "192": [34, 48], "\ub354\ud574\uc9c4": [34, 39], "relu\ub85c": 34, "\uc5ee\uc778": 34, "\ud1b5\uacfc\ud558\uac8c": [34, 59], "deepsdf": 34, "\ub530\ub974\uace0": [34, 36, 60], "5\ubc88\uc9f8": 34, "activation\uc5d0": 34, "output\uc73c\ub85c": 34, "08751": 35, "index": [35, 40, 43], "\ud569\ucce4\ub2e4": 35, "\uc874\uc7ac\ud558\uc5ec": 35, "\ubcf5\uc7a1\ud558\uace0": [35, 36], "\uc18c\uaddc\ubaa8\uc758": 35, "\ud074\ub77c\uc6b0\ub4dc\ub97c": 35, "\ud504\ub85c\uc138\uc2a4\ub85c": 35, "\ud074\ub77c\uc6b0\ub4dc": 35, "\ud2b8\ub79c\uc2a4\ud3ec\uba38": 35, "2\ubc30": [35, 58], "\uc870\uac74\ud654\ub41c": 35, "\ud615\uc0c1\uc744": 35, "\uc694\uad6c\ud558\uace0": 35, "gpu\uc5d0\uc11c": [35, 
54], "2\ubd84\ub9cc\uc5d0": 35, "\ud0d0\uc0c9\ud55c\ub2e4": 35, "\ubdf0\ub97c": 35, "vide": 35, "3d\ub85c\uc758": 35, "\ubc1c\uc804": 35, "\ud3ed\ubc1c\uc801\uc73c\ub85c": 35, "\uc99d\uac00\ud568\uc5d0": [35, 42], "\ucd08\ub9cc\uc5d0": 35, "\uc790\uc5f0\uc5b4\uc5d0\uc11c": 35, "\uc218\uc815\ud560": 35, "\ube44\ub514\uc624\ub098": 35, "\uac1d\uccb4\uc640": 35, "\ub3c4\uba54\uc778\uc5d0\uc11c\uc758": 35, "\ud0d0\uc0c9\ud558\uace0": 35, "\ub17c\ubb38\ub3c4": 35, "\ub454\ub2e4": 35, "\ud569\uc131\uc758": 35, "\ud569\uc131\uc740": [35, 42], "\ud558\ub098\uc5d0": 35, "\uc774\ub8ec": [35, 37], "unlabeld": 35, "directli": 35, "\ud655\uc7a5\uc131\uc758": 35, "\ud45c\ud604\ubc95\ub4e4": 35, "\uac70\uccd0\uc57c": 35, "\uac78\ub9b4": 35, "\ub4dc\ub294": [35, 36], "\ud504\ub85c\uc138\uc2a4\uac00": [35, 47], "prior\uac00": 35, "\uc788\uac70\ub098": 35, "\uac1c\uccb4\uc5d0": 35, "\ud574\ub2f9\ud558\uc9c0": 35, "minima\uc5d0": 35, "\ube60\uc9c8": 35, "\ud30c\uc774\ud504\ub77c\uc778": [35, 48], "\uac1c\uc694": 35, "\ub450\uce74\ud14c\uace0\ub9ac\uc758": 35, "\uc30d": 35, "\ub530\ub97c": 35, "\ud30c\uc778\ud29c\ub2dd\ub41c": 35, "\uc2a4\ud0dd\uc744": 35, "\ud074\ub77c\uc6b0\ub4dc\uc5d0\uc11c": 35, "\uba54\uc26c\ub97c": 35, "\ud68c\uadc0": 35, "\uc0d8\ud50c\ub9c1\ud55c\ub2e4": [35, 38, 43], "\ucd08": [8, 35, 45], "\uc218\ud589\ub420": 35, "\ud504\ub86c\ud504\ud2b8\ubfd0\ub9cc": 35, "\ud504\ub86c\ud504\ud2b8\uc640\ub3c4": 35, "\uc0dd\uc131\ud55c\ub2e4\ub294": 35, "\uc758\ubbf8\uc5d0\uc11c": [35, 43], "e\ub77c\uace0": 35, "\uba85\uce6d\ud558\uc600\ub2e4": 35, "\ubcc0\ud615\ud55c\ub2e4": 35, "\uc124\uc815\uc744": 35, "\ud504\ub85c\uc138\uc2a4": 35, "\ud504\ub85c\uc138\uc2a4\ub294": 35, "t\ub9c8\ub2e4": 35, "\ud3ec\ud568\ud558\uc9c0": 35, "\uc54a\uac8c": 35, "\uc5ed": 35, "\ub79c\ub358": 35, "\ud504\ub85c\uc138\uc2a4\ub97c": 35, "\uc9c4\ud589\ud558\uc5ec": 35, "\uc7a1\uc74c\uc774": 35, "xt": 35, "p\u03b8": 35, "\uadfc\uc0ac\ud558\uc5ec": 35, "\ud3c9\uade0\ubfd0\ub9cc": 35, 
"\uc0d8\ud50c\ub9c1\uc740": 35, "\ubc29\uc815\uc2dd": 35, "\uc124\uba85\ub420": 35, "sde": [35, 46, 54, 57, 58], "\ud574\uc11d\uae30\ub97c": 35, "2\ucc28": [35, 42], "\uac00\uc774\ub4dc": 35, "\ubd84\ub958\uae30": 35, "saliman": [35, 46, 54], "\uc0ad\uc81c\ud55c\ub2e4": 35, "\ub4dc\ub86d": 35, "\ud50c\ub85c\uc6b0": 35, "\uc5f0\uad6c\uc640": [35, 36], "pvd": 35, "\ucc28\ubcc4\uc810\uc744": 35, "\ub9de\ucd94\uc9c0\ub9cc": 35, "\uc7ac\uad6c\uc131\ud558\ub824\uace0": 35, "\ud558\uc9c0\ub294": [35, 39], "\ub9e4\uce6d": 35, "\ubaa9\ud45c\uc5d0": 35, "\ud0d0\uad6c\ud55c\ub2e4": 35, "\uac1d\uccb4\ub098": 35, "\uc808\ucc28\uac00": 35, "\uac78\ub9b0\ub2e4": [35, 43, 45], "\uc5f0\uad6c\ub4e4\ub3c4": [35, 46], "\ud504\ub86c\ud504\ud2b8\ub098": 35, "\uc881\uc740": 35, "\ud55c\uc815\ub418\uc9c0\ub9cc": 35, "\ud574\uacb0\ud55c\ub2e4": 35, "\uc788": 35, "\uc774\ub4e4": [35, 45], "\ubd88\ucda9\ubd84\ud55c": 35, "\ub2e4\ub8e8\uba74\uc11c\ub3c4": 35, "\uc720\ub9dd\ud55c": 35, "\uc21c\uc5f4": 35, "\ubd88\ubcc0": 35, "permut": 35, "invari": 35, "\ud074\ub77c\uc6b0\ub4dc\uc640": 35, "\uc720\uc0ac\ud558\uc9c0\ub9cc": [35, 36, 39, 46], "\uc218\ubc31\ub9cc": 35, "\uba54\ud0c0\ub370\uc774\ud130\ub85c": 35, "\ud6c8\ub828\uc2dc\ud0a8\ub2e4": 35, "\ud074\ub77c\uc6b0\ub4dc\ub85c": 35, "\ucc98\ub9ac\ud55c\ub2e4": 35, "\ud615\uc2dd\uacfc": [35, 46], "\ub2e4\uc591\ud588\uace0": 35, "\ubcf4\uc7a5\ud558\uae30": 35, "blender\ub97c": 35, "\ud615\uc2dd": [35, 43], "rgbad": 35, "blender": 35, "\ud615\uc2dd\uc744": 35, "\uc5d4\uc9c4\uc744": 35, "\ud504\ub85c\uadf8\ub7a8": 35, "\uae4a\uc774": 35, "\ucc44\ub110\uc774": 35, "\ud615\uc2dd\uc758": 35, "\uc0c1\uc790": 35, "\uc815\uaddc\ud654\ud558\uace0": 35, "\uad6c\uc131\ud55c": 35, "blender\uc758": 35, "\ub0b4\uc7a5\ub41c": 35, "\ub0b4\ubcf4\ub0c8\ub2e4": 35, "\uc0c9\uc0c1\uc774": 35, "\ud53d\uc140\uc5d0": 35, "\uac1d\uccb4\uc5d0": 35, "\ud074\ub77c\uc6b0\ub4dc\ub294": 35, "\ubd84\ud3ec\ub418\uc5b4": 35, "\uc810\uc758": 35, 
"\uad6c\uc131\ud568\uc73c\ub85c\uc368": 35, "\uba54\uc26c\uc5d0\uc11c": 35, "\uc0d8\ud50c\ub9c1\ud558\ub294": [35, 43], "\uc774\uc0c1\ud55c": [35, 52], "\ud30c\uc77c": 35, "\uc800\ud488\uc9c8": 35, "\uc81c\uac70\ud558\uae30": 35, "\ud734\ub9ac\uc2a4\ud2f1\uc744": 35, "\ud074\ub77c\uc6b0\ub4dc\uc758": 35, "svd\ub97c": 35, "\ud2b9\uc774\uac12\uc774": 35, "\uc784\uacc4\uac12": 35, "\uc774\uc0c1\uc778": 35, "\uacbd\uc6b0\uc5d0\ub9cc": 35, "\uc720\uc9c0\ud568\uc73c\ub85c\uc368": 35, "\ud3c9\ud3c9\ud55c": 35, "\ud2b9\uc131\uc5d0": 35, "\ud074\ub7ec\uc2a4\ud130\ub9c1": 35, "\ud074\ub7ec\uc2a4\ud130\ub294": 35, "\ub2e4\uc591\ud558\uac70\ub098": 35, "\ud574\uc11d": 35, "\ud074\ub7ec\uc2a4\ud130\ub97c": 35, "\ubc84\ud0b7\uc73c\ub85c": 35, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\uc11c": 35, "\ubc84\ud0b7\uc758": 35, "\ud63c\ud569\uc744": 35, "\uc124\uba85\ud560": 35, "\ub80c\ub354\ub7ec\uc640": 35, "\ud30c\ud2b8\uc5d0\uc11c\ub294": 35, "\ubd84\ud3ec\uc640": [35, 36, 47], "\ub80c\ub354\ub97c": 35, "glide\ub97c": [35, 42], "\uc800\uc790\ub4e4\uc758": 35, "\ud559\uc2b5\uc14b\uc5d0": 35, "\uc791\uae30": 35, "\ub85c\ub9cc": [35, 42], "\ud69f\uc218\ub294": 35, "000\ubc88\uc758": 35, "\uc9c4\ud589\ud588\uc74c\uc744": 35, "\uc2dc\uac04\uc5d0\ub294": [35, 53, 59], "\ub80c\ub354\uc758": 35, "complet": 35, "\ud504\ub808\uc784\uc6cc\ud06c\ub97c": 35, "\ud3ec\ud568\uc2dc\ucf30\ub2e4": 35, "\ud150\uc11c\ub85c": 35, "\uc88c\ud45c\uc640": [35, 36], "\ubc94\uc704\ub85c": 35, "\ub514\ub178\uc774\uc9d5\ud558\uc5ec": 35, "\ud150\uc11c\ub97c": 35, "\ud65c\uc6a9\ud558\ub358": 35, "\uc2a4\ud15d": [35, 42], "\ud3ec\uc778\ud2b8\ub97c": 35, "\ucc28\uc6d0\uc774": [35, 46, 60], "d\uc778": 35, "\ucee8\ud14d\uc2a4\ud2b8\ub85c": 35, "\ud0c0\uc784\uc2a4\ud15d": [35, 38], "\uc55e\uc5d0": [35, 40, 47, 49], "d\ucc28\uc6d0": 35, "\uc785\ub825\ud558\uace0": 35, "\uac00\uc838\uc628\ub2e4": 35, "256xd": 35, "lienarli": 35, "shape\uc758": 35, "\uc6b0\uc218\ud588\ub2e4": 35, "\ucee8\ud14d\uc2a4\ud2b8\ub294": 35, 
"257": 35, "shape\uac00": 35, "k\uc758": 35, "k\uac1c\ub97c": 35, "\uac00\uc838\uc624\uace0": 35, "\ud504\ub85c\uc81d\uc158\ud558\uc5ec": 35, "k\uac1c\uc5d0": 35, "\u03b5\uc640": 35, "\ud1a0\ud070\uc758": 35, "\ud3ec\uc778\ud2b8\ub85c": 35, "\u03c3\uc744": 35, "\ud074\ub77c\uc6b0\ub4dc\uc5d0": 35, "\ubd84\ubcc4": 35, "\ud558\ub2e4": [35, 45], "\ubaa8\ub378\uc5d0\uc11c\uc758": 35, "\uacc4\uce35": 35, "\ubc29\uc2dd\uc5d0\uc11c\ub294": [35, 46], "\uc800\ud574\uc0c1\ub3c4\uc758": 35, "\uc5c5\uc0d8\ud50c\ud55c\ub2e4": 35, "\uc0dd\uc131\uc5d0\uc11c\uc758": 35, "1k": [35, 42], "\uac19\uc744": 35, "\ub370\uc5d0\ub294": 35, "\ubc30": [35, 38, 48, 56], "\uc5c5\uc0d8\ud50c\ub7ec": 35, "\uc5c5\uc0d8\ud50c\ub7ec\ub294": 35, "3k": 35, "\ub808\uc774\uc5b4\uac00": 35, "\uc804\ub2ec\ud558\uc5ec": [35, 36], "\uad6c\ubcc4\ud560": [35, 47], "\ub80c\ub354\ub9c1\ud558\uc9c0": 35, "\ud14d\uc2a4\ucc98\uac00": [8, 35], "\uc785\ud600\uc9c4": 35, "\uba54\uc26c\ub85c": 35, "\ubcc0\ud658\ud558\uace0": [35, 47], "\ub80c\ub354\ub9c1\ud55c\ub2e4": 35, "\uade0\uc5f4": 35, "\uc774\uc0c1\uce58": 35, "sap\ubaa8\ub378\uc744": 35, "\uc0ac\uc6a9\ud574\ubd23\uc73c\ub098": 35, "\uc874\uc7ac\ud588\ub358": 35, "field\ub97c": 35, "\uc608\uce21\ud558\uace0": [35, 54], "merch": 35, "\ucd94\ucd9c\ud588\ub2e4": [35, 36], "\uba54\uc26c\uc758": 35, "\ubc84\ud14d\uc2a4\uc5d0": 35, "\ud560\ub2f9\ud588\ub2e4": 35, "sdf": [35, 39, 55], "\ud074\ub77c\uc6b0\ub4dc\ub85c\ubd80\ud130": 35, "sdf\ub97c": [35, 39], "\uba54\uc26c": [8, 35], "\uc124\uba85\uacfc": 35, "\uc77c\uce58\ud558\ub294\uc9c0\ub97c": 35, "\uc720\uc0ac\ub3c4\uac00": 35, "r\uac1c\uc758": 35, "pointnet": 35, "\uc5d0\uce21": 35, "40m": 35, "\uc815\ubcf4\ub3c4": 35, "vec": 35, "\ucea1\uc158\uc5d0\ub9cc": 35, "\uadf8\ub9ac\ub4dc": 35, "300m": [35, 59], "\uc870\uac74\ub9cc": 35, "\uc774\ubbf8\uc9c0\ub85c\uc758": 35, "precision\uc774": [35, 49], "\ub098\uc058\uac8c": 35, "\uc784\ubca0\ub529\ubcf4\ub2e4": 35, "\uacf5\uac04\uc801\uc778": [35, 44], 
"\uc774\uc810\uc774": 35, "\uc2dc\uc0ac": 35, "\uc2a4\ucf00\uc77c\uc744": 35, "\uc99d\uac00\uc2dc\ud0a4\uba74": 35, "\uc218\ub834": [35, 36], "\ud3ec\uc778\ud2b8\ud074\ub77c\uc6b0\ub4dc": 35, "\uc774\ud574\ud558\uc9c0": 35, "\ubabb\ud558\uac70\ub098": 35, "\ucd94\ub860\ud55c": 35, "\ud574\uc11d\ud558\ub294": 35, "\ud615\uc0c1\uc758": 35, "\uc77c\ubd80\ub97c": [35, 41], "\ucd94\ub860\ud558\ub294": 35, "e\ub97c": [35, 39], "art": [35, 50, 58], "dreamfus": [8, 20, 35, 36], "\ubd88\uc77c\uce58\uc758": 35, "\ubbf8\ubb18\ud55c": 35, "\uc8fc\ubaa9\ud574\uc57c": 35, "dreamfusion\uacfc": [35, 36], "\uba40\ud2f0\ubdf0": [8, 35], "\ubaa8\ub4e0\ubdf0\ub97c": 35, "\ucd5c\uc801\ud654\ud558\uc9c0": 35, "\uac1d\uccb4\uac00": [8, 35, 48], "\uc2dd\ubcc4\ub418\uc9c0": 35, "\ub0ae\uc544\uc9c8": 35, "\uc804\ucc98\ub9ac\ud574\uc57c": 35, "\ubb38\uc81c\ub2e4": 35, "\ub17c\ubb38\uc774": [35, 42, 61], "\uc790\uccb4\uc5d0": 35, "\uc783\uc744": 35, "\ud14c\ud06c\ub2c9\ubcf4\ub2e4": 35, "\uc2e4\uc6a9\uc801\uc73c\ub85c": 35, "\ud504\ub85c\uadf8\ub7a8\uc744": 35, "\ub9cc\ub4e4\uac70\ub098": 35, "\uac1c\uccb4\ub97c": [35, 43], "\ucd5c\uc0c1\uc758": 35, "\ud5a5\ud6c4": 35, "\uc138\uacc4": 35, "\uc0dd\uc131\uae30\ub97c": 35, "\ud6c8\ub828\uc2dc\ucf1c": 35, "\uc774\ub8e8\uc5b4\uc9c4\ub2e4": 35, "\ud615\uc0c1\uc774\ub098": 35, "\uc9c8\uac10\uc758": 35, "\uba54\uc26c\ub098": 35, "\ud655\uc7a5\ud558\uba74": 35, "\ub192\uc774\ub294": 35, "\uacf5\uc720\ud560": [35, 39], "\uc608\uc0c1\ud55c\ub2e4": 35, "\uc57c\uae30\ub41c": 35, "\ud3b8\ud5a5\uc744": 35, "\ud3ec\ud568\ud560": [35, 40], "\ubb3c\ub9ac\uc801\uc73c\ub85c": 35, "\uc81c\uc791\ub420": 35, "\uc81c\ud488\uc774": 35, "\uc704\ud5d8\ud560": 35, "\uccad\uc0ac\uc9c4\uc744": 35, "\ud569\uc131\ub41c": [35, 42], "\uc2dc\uc2a4\ud15c\uc774\ub2e4": 35, "e\uac00": [35, 39], "\ubd84\uc57c\uc5d0\uc11c\uc758": [35, 37], "\uc2dc\uc791\uc810\uc73c\ub85c": [35, 46], "\uae30\uc5ec\ud560": 35, "\ud76c\ub9dd\ud55c\ub2e4": 35, "2108": 37, "01073": 37, "03": [37, 59], 
"\uc9c4\ud654": 37, "\ub418\uc5b4\uc624\uace0\uc788\ub2e4": 37, "\uc774\ub04c\uc5b4\ub0b4\ub824\ub294": 37, "\ubd84\uc57c\ub3c4": 37, "\ud65c\ubc1c\ud788": [36, 37, 40], "\uc9c4\ud589\ub418\uace0\uc788\ub2e4": 37, "\ubc29\uc2dd\uc73c\ub85c\uc758": 37, "editing\uc5d0\ub294": 37, "\uba87\uac00\uc9c0": 37, "sdedit\uc740": 37, "\ud574\uacb0\ud574\ub098\uc544\uac14\ub2e4\ub294": 37, "contribution\uc73c\ub85c": 37, "\uc81c\uc2dc\ud558\uc600\ub2e4": 37, "abstract\uc5d0\uc11c": 37, "\ub9d0\ud55c": 37, "editing\uc774\ub780": 37, "\uc720\uc800\uac00": [37, 43], "\uc81c\uc2dc\ud558\uba74": 37, "\ub450\uac00\uc9c0\uc758": 37, "\ud3c9\uac00\uc694\uc18c\uac00": 37, "\uc720\uc800\uc758": 37, "\ub530\ub974\ub294\uc9c0": 37, "real\ud55c\uc9c0": 37, "\uc5f0\uad6c\ubc29\uc2dd\uc740": 37, "\ub450\uac00\uc9c0\ub85c": 37, "\ub098\ub25c\ub2e4": 37, "edit\ub41c": 37, "condition\ub9c8\ub2e4": 37, "\uc7ac\ud559\uc2b5\uc744": 37, "inversion\ud55c": 37, "vactor\ub97c": 37, "\uc870\uc791\ud574": 37, "\uc815\uc758\ub418\uc5b4\uc57c\ud558\uace0": 37, "\uc7ac\ud559\uc2b5\uc774": 37, "\ud55c\uac1c\uc758": [37, 57], "\ub192\uc740\uacf3\uc73c\ub85c": 37, "\ud574\ub098\uac00\uba74": 37, "\uc5bb\uc5b4\ub0bc": 37, "\uc21c\uac04": 37, "\ubbf8\ubd84\uac12": 37, "\uc8fc\uc785\ud558\ub294\ub370": 37, "\uc8fc\uc785\ud55c\ub2e4": 37, "ddpm\uacfc\uc758": 37, "\uc815\uc758\ud558\ub294": [37, 46, 56], "equation\uc758": 37, "\uc815\ub3c4\uc774\ub2e4": 37, "1907": 37, "05600": 37, "level\uc744": 37, "\uc774\ubbf8\uc9c0\uc704\uc5d0": 37, "patch\ub97c": 37, "stroke\ub97c": 37, "coarse\ud55c": 37, "stroke\uc758": 37, "sde\uc758": 37, "noise\ud654\ub41c": 37, "\uc9c4\ud589\ud560": [37, 48], "\uc815\uc758\ud574\uc57c\ud558\ub294\ub370": 37, "realistic\ud558\uc9c0\ub9cc": 37, "\ud558\uc9c0\uc54a\uc740": 37, "faithful\ud558\uc9c0\ub9cc": 37, "artistic\ud55c": 37, "\uc5bb\uac8c\ub41c\ub2e4": 37, "sdedit\uc758": 37, "\uc885\ud569\uc801\uc778": 37, "survey\ub97c": 37, "stylegan": 37, "ada": 37, "sdedit\uc774": 37, 
"\uae30\ubc95\uacfc": 37, "01952": 38, "stabil": 38, "sdxl\uc740": 38, "unet\uc744": 38, "sdxl\uc5d0\uc11c": 38, "\uc0ac\uc6a9\ub418\uba74\uc11c": 38, "\uc99d\uac00\ud588\ub2e4": 38, "\ub2e4\uc218\uc758": [38, 57], "\ube44\uc728\uc5d0": 38, "sdxl\uc744": 38, "\uc124\uacc4\ud588\ub2e4": 38, "sdxl\uc758": 38, "\uae30\ub2a5\uc774\ub77c": 38, "\uac10\ub3c5": 38, "\uac04\ub2e8\ud558\uba74\uc11c\ub3c4": 38, "\ud5a5\uc0c1\ud558\ub294": 38, "\ubcc4\uac1c\uc758": 38, "\ub192\uc778": 38, "sdxl\uc774": 38, "sd\ubcf4\ub2e4": 38, "\uc2dc\uac01\ud654\ud588\ub294\ub370": 38, "128x128": [38, 58], "sdedit\uc744": 38, "heterogen": 38, "\ud14c\uc774\ube14": [38, 49], "\ucc38\uace0\ud558\uba74": [36, 38, 49], "highest": 38, "level\uc5d0\uc11c": 38, "\ube14\ub7ed\uc744": 38, "unet\uc5d0\uc11c": 38, "lowest": 38, "8x": 38, "l\uacfc": 38, "bigg\ub97c": 38, "openclip\ub85c\ubd80\ud130": 38, "\uc0ac\uc774\uc988\uac00": 38, "6b\ub85c": 38, "817m": 38, "\uc2dc\ud0a4\uac70\ub098": 38, "upscale\ud558\uc5ec": 38, "\uc815\ud574\uc9c0\ub294": 38, "\ubb38\uc81c\uc810\uc774": [38, 57], "\uc800\ud558\uc2dc\ud0a4\uac70\ub098": 38, "\uc77c\ubc18\ud654\ub97c": 38, "\uc2dc\uac01\ud654\ud574\uc8fc\ub294": 38, "\uadf8\ub9bc\uc774\ub2e4": 38, "conditiong": 38, "\ubbf8\ub9cc\uc758": 38, "39": 38, "\ub2ec\ud55c\ub2e4": 38, "\ud574\uc0c1\ub3c4\uc5d0\uc11c": 38, "\ud06c\uae30\uc778": 38, "\uc81c\uacf5\ud574": 38, "\ucd94\uac00\ub41c\ub2e4": 38, "\uc815\ud560": 38, "\ud574\uc0c1\ub3c4\uc5d0": 38, "\uc758\uc874\uc801\uc778": 38, "\uc5f0\uad00\uc2dc\ud0a4\ub3c4\ub85d": 38, "imagenet\uc73c\ub85c": 38, "\uc9c4\ud589\ud574": [38, 44], "conditiong\uc5d0": 38, "\uc6b0\uc218\uc131\uc744": 38, "\uc785\uc99d\ud588\ub2e4": 38, "cin": 38, "\uc2dc\ucf30\uace0": 38, "70k": 38, "nocond": 38, "\ud45c": 38, "\ubcf4\ub2e4\uc2dc\ud53c": 38, "\uba38\ub9ac\uac00": [38, 41], "\uc798\ub824\uc9c4": 38, "cropping\uc73c\ub85c": 38, "\uc0dd\uc131\ub418\uc5c8\uae30": 38, "\uade0\ub4f1\ud558\uac8c": 38, "\ub192\uc774": [38, 44], 
"\ub108\ube44": [38, 44], "\ubaa8\uc11c\ub9ac\uc5d0\uc11c": 38, "\uc9c0\uc815\ud558\ub294": 38, "\ud30c\ub77c\ubbf8\ud130\ub85c\uc368": 38, "conditioning\uacfc": 38, "dm\uc5d0\uc11c\ub3c4": 38, "\uc0ac\uc6a9\ub420": [38, 39, 42, 43], "\uac15\uc870\ud55c\ub2e4": 38, "conditioning\uc740": 38, "\uacb0\ud569\ub420": 38, "1024x1024": [38, 41, 42, 53], "\uc138\uacc4\uc5d0\uc11c": 38, "\ubd80\uc790\uc5f0\uc2a4\ub7fd\ub2e4": 38, "\uc138\uacc4\uc5d0\uc11c\ub294": 38, "\ube44\uc728\uc758": 38, "\uc9c0\ub2c8\uace0": [38, 51, 53], "\ud30c\uc778\ud29c\ub2dd\ud588\ub2e4": 38, "\ud53d\uc140\uc218\ub97c": 38, "64\uc758": 38, "\ubc30\uc218\ub97c": 38, "\uc9c0\ub2c8\ub3c4\ub85d": 38, "\ubc30\uce58\ub294": 38, "\ubc84\ud0b7": 38, "\ubc88\uac08\uc544": [38, 51], "\uac00\uba70": 38, "conditioning\uc73c\ub85c": 38, "\uc8fc\uc5c8\uc73c\uba70": 38, "\uc784\ubca0\ub529\ub418\ub294": 38, "tgt": [38, 40], "\ube44\uc728\ubc0f": 38, "pretraining\uc774": 38, "\ub9c8\uce5c": 38, "\ud559\uc2b5\ud588\uace0": [38, 44], "2\uc808\uc5d0\uc11c": 38, "\uacb0\ud569\ud588\ub2e4": 38, "\ud558\ub098\uc774\uace0": 38, "autoencoder\uc758": 38, "composition\uc740": 38, "ldm\uc73c\ub85c\ubd80\ud130": 38, "\ud45c\ud604\ub418\uc9c0\ub9cc": 38, "\ub514\ud14c\uc77c\ud55c": 38, "\ud5a5\uc0c1\ud558\uace0\uc790": 38, "\ud5a5\uc0c1\ud588\ub2e4": 38, "\ub05d\uc73c\ub85c": 38, "\uc544\ud0a4\ud14d\ucc98\uc5d0\uc11c": 38, "average\ub97c": 38, "\uba54\ud2b8\ub9ad\uc5d0": 38, "\uc815\ub9ac\ud574\uc8fc\ub294": 38, "\uc808\uc785\ub2c8\ub2e4": 38, "step\uc740": [38, 42, 54], "\uc14b\uc73c\ub85c": 38, "\ub098\uc640\uc788\ub294": 38, "\ubd84\ud3ec\uc5d0": [38, 43, 60], "600": 38, "\uc0ac\uc774\uc988\ub85c": 38, "2048\ub85c": 38, "\ud559\uc2b5\uc2dc\ucf30\uace0": 38, "\ub9c8\uce68\ub0b4": 38, "offset": [38, 39], "\uc218\uc900\uacfc": 38, "\uc601\uc5ed\uc758": 38, "\ube44\uc728\ub85c": 38, "\uacbd\ud5d8\uc801\uc73c\ub85c": 38, "6\ucc98\ub7fc": 38, "\ucc3e\uc558\ub2e4": 38, "\uadf8\ub9bc\uc774": [38, 42], "sdedit\uc5d0\uc11c": 38, 
"\ub530\ub790\uc73c\uba70": [36, 38], "\uc2a4\ucf00\uc77c\uc5d0": 38, "inference\uc5d0\uc11c": 38, "diffuse\uc640": 38, "\uc2a4\ud15d\uc740": 38, "\uc120\ud0dd\uc774\uc9c0\ub9cc": 38, "\ub514\ud14c\uc77c\uc5d0\uc11c": 38, "02463": 39, "\ub17c\ubb38\uc694\uc57d": 39, "2023\ub144": 39, "openai\uc758": 39, "heewoo": 39, "alex": 39, "\ubc1c\ud45c\ud55c": [39, 51], "\ub17c\ubb38\uc785\ub2c8\ub2e4": 39, "diffusers\ub97c": 39, "huggingfac": [39, 50, 59], "\uc0dd\uc131\ubc29\uc2dd": 39, "assets\uc744": 39, "\ucc28\ubcc4\uc810": 39, "mesh": [8, 36, 39, 55], "parameters\ub97c": 39, "\ud45c\ud604\ub9cc": 39, "\ub9ce\ub2e4\uace0": 39, "inr": 39, "inrs\ub294": 39, "info": 39, "\ud654\uc9c8\uc5d0": 39, "\uc54a\ub294\ub370": 39, "grid\ub098": 39, "arbitrari": [8, 39], "points\ub97c": [8, 39], "\ubbf8\ubd84\uc774": [39, 60], "inrs\uc740": 39, "applications\ub3c4": 39, "\ud0c0\uc785\uc758": 39, "inrs\uc744": 39, "\ub2e4\ub8ec\ub2e4": 39, "radiamc": 39, "along": 39, "dmtet": [36, 39], "get3d": 39, "vertex": [39, 55], "\uc0bc\uac01\uba54\uc26c\ub97c": 39, "\uc624\ub514\uc624": 39, "application\uc5d0\uc11c": 39, "\ud3b8\ud55c": 39, "representation\uc73c\ub85c": [39, 43], "\ud588\ub2e4\ub294": [39, 42], "mildenhal": 39, "\uc2dc\uc57c": 39, "ge": 39, "\ud3b8\uc758\ub97c": 39, "\uc2dc\uc57c\uc5d0\uc11c": 39, "\ubc14\ub77c\ubcf8": 39, "\ub80c\ub354\ub9c1\ud558\uae30": 39, "color\uac12\uc744": 39, "infty_0": 39, "ds": 39, "\ubcf4\ucda9\uc124\uba85": 39, "\uc801\ubd84\uc2dd\uc744": 39, "n_": [36, 39, 40], "t_j": 39, "\ub098\ub204\ub294": 39, "coarse\uc640": 39, "\ub450\ub2e8\uacc4\ub85c": 39, "\uc138\ubd80\uc801\uc73c\ub85c": 39, "2\ubc88\uc758": 39, "ray\uc758": [36, 39], "transmittance\ub97c": 39, "\uc815\uc758\ud558\uc600\ub2e4": 39, "alpha\uac12\uc774\ub098": 39, "\ucd1d\ud569\uc5d0": 39, "\ud574\ub2f9\ud55c\ub2e4": [36, 39], "\ubcf4\ucda9": 39, "stf\ub294": 39, "distances\uc640": 39, "\ubaa8\ub450\ub97c": 39, "\uc139\uc158\uc5d0\uc11c\ub294": 39, "meshes\ub97c": 39, 
"\uad6c\uc131\ud558\uace0": 39, "rendering\uc744": 39, "cloud": [8, 20, 39], "survei": 39, "scaler": 39, "\ubc29\ubc95\uc911": 39, "\ud558\ub098\ub2e4": 39, "mapping\ud55c\ub2e4": 39, "\ud45c\uba74\uae4c\uc9c0\uc758": 39, "0\ubcf4\ub2e4": 39, "\uc678\ubd80\uc784\uc744": 39, "\uc815\uc758\uc5d0": 39, "\uc77c\ub54c\ub294": 39, "\ud45c\uba74\uc5d0": 39, "orientation\uc744": 39, "sdfs\ub97c": 39, "voxel\uc744": 39, "tetrahedr": [39, 55], "dmtet\uc758": 39, "\ucd9c\ub825\uc740": [8, 39], "grid\uc5d0\uc11c\uc758": 39, "displac": 39, "dmtet\uc5d0": 39, "\uc815\ubcf4\uae4c\uc9c0\ub3c4": 39, "bump": 39, "displacement\uc758": 39, "tutori": [39, 47], "grabcad": 39, "bump\ub294": 39, "\uc84c\uc9c0\ub9cc": 39, "\ubc14\ub010\uac83\uc740": 39, "\uc544\ub2d8\uc744": 39, "displacement\ub97c": 39, "\ubcc0\ud654\ub41c\uac83\uc744": 39, "ddpm\uc73c\ub85c": 39, "\uc11c\uc11c\ud788": 39, "\uc644\uc804\ud55c": [39, 45], "\ub418\uc5b4\uac00\ub294": 39, "noise\uc640": [39, 41, 57], "\uad6c\ubd84\ubd88\uac00\ub2a5\ud55c": 39, "\uc0c1\uc815\ud55c\ub2e4": 39, "\uc9c4\ud589\ub418\uc9c0\ub9cc": 39, "\ud65c\uc6a9\uc2dc\uc5d0\ub294": 39, "\ub2e8\uc870\uac10\uc18c\ud558\ub294": 39, "\uc2a4\ucf00\uc904\uc744": 39, "alpha_0": [36, 39], "\uc190\uc2e4\ud568\uc218\ub97c": [39, 51, 54, 59], "\uc704\ub294": 39, "\ud559\uc2b5\ud55c\ub2e4\ub294": 39, "\uc758\ubbf8\uc774\uace0": [39, 49], "\uc758\ubbf8\uc774\ub2e4": [39, 54], "denosing\uc2dc\uc5d0\ub294": 39, "latency\ub97c": 39, "heun": [39, 46, 54], "sampler\uc640": [39, 42], "scale\uc774\uace0": 39, "coher": 39, "\ucee4\uc9c0\uc9c0\ub9cc": 39, "\ub5a8\uc5b4\uc9c8": 39, "\ud544\uc694\ud558\ub2e4\ub294": 39, "\uc54c\uc544\ub0c8\ub2e4": 39, "section": [39, 47], "space\uc5d0\uc11c\ub3c4": 39, "\uc0d8\ud50c\ub4e4\uc744": 39, "space\uac04\uc758": 39, "\ub2f4\ub2f9\ud558\ub294": 39, "stage\ubc29\uc2dd\uc73c\ub85c": 39, "\ubd24\ub358": 39, "ldm\uc5d0\uc11c\ub294": 39, "\ubcf5\uc7a1\ub3c4": 39, "penalty\ub098": 39, "\uc0ac\uc6a9\ud588\uc73c\ub098": 39, 
"objective\uc640": 39, "l_1": [39, 56], "l_2": 39, "regularization\uacfc": 39, "quantization\uc740": 39, "bottleneck\uc774": 39, "range\ub97c": 39, "fu": 39, "decoder\uc758": 39, "sanghi": 39, "occup": [39, 55], "liu": 39, "34": 39, "kosiorek": 39, "view\uc744": 39, "encoding\ub41c": 39, "chen": 39, "transformer\uae30\ubc18": 39, "view\uc5d0\uc11c": 39, "\uc0dd\uc131\ud558\ub294\uac83\uc744": 39, "43": 39, "table\uc744": 39, "bautista": 39, "code\ub97c": 39, "dupont": 39, "meta": 39, "erkoc": 39, "akin": 39, "fitting\uc744": 39, "\ud559\uc2b5\uc2dc\ud0a8\ub2e4": [39, 54], "mlp\uc758": [36, 39], "\ube44\uad50\ud558\uc600\uc744\ub54c": 39, "post": 39, "\ubcc0\uacbd\ud558\uc5ec": [39, 49], "asset\ubcc4": 39, "cloud\uc758": 39, "\ub298\uc774\uace0": 39, "view\ub97c": 39, "16k": 39, "multiview": [8, 39], "pointcloud\uc5d0": 39, "crack\uc774": 39, "\ubc1c\uc0dd\ud588\ub2e4\uace0": 39, "\ub80c\ub354\ub9c1\uc2dc": 39, "\uc870\uba85\uacfc": 39, "\ubb3c\uccb4\ud45c\uba74\uc758": 39, "\uac04\ub7b5\ud654\ud588\ub2e4": 39, "function\uc5d0\uc11c": 39, "asset\uc758": 39, "\uc758\ubbf8\uc0c1": 39, "\uc785\ub825\ubc1b\uc740": 39, "\uc735\ud569\ud558\uc5ec": 39, "\uc7a5\uc810\uc73c\ub85c": 39, "\ud588\uc73c\ubbc0\ub85c": 39, "\uc758\ub3c4\uac00": 39, "\ub290\uaef4\uc9c0\ub294": 39, "pseudocod": [39, 46, 58], "encoder\uc5d0": 39, "clouds\uc640": 39, "views\ub294": 39, "backbone\uc5d0": 39, "\ucc98\ub9ac\ub418\uc5b4": 39, "vectors\uac00": 39, "bottleneck\uacfc": 39, "\uc0ac\uc804\ud559\uc2b5\uc2dc": 39, "sdf\uc640": 39, "head\ub97c": 39, "2\uc640": 39, "head\ub4e4\uc744": 39, "net\uc774": 39, "4096\uac1c\uc758": 39, "\uc0d8\ud50c\ub9c1\ud558\uc600\uc73c\uba70": 39, "nerf\uc5d0\uc11c\ub294": 39, "transmittance\uc5d0": 39, "integr": [8, 39, 58], "\uc5bb\uc740transmittance\ub85c": 39, "rendering\uacfc": 39, "t_c": 39, "\uc608\uce21\ud558\uc600\ub2e4": 39, "truth\ub85c\ub294": 39, "rendering\uacb0\uacfc\uc758": 39, "channel\uc744": 39, "\uc190\uc2e4\ud568\uc218\ub294": [39, 51, 56, 59], 
"\ubaa9\uc801\uc774\uc5c8\uc73c\ub098": 39, "mesh\ub3c4": 39, "\uc0dd\uc131\ud574\uc57c": 39, "\uc81c\uac70\uac00": 39, "\uc911\uc694\ud558\uc600\uc744": 39, "\uc0dd\uac01\ub41c\ub2e4": 39, "l_t": 39, "\ud569\ud558\uc5ec": 39, "how": [39, 48], "mlps\uc5d0": 39, "heads\ub97c": 39, "mlps\ub294": 39, "vertex\uc758": 39, "grid\ub85c": 39, "\uc62e\uaca8": 39, "cube\ub97c": 39, "\uc9c4\ud589\ud574\uc57c": 39, "color\ub294": 39, "\uad6c\ucd95\uc2dc": [39, 53], "preprocessing\uc5d0": 39, "\uc2e4\ud5d8\uc2dc": 39, "\ud588\uc73c\uba70": 39, "e\uc758": 39, "target\uc744": 39, "\uad6c\ud588\ub2e4": 39, "target\ub85c\ub294": 39, "cloud\uc5d0\uc11c": 39, "\ud2b9\uc815\uc704\uce58": 39, "nearest": [39, 59], "neighbor": 39, "point\uc758": 39, "loss\uc640": 39, "heads\uac00": 39, "distillation\uc744": [39, 54], "\uac16\uac8c\ub41c": 39, "end\ub85c": 39, "tune\ud55c\ub2e4": 39, "rendering\uc5d0\ub294": 39, "\ubd88\uc548\uc815\ud588\uc73c\ubbc0\ub85c": 39, "\uc190\uc2e4\ud568\uc218\ub9cc": 39, "\uc801\uc808\ud568\uc744": 39, "rendering\uc5d0": 39, "n\uc740": 39, "s\ub294": 39, "\ud654\uc9c8": 39, "construct": [39, 57], "\uacb0\uacfc\ubb3c\ub85c": 39, "alpha\ucc44\ub110\uc744": 39, "\ubc14\uafb8\uc5c8\ub2e4": 39, "sequences\uc758": 39, "times1024": 39, "1024\uc778": 39, "token\ucc98\ub7fc": 39, "token\uc740": [39, 48], "matrices\uc758": 39, "row\uc640": 39, "\uc77c\uce58\ud55c\ub2e4": 39, "length\uc640": 39, "width\uac00": 39, "\uace0\ucc28\uc6d0\uc758": 39, "\ucc44\ub110\uc758": 39, "\uc99d\uac00\ud558\uc600\uae30": 39, "context\ub85c": 39, "e\uc640\uc758": 39, "\ucc28\uc774\uc810\uc73c\ub85c\ub294": 39, "prediction\uc73c\ub85c": 39, "parameterize\ud558\uc9c0": 39, "\uc54a\uc558\ub2e4\ub294": 39, "\ub300\uc218\uc801\uc73c\ub85c\ub294": 39, "\uc758\ubbf8\uc774\ub098": 39, "processing\ubd80\ubd84\uc5d0\uc11c\ub294": 39, "\uacc4\uc0b0\uc2dc": 39, "20\uac1c\uac00": 39, "60\uac1c\uc758": 39, "rendering\ud588\ub2e4": 39, "20\uac1c\ub9cc": 39, "\uc0ac\uc6a9\ud588\uc744\ub54c": [39, 46, 58], 
"\uc601\uc5ed\ub54c\ubb38\uc5d0": 39, "crack": 39, "\uac00\uc544\ub2cc": 39, "16k\uc758": 39, "encoder\ud559\uc2b5\uc744": 39, "\uc18c\uc7ac\uc640": 39, "\ub77c\uc774\ud305\uc744": 39, "\ub77c\uc774\ud305": 39, "\uc870\uac74\ub0b4\uc5d0\uc11c": 39, "ambient\uc640": 39, "shading\ub9cc": 39, "\ubc18\uc0ac\uad11\uc774": 39, "\uace0\ub824\ub418\uc9c0": 39, "\ub9e4\ub048\ud55c": 39, "\ubb3c\uccb4\ub294": 39, "\ucd94\uc815\ub428": 39, "phong": 39, "\uae30\ubcf8\uc801\uc778": 39, "shading\ubc29\uc2dd\uc73c\ub85c": 39, "specular\ub97c": 39, "realistic_visualisation_of_endoscopic_surgery_in_a_virtual_training_environ": 39, "\ub300\ub7b5": 39, "100\ub9cc\uac1c\uc758": 39, "assets\uacfc": 39, "12\ub9cc\uac1c\uc758": 39, "\uc218\uc9d1\ud588\ub2e4": 39, "\uc131\ub2a5\ud3c9\uac00": 39, "distillation\uc5d0\uc11c": 39, "\uc88b\uc544\uc9c4\ub2e4": 39, "stf\uc758": 39, "\uc0c1\uc2b9\ud55c\ub2e4": 39, "e\ube44\uad50": 39, "\uc138\ubaa8": 39, "\ub9c8\ud06c\uac00": 39, "\uc6d0\ud615": 39, "e\uc774\ub2e4": 39, "\ub192\ub2e4": [39, 49], "\ub9ce\uc740\uc218\uc758": 39, "\uc0ac\uc6a9\ud558\uc5ec\ub3c4": 39, "\uc6b0\uc218\ud568": [8, 39], "score\uc758": 39, "\uc0dd\uc131\uacb0\uacfc\uc758": 39, "\uac83\uc774\uace0": [39, 41], "precision\uc758": 39, "\uc0dd\uc131\uacb0\uacfc\uc640": 39, "\ube44\uc2b7\ud55c\uc9c0": 39, "\uc0dd\uc131\uc2dc\uc5d0\ub294": 39, "\ubca4\uce58": 39, "e\uc5d0\uc11c": 39, "\ub098\ubb34\uc0ac\uc774": 39, "\ube48\uacf5\uac04\uc744": 39, "\ubb34\uc2dc\ud574\ubc84\ub9b0\uac83\uc744": 39, "\uac15\uc544\uc9c0\uc640": [39, 48], "\ucef5": 39, "\ucf00\uc774\uc2a4\uc5d0\uc11c": 39, "\ube44\uad50\uacb0\uacfc": 39, "latency\uc5d0\uc11c": 39, "\uc5ec\ub7ec\uac00\uc9c0": [39, 52], "\uac00\uc9c4\ubb3c\uccb4\ub97c": 39, "\uc81c\ud55c\uc801\uc774\uae30": 39, "\uc218\uc9d1\ud558\uba74": 39, "\ub098\uc544\uc9c8": 39, "\ubb34\uc2dc\ud558\ub294": 39, "\uac1c\uc120\ub420\uc218": 39, "\uae30\uc220\ub4e4\uc744": 39, "\uc735\ud569\ud558\ub294\ub370\uc5d0": 39, "e\ub85c": 39, 
"\uc218\ub834\ub3c4": 39, "\uac00\ub2a5\ud560": 39, "function\uacf5\uac04\uc5d0\uc11c": 39, "\uc804\uac1c\ud558\uc5ec": 39, "\uc0dd\uc131\ubaa8\ub378\ub4e4\uacfc": 39, "\ud765\ubbf8\ub85c\uc6b4": [39, 47, 50], "\uc788\uc74c\ud655\uc778\ud588\ub2e4": 39, "represention\uc744": 39, "\uc0dd\uc131\ud568\uc5d0": 39, "\uc778\ubb3c\ub4e4\uc5d0": 39, "chatgpt\ub85c": 39, "valuabl": 39, "write": 39, "feedback\uc744": 39, "\ubc1b\uc558\ub2e4\uace0": 39, "\ubd80\ubd84\uc788\uc5c8\ub2e4": 39, "03231": 40, "sty": 40, "lize": 40, "ne": 40, "\ud55c\uc7a5\uc758": 40, "\uc785\ud788\uace0\uc790\ud558\ub294": 40, "\uc9c4\ud589\uc911\uc774\ub2e4": 40, "\uc774\uc804\uae4c\uc9c0\uc758": 40, "\ud55c\uc7a5\uc529\uc744": 40, "\uc2dd\uc774": 40, "\ubc29\uc2dd\uc5d0\ub294": 40, "face\ub97c": 40, "\uc758\uc874\ub3c4\uac00": 40, "\uc785\ud788\uae30": 40, "\ud798\ub4e4\ub2e4": [40, 54], "space\uc548\uc5d0\uc11c": 40, "entangl": [40, 41, 50], "\ub418\uc5b4\uc788\ub2e4": 40, "styo\ub294": 40, "\ud3ec\uc6a9\ud558\ub294": 40, "base\ubaa8\ub378\ub85c": 40, "\ucc44\uc6a9\ud55c\ub2e4": 40, "disentangl": 40, "learner": 40, "idl": 40, "fcc": 40, "idl\ub85c\ubd80\ud130": 40, "\uc6d0\ud558\ub294\ub300\ub85c": 40, "\uc7ac\uc870\ud569": 40, "\uc720\uc9c0\ud558\uae30\uc704\ud574": 40, "\uc7ac\uc0ac\uc6a9\ud558\ub294": 40, "gan\uc774": [40, 43, 51], "\ubd84\uc57c\ub97c": 40, "\uc7a5\uc545\ud558\ub358": 40, "\ub4f1\uc7a5\uc73c\ub85c": [40, 42], "\uc8fc\ubaa9\uc744": [40, 52], "\uc2dc\uc791\ud588\ub2e4": 40, "\uac00\ub2a5\ud574\uc84c\uc9c0\ub9cc": 40, "\ubd80\ubd84\uae4c\uc9c0": 40, "control\ud558\uae30\uc5d0\ub294": 40, "fine\ud55c": 40, "\uc815\ubcf4\uae4c\uc9c0": 40, "\ubcf4\uc774\uba74\uc11c": 40, "stylegan\uc744": 40, "\uc758\uc874\uc131\uc774": 40, "\ucee4": 40, "artist": [40, 44], "\uc785\ud788\ub294\ub370": 40, "\uac1c\uc120\ud55c": 40, "transfer\ub97c": 40, "disentagl": 40, "\ubd84\ub9ac\ud558\ub294": 40, "\ubc18\ub300": 40, "a\uc758": [40, 41], "conext": 40, "\ubc30\uc81c\ud568\uacfc": 40, 
"\ud3ec\ud568\ud558\uae30\uc704\ud574": 40, "\ubd80\uc815\uc758": 40, "except": 40, "auxiliari": [40, 52], "\uad6c\uc131\ud574": [40, 44], "\uc784\uc758\ub85c": 40, "prompt\uac04": 40, "disentanglement\ub97c": 40, "\uc774\ubbf8\uc9c0\uc5d0\ub294": 40, "\uc774\ubbf8\uc9c0\ub9cc\uc758": 40, "style\uacfc": [40, 41], "\uad6c\ubcc4\ud558\ub294\ub370": 40, "idl\uc758": 40, "\ud559\uc2b5\ub9cc\uc73c\ub85c": 40, "transfer\uac00": 40, "\uac1c\uc120\ud558\uae30\uc704\ud574": 40, "\ub3c4\uc785\ud558\uc600\ub2e4": 40, "idl\ub85c": 40, "\uc870\ud569": 40, "recombin": 40, "\uc720\uc9c0\ud558\ub3c4\ub85d": 40, "\uc8fc\uc785\ud558\uae30\uc704\ud574": 40, "promt": 40, "m\uc758": 40, "layout\uc5d0": 40, "\ubbf8\uce5c\ub2e4": 40, "\uc8fc\uc785\ud569\uc73c\ub85c\uc368": 40, "replace\ud558\uc9c0\uc54a\uace0": 40, "index\ub9cc": 40, "replac": [40, 56], "time\uc5d0\uc11c": 40, "n\ubc88": 40, "\uc0ac\uc6a9\ud568\uc73c\ub85c\uc11c": 40, "\uc2e4\ud5d8\uc0c1": 40, "\uc774\ud558\uc758": [40, 50], "\ucd94\ucc9c": 40, "ak47": 40, "m4a1": 40, "400": 40, "ldm\uacfc": 40, "styo\uac00": 40, "\uc720\uc9c0\ud568\uacfc": 40, "\uc0dd\uc131\ud574\ub0b8\ub2e4": [40, 43], "study\ub3c4": 40, "\ubaa8\ub378\ub4e4\uc5d0": [8, 40, 42], "templat": 40, "\ub123\uace0": 40, "\ud559\uc2b5\ud560\uacbd\uc6b0": 40, "overfitting\uc774": 40, "\uc2ec\ud558\uace0": 40, "\ubd84\ub9ac\uc5d0": 40, "set\uc758": 40, "trick\ub3c4": 40, "\uc801\uc6a9\ud558\ub294\uac83\uc774": 40, "\uc0dd\uc131\ud574\ub0c8\ub2e4": 40, "inference\ud560": 40, "fcc\ub97c": 40, "\ub192\uc544\uc838": 40, "significant\ud55c": 40, "\uc0dd\uc131\ub418\ub294\uac83\uc744": 40, "photorealistic\uc5d0\uc11c": 40, "artistic\ud558\uac8c": 40, "\ubc14\ub00c\uace0": 40, "idl\uacfc": 40, "10\ubd84\uc774": 40, "\uac78\ub9ac\ubbc0\ub85c": 40, "efficiency\uac00": 40, "\ubabb\ud558\ub2e4\ub294": 40, "2019": 41, "1812": 41, "04948": 41, "huangzh13": 41, "stylegan\uc785\ub2c8\ub2e4": 41, "gan\uacfc": 41, "\ubcc0\uacbd\ud568\uc73c\ub85c\uc368": 41, "\uc62c\ub9ac\uace0": 
41, "loss\ub098": 41, "\uac1c\uc120\uc5d0": 41, "\ubcf4\ub3c4\ub85d": 41, "\ud558\uc8e0": 41, "\uc81c\uc548\ud558\uc5ec": 41, "\ub192\uc774\uba74\uc11c": 41, "\uac00\ub2a5\ud574\uc84c\uc2b5\ub2c8\ub2e4": 41, "contribution\uc744": [41, 49], "abstract\uc5d0\ub294": 41, "\ubb38\uc7a5\uc774": 41, "lead": [41, 55], "automat": [41, 53], "unsupervis": [41, 47], "freckl": 41, "\uc77c\uc744": 41, "\ubcf4\uc2dc\uba74": [41, 56], "attribute\uc758": 41, "separation\uc774": 41, "\uc598\uae30\ud558\uace0": 41, "stylegan\uc758": 41, "\ubaa9\uc801\uc744": 41, "\uc790\uc2e0\uc774": 41, "\ub9cc\ub4e4\uace0\uc790": 41, "\uc88b\ub354\ub77c\ub3c4": 41, "\uc0ac\uc6a9\uc790\uc758": 41, "\uc758\ub3c4\uc640": 41, "\uc0c1\uad00\uc5c6\ub294": 41, "\ub0b4\ubc49\uc5b4\uc900\ub2e4\uba74": 41, "\uc2e4\uc6a9\uc131\uc774": 41, "\uc88b\ub2e4\uace0": [36, 41, 42, 46, 58, 59], "\uadfc\ub798\uc5d0": 41, "\uc778\uae30\ub97c": 41, "\uc5bb\uc5c8\ub358": 41, "\uc774\uc720\ub3c4": 41, "\ub204\uad6c\ub098": 41, "\uc810\ub3c4": 41, "\ud55c\ubaab\ud588\ub2e4\uace0": 41, "stylegan\uc740": 41, "\ubaa8\ub378\uc774\ub77c\ub294": 41, "\uc758\ubbf8\uc788\ub2e4\uace0": 41, "network\ub294": [36, 41], "4x4\uc5d0\uc11c": 41, "1024x1024\uae4c\uc9c0": 41, "\ub192\uc5ec\uc90d\ub2c8\ub2e4": 41, "gan\ud558\uace0": 41, "\ud2b9\uc774\ud55c": 41, "z\ub97c": 41, "\uac70\uccd0\uc11c": [41, 56], "\uad6c\uc870\uc785\ub2c8\ub2e4": 41, "z\ub294": 41, "distribution\uc5d0\uc11c": [41, 49], "\uc0d8\ud50c\ub9c1\uc73c\ub85c": 41, "\uc5bb\uc2b5\ub2c8\ub2e4": 41, "distribution\uc73c\ub85c": [36, 41], "\ubcf4\ub0b4\ub294": 41, "\ubc30\uc6b0\uac8c": 41, "\ubd84\ud3ec\ub294": [36, 41], "\uc0dd\uae30\uac8c": 41, "\uc8fc\uc5b4\uc838\uc11c": 41, "\ud53c\ubd80\uac00": 41, "\ud76c\uba74\uc11c": 41, "\uc0d8\ud50c\ub4e4\uc774": 41, "\ud574\ubd05\uc2dc\ub2e4": 41, "\ud53c\ubd80\uc0c9\uacfc": 41, "\uba38\ub9ac": 41, "\uae38\uc774\ub77c\ub294": 41, "\uc5bd\ud788\uac8c": 41, "\ubc14\uafc0": 41, "\ud558\ub098\ub3c4": [41, 43], "\uc77c\uc5b4\ub098\uac8c": 
41, "gaussian\uc5d0\uc11c": 41, "w\ub97c": 41, "normalization\uc740": 41, "\ucc44\ub110\ub9c8\ub2e4": 41, "\ucde8\ud574\uc8fc\ub294": 41, "normalization\uc5d0": 41, "scale\uc744": [41, 49], "\uacf1\ud574\uc8fc\uace0": 41, "\ub354\ud574\uc8fc\ub294": 41, "transformation\uc73c\ub85c": 41, "w\ub294": 41, "\ubcf4\ub0b4\uc9c0\uac8c": 41, "adain\uc758": 41, "adain\uc740": 41, "\ube14\ub85d\ub9c8\ub2e4": 41, "\uac1c\uc529": 41, "style\uc740": 41, "\uc5f4\uc5ec\ub35f": 41, "adain\uc744": 41, "generator\uc5d0": [41, 43], "localization\uc774\ub77c\ub294": 41, "\ud2b9\uc9d5\uacfc\ub3c4": 41, "localization\uc774\ub780": 41, "\ubc14\uafc8\uc73c\ub85c\uc368": 41, "\ud2b9\uc9d5\ub4e4\uc744": 41, "\uc758\ubbf8\uc785\ub2c8\ub2e4": 41, "map\ub4e4\uc740": 41, "normalization\ub418\uace0": 41, "statistics\ub97c": 41, "convolution\uc5d0": 41, "\uc801\uc6a9\ub418\uace0": 41, "convolution\uc5d0\uc11c": 41, "normalization\uc774": 41, "\uc218\ud589\ub418\uae30": 41, "\ubd84\ub9ac\ub418\uac8c": 41, "\ud559\uc2b5\ub420": [41, 42], "stylemod": 41, "latent_s": [41, 45], "use_wscal": 41, "lin": 41, "equalizedlinear": 41, "gain": 41, "n_channel": 41, "layerepilogu": 41, "thing": 41, "dlatent_s": 41, "use_nois": 41, "use_pixel_norm": 41, "use_instance_norm": 41, "use_styl": 41, "activation_lay": 41, "noiselay": 41, "pixel_norm": 41, "pixelnormlay": 41, "instance_norm": 41, "instancenorm2d": 41, "top_epi": 41, "ordereddict": 41, "style_mod": 41, "dlatents_in_slic": 41, "b\uc758": 41, "style\ub85c": 41, "\ubcc0\uacbd\ud574\uc11c": 41, "\uc774\ubbf8\uc9c0\ub4e4\uc785\ub2c8\ub2e4": [41, 58], "18\uacf3\uc5d0\uc11c": 41, "\uc0ac\uc6a9\ub418\ub294\ub370": 41, "4\uacf3": 41, "\uadf8\ub2e4\uc74c": 41, "10\uacf3": 41, "\uc815\uc758\ud558\uc600\uc2b5\ub2c8\ub2e4": [41, 46], "\uc717": [41, 47], "\ubd80\ubd84\uc5d0\uc11c\ub294": 41, "\ud3ec\uc988\ub098": 41, "\uc2a4\ud0c0\uc77c\uac19\uc774": 41, "\uac08\uc218\ub85d": 41, "\ud2c0\uc744": 41, "\ubd80\ubd84\ub4e4\uc744": 41, "b\uc5d0\uc11c": [41, 52], 
"\uac00\uc838\uc654\uc74c\uc744": 41, "\uc548\uc5d0\ub294": 41, "\ubc14\ub014": 41, "\uc8fc\uadfc\uae68": 41, "\uba38\ub9bf\uacb0": 41, "\ud53c\ubd80": 41, "\ub354\ud574\uc9d1\ub2c8\ub2e4": 41, "\uc548\uc5d0\uc11c\ub3c4": 41, "\ub514\ud14c\uc77c\ub4e4\uc740": 41, "deviation\uc744": 41, "\uad6c\ud574\ubd24\uc744": 41, "\uc5bc\uad74\ud615\uacfc": 41, "attribute\ub294": 41, "\ubcc0\ud558\uc9c0\uc54a\uc9c0\ub9cc": 41, "\uba38\ub9ac\uce74\ub77d\uacfc": 41, "\uc0dd\uae40\uc744": 41, "\uc8fc\uc9c0": 41, "\uc5d0\ub9cc": [8, 41, 59], "\uba38\ub9ac\uce74\ub77d\uac19\uc740": 41, "\uc0b4\uc544\uc788\uc9c0": 41, "\ub4e4\uc5b4\uac04": 41, "\uba38\ub9ac\uce74\ub77d\uc758": 41, "\ub07c\uce5c\ub2e4\ub294": 41, "localization\uc774": 41, "\ub418\uac8c\ud558\uae30": 41, "mixing\uc774\ub77c\ub294": 41, "\uc55e": 41, "\ucabd": 41, "layer\uc5d0\ub294": 41, "generator\uac00": 41, "\uc778\uc811\ud55c": [41, 46], "style\ub07c\ub9ac": 41, "correlated\ub418\uc5b4\uc788\ub2e4\uace0": 41, "\ub9c9\uc544\uc11c": 41, "localization\uc744": 41, "\ub418\uac8c": 41, "\ubaa9\uc801\uc785\ub2c8\ub2e4": [41, 60], "\uc800\uc790\ub4e4\uc774": [41, 42, 52], "\uc788\uc5c8\ub294\uc9c0": 41, "\ud655\uc778\ud574\ubd05\uc2dc\ub2e4": 41, "\ud45c\uc640": 41, "\ubc29\ubc95\ub4e4\uc744": [41, 58], "fid\uac00": [41, 42, 49], "08466": 42, "\uc774\ubc88\uc5d0": 42, "\ub9ac\ubdf0\ud560": 42, "\uad6c\uae00": [42, 52], "\ub9ac\uc11c\uce58": 42, "\uadf8\ub8f9\uc5d0\uc11c": 42, "tmlr": 42, "transact": 42, "2023\uc5d0": 42, "\uc81c\ucd9c\ud55c": 42, "\ub17c\ubb38\uc778": 42, "\uc18d\ub3c4\ub85c": 42, "\ubc1c\uc804\ud558\uace0": [36, 42], "\uc788\ub294\ub370\uc694": [42, 59], "\uc218\uc900\uc774": 42, "\uc5bc\ub9cc\ud07c": 42, "\uc654\ub294\uc9c0": 42, "\ub370\uc774\ud130\uc778": 42, "\uc815\ub3c4\uac00": 42, "\ub418\uc5c8\ub294\uc9c0": 42, "augment\ub41c": 42, "\uc815\ub3c4\uae4c\uc9c0": 42, "\uc654\ub294\uc9c0\uc5d0": 42, "\uc2e4\ud5d8\uacfc": 42, "\ub2f5\uc744": 42, "\uc81c\uc2dc\ud569\ub2c8\ub2e4": [42, 47, 53, 55, 56, 58, 
61], "\uae00\uc758": 42, "\ubaa9\ucc28\ub294": 42, "\ub0b4\uc6a9\uacfc": [42, 54], "\uad6c\uc131\ud558\uc600\uc2b5\ub2c8\ub2e4": 42, "augmentation\uc73c\ub85c": 42, "imagenet\uc5d0": 42, "\uc0ac\uc6a9\ud558\uc600\uc744": 42, "\uae30\uc220\uc801\uc73c\ub85c": 42, "\uc5c4\uccad": 42, "\uc5c6\ub294\ub370\uc694": 42, "\uc0ac\uc6a9\ud558\ub358": 42, "\ubc29\ubc95\ub4e4\uacfc\ub294": 42, "imagen\uc744": 42, "\uc0c8\ub86d\uc2b5\ub2c8\ub2e4": 42, "\ubc1c\uc804\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 42, "\uc9c8\ubb38\uc774": 42, "\ub2f9\uc5f0\ud558\uace0": 42, "\ucc3e\uace0\uc790": 42, "\uc774\uc57c\uae30": 42, "imagen\uc774": [42, 52], "ca": [42, 58], "\uae38\uc218\ub85d": 42, "\ud5a5\uc0c1\ub418\uc5c8\ub2e4": 42, "\ub370\uc774\ud130\ub85c\ub9cc": 42, "\uc9c4\uc9dc": [42, 47], "\uc815\ud655\ub3c4\uc640": 42, "\uc801\ub2e4\ub294": 42, "\ub354\ud574\uc11c": 42, "\ud559\uc2b5\ud588\uc744": 42, "\ubaa8\ub378\ub4e4\uc5d0\uc11c": 42, "\ud5a5\uc0c1\uc774": 42, "\ud558\ub824\uace0": 42, "\ud588\ub358": 42, "\ubc29\ubc95\ub4e4\uc5d0": 42, "\uc9e7\uac8c": 42, "\ud590\ub824\uace0": 42, "\ubcf4\uac15\ud558\ub294\ub370": 42, "\uc0ac\uc6a9\ub418\uae30": 42, "\uc2dc\uc791\ud588\uc2b5\ub2c8\ub2e4": 42, "Is": 42, "readi": 42, "glide\ub85c": 42, "shot\uacfc": 42, "\uc2dc\ucf30\uc73c\uba70": 42, "\uc138\ud2b8\uac00": [42, 47], "100\uc758": 42, "\uc2dc\ucf30\ub2e4\uace0": 42, "\ud3ec\ud568\ud574\uc11c": 42, "\ub17c\ubb38\ub4e4\uc740": 42, "\ud558\uc5ec\ub3c4": 42, "\uc2dc\ud0a4\uc9c0": 42, "\ubabb\ud588\uc2b5\ub2c8\ub2e4": 42, "\uc54a\uc558\uc2b5\ub2c8\ub2e4": [42, 46], "\ub17c\ubb38\ub4e4\uacfc\ub294": 42, "\ub3d9\uc791\ud558\uace0": 42, "\uc6cc\ub099": 42, "\uc4f0\uc5ec\uc11c": 42, "\uc124\uba85\uc740": [42, 47], "\uc0dd\ub7b5\ud558\uace0": 42, "cas\uc5d0": 42, "\uc368\uc838": 42, "\ub0b4\uc6a9\uc73c\ub85c": 42, "\uc18c\uac1c\ud558\uaca0\uc2b5\ub2c8\ub2e4": 42, "cas\ub294": 42, "\ub9cc\ub4e4\uc5b4\ub0b8": 42, "\uc9c0\ud45c\uc785\ub2c8\ub2e4": 42, "\ub9cc\ub4e4\uc5b4\ub0c5\ub2c8\ub2e4": 42, 
"\ub370\uc774\ud130\ub9cc\uc744": 42, "50\uc744": 42, "cas\uac00": 42, "imagenet\uacfc": 42, "\ube44\uc2b7\ud558\ub2e4\uba74": 42, "\uac00\uc815\uc744": [42, 49, 58, 60], "\uc800\uc790\uc5d0": 42, "\uadf8\ub3d9\uc548": 42, "\uc54a\uc558\ub2e4\uace0": 42, "\uc0d8\ud50c\ub85c\ub9cc": 42, "\ub5a8\uc5b4\uc84c\uace0": 42, "\ub2f9\uc5f0\ud574\ubcf4\uc785\ub2c8\ub2e4": 42, "\ub5a8\uc5b4\uc84c\ub2e4\uace0": 42, "\uc544\ub9c8\ub3c4": 42, "\ud558\uc600\ub294\uc9c0\uc5d0": 42, "\ubaa8\ub378\ub85c\ub294": [42, 51], "\uc0ac\uc6a9\ud558\uc600\uc2b5\ub2c8\ub2e4": [42, 46, 56, 61], "\ud074\ub798\uc2a4\uc640": 42, "\uc9c0\uc5d0": 42, "\uace0\ubbfc\uc774": 42, "\ud544\uc694\ud588\ub2e4\uace0": 42, "imagen\uc5d0\uc11c": 42, "\ub418\uba74\uc11c": 42, "\ud604\uc0c1\uc77c": 42, "\ub450\ub2e8\uc5b4": 42, "\uc774\ub984\uc73c\ub85c": 42, "\uc774\ubbf8\uc9c0\uace0": 42, "\uc624\ub978\ucabd\uc774": 42, "\uc801\uc6a9\ub418\uc9c0": [42, 60], "imagen\uc785\ub2c8\ub2e4": 42, "\uc544\ub798\uc5d0\uc11c": [42, 47], "\ud074\ub798\uc2a4\uc778": 42, "schipperke\ub97c": 42, "\uc2a4\ud0a4\ud37c\ud0a4\ub77c\ub294": 42, "\ud488\uc885\uc744": 42, "\uc758\ubbf8\ud558\ub294\ub370": 42, "\uaf43\uacfc": 42, "\uc804\ud600": [42, 47], "\uc5c9\ub6b1\ud55c": 42, "\ud588\ub294\uc9c0\ub97c": 42, "\uc6d0\uc73c\ub85c": 42, "imagen\uc5d0\uc11c\ub3c4": 42, "\ubd80\ubd84\uc774\ub77c": 42, "\uc54a\uc558\uace0": [42, 61], "\ucd9c\ub825\uc73c\ub85c": 42, "\uace0\ud574\uc0c1\ub3c4\uc758": [42, 44, 54], "\uc801\uc5b4\uc11c": 42, "210k": 42, "\ud559\uc2b5\ud558\uc600\uace0": [42, 55], "optimizer\uc758": 42, "\uc0ac\uc6a9\ud558\uc600\ub358": [42, 55], "optimizer\ub97c": 42, "490k": 42, "\uc120\ud0dd\uc758": 42, "10k\uac1c\uc758": 42, "\uc0d8\ud50c\ub4e4\uc5d0": 42, "\uacc4\uc0b0\ud588\uc744": 42, "\uc120\ud0dd\ud588\ub2e4\uace0": 42, "\uc815\ud588\ub294\uc9c0\ub97c": 42, "coeffici": [42, 46, 56], "\ubc1b\ub294\ub2e4\uace0": 42, "\uc124\uba85\ud558\uba74": 42, "\ud655\ub960\uc801\uc778": 42, "\ubcf4\uc774\uac8c": 42, 
"\ub9cc\ub4e4\uba70": 42, "\ucc38\uace0\ud574\uc8fc\uc138\uc694": 42, "\ubd84\ub958\uae30\ub098": 42, "\ubc18\uc601\ud560\uc9c0\ub97c": 42, "\uc758\ubbf8\ud560": 42, "\ud2b9\uc131\uc774\ub098": 42, "\uc870\uc808\ud568\uc73c\ub85c\uc368": 42, "\ub85c\uadf8": 42, "\uacc4\uc218\ub294": 42, "\uc0ac\uc6a9\ub418\uba70": 42, "\uc124\uc815\ubc95\uc5d0": 42, "\uc124\uba85\ud558\uaca0\uc2b5\ub2c8\ub2e4": [42, 49], "\ud2b9\uc9d5\uacfc": 42, "\ub2e4\uc591\uc131\uc758": 42, "1\ucc28": 42, "sweep\uc73c\ub85c": 42, "\uc0d8\ud50c\ub7ec\ub97c": 42, "50k\uc5d0": 42, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\ub97c": 42, "\ucc3e\uc2b5\ub2c8\ub2e4": 42, "sweep\uc758": 42, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\uc758": 42, "\ubc94\uc704\ub294": 42, "sweep": 42, "fid\ub294": 42, "variance\ub294": 42, "1000\uc774\uc5c8\uc744": 42, "\ub54c\ub77c\uace0": 42, "sweep\uc774": 42, "\ub05d\ub09c": 42, "weight\uc5d0": 42, "sweep\uc744": 42, "\ub54c\uc5d0\ub294": [42, 48], "2m": 42, "guidacn": 42, "cas\ub97c": 42, "\uce21\uc815\ud588\ub2e4\uace0": 42, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130\uc5d0": 42, "sweep\uc5d0": 42, "\uacb0\uacfc\uace0": 42, "\uac00\uc6b4\ub370\uc640": 42, "\ub098\ud0c0\ub0b8": 42, "\uc120\ud0dd\ud558\ub294": [42, 48], "range\ub294": 42, "denos": 42, "129": 42, "\ubcc0\uacbd\ud588\uc744": 42, "cas\uc758": 42, "\uadf8\ub798\ud504\ub97c": [42, 54], "\uadf8\ub798\ud504\uc785\ub2c8\ub2e4": 42, "logvar": [42, 60], "coeff\uac00": 42, "3\uc77c": 42, "\ubd84\uc11d\ud574\ubcf4\uc790\uba74": 42, "\ub192\uc544\uc9c0\uc9c0\ub9cc": 42, "score\uc5d0\ub294": 42, "\ubd80\uc815\uc801\uc778": 42, "\uc8fc\uba70": [42, 52], "augmentation\uc774": 42, "0\uc77c": 42, "\ud558\uc774\ud37c\ud30c\ub77c\ubbf8\ud130": 42, "\uc124\uc815\ud55c": 42, "\uac19\ub2e4\uace0": 42, "\ud504\ub85c\ud1a0\ucf5c\uc744": 42, "\ub530\ub790\ub294\uc9c0\uc5d0": 42, "balance\ub97c": 42, "\ud569\uc131\ud588\uc73c\uba70": 42, "\uaddc\ubaa8\ub294": 42, "1\ubc30\uc778": 42, "10\ubc30\uc778": 42, "12m": [42, 54], 
"\ubc94\uc704\ub97c": [42, 61], "\ud569\uc131\ud588\ub2e4\uace0": 42, "\uc9c0\ud45c\uc778": 42, "is\uc758": 42, "\ubd05\ub2c8\ub2e4": 42, "\ud45c\uc5d0\uc11c": 42, "\ud29c\ub2dd\ub41c": 42, "\ubca0\uc774\uc2a4\ubaa8\ub378\ub4e4": 42, "resolution\uacfc": 42, "\ud574\ub2f9\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 42, "\ud655\uc778\ud558\ub294": 42, "5\uc5d0\uc11c": [36, 42, 43], "\uc131\ub2a5\uc774\uace0": 42, "\ube68\uac04\uc0c9": 42, "\uc131\ub2a5\uc785\ub2c8\ub2e4": 42, "\ubca0\uc774\uc2a4\ub77c\uc778": 42, "cdm": 42, "\uadf8\ub9bc\uc774\uba70": 42, "\uac00\uc6b4\ub370\ub294": 42, "\uc624\ub978\ucabd\uc740": 42, "\ubd80\ubd84\ubcf4\ub2e4": 42, "\uc704\ucabd\uc5d0": 42, "\uc704\uce58\ud558\uba74": 42, "\ubca0\uc774\uc2a4\ub77c\uc778\ubcf4\ub2e4": 42, "\ubcf4\uc778\ub2e4\ub294": 42, "2\uc5d0\uc11c\ub3c4": 42, "\uc8fc\ubaa9\ud560": 42, "\ub9cc\ud55c": [42, 45], "resnet50\uc774": 42, "\ub2e4\uc6b4\uc0d8\ud50c\ub9c1": 42, "\ud568\uc5d0\ub3c4": 42, "resolution\ubcf4\ub2e4": 42, "resolution\uc758": 42, "\uc6d4\ub4f1\ud788": [42, 53, 56], "\uc2dc\ucf30\uc744": 42, "cas\uc640": 42, "cas\uc5d0\uc11c\ub294": 42, "resnet50": 42, "\ud655\uc778\ud588\uc9c0\ub9cc": [42, 59], "\uc774\uc678\uc5d0": 42, "\ubaa8\ub378\ub85c\ub3c4": 42, "\ubcf8\ub2e4\ub294": 42, "\ucc28\uc774\uc810\uc774": [42, 47], "\uc0b4\ud3b4\ubcf8": 42, "\ub0ae\uc558\uc9c0\ub9cc": 42, "\ub370\uc774\ud130\ub9cc": 42, "onvnet\uae30\ubc18": 42, "\uc591\uc0c1\uc744": 42, "\ubcf4\uc600\uc2b5\ub2c8\ub2e4": 42, "\uaddc\ubaa8\uc5d0": 42, "50\uc758": 42, "\ubd84\uc11d\ud55c": 42, "8m": 42, "\ub54c\uae4c\uc9c0\ub294": 42, "\uc88b\uc558\uc73c\ub098": 42, "\ub418\uc5c8\uc744": 42, "sclae": 42, "\ub2ec\uc131\ud588\uc2b5\ub2c8\ub2e4": 42, "76": 42, "239": 42, "69": 42, "resnet\uacfc": 42, "accuracy\ub97c": 42, "\uc2dc\ucf30\uc2b5\ub2c8\ub2e4": 42, "\uc0dd\uac01\ud574\ubcfc\ub9cc\ud55c": 42, "\uac70\ub9ac\ub4e4\uc774": 42, "\ud558\ub098\ub294": [36, 42, 55], "256x256\ubcf4\ub2e4": 42, "\ub2e4\uc6b4\uc0d8\ud50c\ub9c1\uc744": 42, 
"\ud558\ub354\ub77c\ub3c4": 42, "resolution\uc774": [42, 54], "\ub2f4\ub294\ub2e4\ub294": 42, "\uac83\uc77c": 42, "\uc815\ud655\ub3c4\uac00": 42, "\uc99d\uac00\ud588\uc9c0\ub9cc": 42, "\ub370\uc774\ud130\uc5d0\uc11c\ub294": 42, "\uadf8\ub807\uc9c0": [42, 45], "\uace0\ud574\uc0c1\ub3c4\uc5d0": 42, "\uc815\uad50\ud55c": [8, 42], "\uc2dc\uc0ac\ud558\uace0": 42, "\ub9ac\ubdf0\ub97c": 42, "\ub9c8\uce58\uaca0\uc2b5\ub2c8\ub2e4": 42, "\ub290\ub080": 42, "\uc0b0\uc5c5\uc5d0\uc11c\ub294": 42, "shortage\ub098": 42, "imbal": 42, "\ubc1c\uc0dd\ud558\ub294\ub370": 42, "\ud574\uacb0\ubc95": 42, "\ud558\ub098\uac00": [42, 49], "\uac19\ub2e4\ub294": 42, "\uc0b0\uc5c5\uc5d0\uc11c\ub9cc": 42, "\ud14d\uc2a4\ud2b8\uac00": 42, "\ud569\uc131\ud558\uace0\uc790": 42, "\ud574\uc57c\ud558\ub294": [8, 42], "\uaf64\ub098": 42, "\ubd88\ud3b8\ud560": 42, "\uac19\uc544\uc11c": 42, "\uac16\ub294\uc9c0": 42, "\uc788\uc5c8\uc73c\uba74": 42, "\uac1c\uc778\uc801\uc778": [8, 42], "\uc720\ucd94\ud574\ubcfc": 42, "\uc21c": 42, "\uc788\uc9c0\ub9cc\uc694": 42, "devocean": 43, "techboarddetail": 43, "164320": 43, "boardtyp": 43, "writer": 43, "searchdata": 43, "sam56903": 43, "subindex": 43, "idlist": 43, "pnwriterid": 43, "kwang": 43, "su": 43, "mun": [43, 47, 62], "5\uc7a5\uc73c\ub85c": 43, "\ucf58\uc149\ud2b8": 43, "\ubf51\uc544\ub0b4\ub294": 43, "\uc790\uc5f0\uc5b4\ub97c": 43, "creation\uc5d0": 43, "\uc804\ub840\uc5c6\ub294": 43, "\uc790\uc720\ub3c4\ub97c": 43, "contept\ub97c": 43, "\uadf8\uac83\uc758": 43, "\ubc14\uafb8\uac70\ub098": 43, "\uc5ed\ud560\uc774": 43, "\uc8fc\uc5b4\uc9c0\uac70\ub098": 43, "\ucc38\uc2e0\ud55c": 43, "\uc7a5\uba74\uc774": 43, "\uadf8\ub824\uc9c0\ub294\uac74": 43, "\ubd88\ubd84\uba85\ud558\ub2e4": 43, "\uc774\uac83\uc744": 43, "\uadf8\ub824\uc918": 43, "\uc774\uac83": 43, "\uac83\uc774\ub0d0\ub294": 43, "\ubb3c\uc74c\uc5d0\ub294": 43, "5\uac1c\ub9cc\uc73c\ub85c": 43, "\uc0ac\ubb3c\uc774\ub098": 43, "\ubb38\uc7a5\uc5d0": [43, 47], "\ub179\uc544\ub4e4\uc5b4\uac00": 43, 
"\uc774\ub04c\uc5b4": [36, 43], "\ub3c5\uc790\uc801\uc774\uba74\uc11c": 43, "\ucf58\uc149\ud2b8\ub97c": 43, "capture\ud558\uae30": 43, "\ucda9\ubd84\ud558\ub2e4\ub294": 43, "\uc54c\uac8c": 43, "\ub3c4\uc785\ud558\ub294": 43, "\uc77c\uc740": 43, "\ud655\uc7a5\ub41c": 43, "retraining\ud558\ub294": 43, "\uc5c4\uccad\ub098\uac8c": 43, "\uc608\uc81c\uc5d0": 43, "\uce58\uba85\uc801\uc778": [43, 46, 56, 58], "\ub9dd\uac01\uc744": 43, "\ucd08\ub798\ud55c\ub2e4": 43, "figure\uc5d0\uc11c": 43, "\uc9c0\ub098\uba74\uc11c": 43, "508": 43, "701": 43, "set\uc73c\ub85c": [43, 48], "\ubcc0\ud658\ub418\uace0": 43, "\uc790\uccb4": 43, "\ub2e4\uc6b4\uc2a4\ud2b8\ub9bc": 43, "\uc81c\uacf5\ub428": 43, "concept\ub97c": 43, "word\uc778": 43, "\ub2e8\uc5b4\uc640": 43, "\ucc98\ub9ac\ub418\uba70": 43, "\uad6c\uc131\ud558\ub294\ub370": 43, "\uc758\ub3c4\ud55c\ubc14\uc640": 43, "\uadf8\ub9bc\uc774\ub77c\uace0": 43, "\uc0dd\uc131\ubaa8\ub378": 43, "ldm\uc774": 43, "\uc4f0\uc784": 43, "untouched\ub418\uc5b4": 43, "\ub4e4\uc5b4\uac00\uc9c0": 43, "\uc54a\ub294\ub4ef\ud568": 43, "\uc774\ud574\ub3c4\ub098": 43, "generalization\uc744": 43, "\uc720\uc0ac\ub2e8\uc5b4": 43, "inversion\uc2dc\ucf1c": 43, "\ud504\ub808\uc784\ud654": 43, "\uc8fc\uc5b4\uc9c4\ub2e4": 43, "\uc124\uc815\ud574": [43, 45], "concept\uc778": 43, "found": 43, "palavra": 43, "\ubcf5\uad6c": [43, 47], "segmentation\uc744": 43, "palavra\ub294": 43, "\ucc38\uc870\ud558\ub294": 43, "\uc2dd\ubcc4\ud568": 43, "\uac80\uc0c9\uc744": 43, "\uc124\uba85\ud558\uac70\ub098": 43, "\uc7a5\uba74\uc5d0\uc11c": 43, "\ubd84\ud560\ud558\uae30": 43, "\ubcf4\ub4ef\uc774": 43, "goal": 43, "specifi": 43, "\uc758\uc5ed": 43, "\uc758\ub3c4\ud55c": 43, "\ub9de\ucd98": 43, "\uac00\uc774\ub4dc\ud574\uc11c": 43, "\uc131\uacfc\ubb3c\uc744": 43, "\uc778\ucf54\ub529\ud558\ub294\ub370": 43, "representation\uc5d0": 43, "\ud6c4\ubcf4\uad70\uc744": 43, "\ucc3e\ub294\ub2e4": 43, "understanding\uc744": 43, "\uc0dd\uc131\uc790\uac00": 43, "\uadf8\ub9b0\ub2e4": 43, 
"inversion\uc5d0\uc11c": 43, "hyoseok": 43, "entri": 43, "vector\ub85c\ubd80\ud130": 43, "\uc774\uc758": 43, "\uc5ed\uacfc\uc815\uc73c\ub85c\uc368": 43, "inverting\uc2dc\ucf1c": 43, "\uc54c\uc544\uac00\ub294": 43, "\uc0dd\uc131\ubaa8\ub378\ub85c\uc11c": 43, "\ub9d0\ud588\ub4ef\uc774": [43, 47], "\uac74\ub4e4\uc9c0": 43, "\ubb38\uc790\uc5f4\uc758": 43, "\ub2e8\uc5b4\ub294": 43, "\ud1b5\uacfc\ud558\uba70": 43, "dictionary\uc5d0\uc11c": 43, "\ubcc0\ud658\ud568": 43, "\ubca1\ud130\uc5d0": 43, "\uc5f0\uacb0\ub428": 43, "index\uc5d0": 43, "encoder\uc778": 43, "c_\u03b8\uc758": 43, "\uc77c\ubd80\ub85c": 43, "\uc0bc\uc558\uc74c": 43, "\ub098\ud0c0\ub0b4\uae30": 43, "\uc790\ub9ac\ud45c\uc2dc\uc790": 43, "\ubb38\uc790\uc5f4\uc778": 43, "\uc9c0\uc815\ud568": 43, "palavra\ub97c": 43, "\ucd94\uc815\ud568": 43, "\uac1c\uc785\ud574\uc11c": 43, "tokenize\ub41c": 43, "\ubb38\uc790\uc5f4\uacfc": 43, "\ub300\uccb4\ud558\uc5ec": 43, "\ubcf8\uc9c8\uc801\uc73c\ub85c": 43, "\uc5b4\ud718": 43, "\uc8fc\uc785\ud568": 43, "\ubb18\uc0ac\ud568": 43, "v\ub97c": 43, "\ucd5c\uc801\ud654\ud568": 43, "\uace0\uc815\ud558\uae30": 43, "\ud15c\ud50c\ub9bf\uc5d0\uc11c": 43, "\uc911\ub9bd": 43, "rendit": [43, 50], "\uc6d0\ubcf8\uacfc": 43, "\ubaa9\uc801\uc774": 43, "\uc544\ub2d0\uae4c": 43, "\uc2f6\uc74c": 43, "\ubaa9\ud45c\uc2dd\uc740": 43, "loss\ud568\uc218\uc640": 43, "\uc720\uc0ac\ud568": 43, "c\u03b8\uc640": 43, "e\u03b8\ub294": 43, "\ud3ec\ucc29\ud560": 43, "\uc788\uc744\uac83\uc73c\ub85c": 43, "\ud3ec\ucc29\ud558\ub294": 43, "guide\uc5d0": 43, "\ub9de\ucdb0\uc11c": 43, "\uc8fc\uc81c\uc5d0": 43, "\ucea1\uc158\ub4e4\uc5d0": 43, "\ucd94\ub860\uc774": 43, "\uac00\ub2a5\ud588\uc74c": 43, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\ub3c4": 43, "\ubcf4\uc874\ud558\uba74\uc11c": [8, 43], "\uc0ac\uc9c4\uc5d0\uc11c\uc640": 43, "\ubc31\uc778": 43, "\ub0a8\uc131": 43, "\uc758\uc0ac\ub97c": 43, "\uadf8\ub824\ub0c8\uc74c": 43, "\ub9ce\uc558\uc74c\uc744": 43, "imageset\uc5d0\uc11c": 43, "\uc778\uc885\uc801": 43, 
"\uc778\uc2dd\uc744": 43, "embedding\uc758": 43, "y\ucd95": 43, "\ubcf5\uc81c\ud558\ub294\uc9c0": 43, "\uc0dd\uc131\ud558\ubbc0\ub85c": 43, "\ucee8\uc149\uc5d0": 43, "64\uac1c\uc758": 43, "x\ucd95": 43, "\ub09c\uc774\ub3c4\uc640": 43, "\uc124\uc815\uc758": 43, "embedding\uc5d0\uc11c": 43, "similarity\ub97c": 43, "\uc2a4\ucf54\uc5b4\ub294": 43, "capability\uc640": 43, "\uc2e0\ub8b0\ub3c4\ub97c": 43, "\ub530\ub984": 43, "evaluation1": 43, "baseline\uacfc": 43, "set\uc5d0\uc11c": 43, "\ub2ec\uc131\ud558\uace0": 43, "baseline\uc5d0\uc11c": 43, "editablity\uc744": 43, "\ub098\ud0c0\ub0b4\uace0": 43, "word\ub9cc": 43, "\uc815\ud655\ub3c4\ub85c": 43, "\ucea1\ucc98\ud558\ub294\ub370": 43, "tradeoff": [43, 57], "\uace1\uc120\uc758": 43, "outline\uc744": 43, "\uadf8\ub9ac\uba70": 43, "\uc218\uc815\ub420": 43, "target\uc758": 43, "\ucea1\ucc98\ud558\uc9c0\ub294": 43, "\ubc97\uc5b4\ub098\uba74": 43, "editability\uac00": 43, "\uac10\uc18c\ud558\ub294": [36, 43], "reconstruction\uc774": 43, "\ubcc0\uacbd\ud574": 43, "\uace1\uc120\uc744": 43, "tradeoff\uc5d0": 43, "\ubabb\ud558\uba74\uc11c\ub3c4": 43, "\uac10\uc18c\ud568": 43, "\uc124\ubb38\uc9c0": 43, "\uc81c\uacf5\ubc1b\uc558\uace0": 43, "\uc774\ubbf8\uc9c0\uc640\uc758": [43, 50, 56], "\uc720\uc0ac\uc131\uc5d0": 43, "\ub9e4\uae40": 43, "\uc9c8\ubb38\ubcc4\ub85c": 43, "600\uac1c\uc529": 43, "200\uac1c\uc758": 43, "\uc751\ub2f5\uc744": 43, "\uc758\ubbf8\ub860\uc801\uc778": 43, "\ubcf8\uc9c8\uc744": 43, "\ud30c\uc545\ud558\uac70\ub098": 43, "shape\ub97c": [8, 43], "2\uc2dc\uac04\uc774": 43, "\uc18c\uc694\ub428": 43, "\uc124\uc815\uacfc": [43, 46], "\uac1c\uc778\ud654\ub418\uba70": 43, "\uc18c\uac1c\ud568": 43, "word\ub85c": 43, "inverse\ud558\uc5ec": 43, "\uc791\ub3d9\ud568": 43, "word\ub294": 43, "\uc27d\ub3c4\ub85d": 43, "interpace\ub97c": 43, "\uc0ac\uc6a9\ud558\uc9c0\ub9cc": [43, 47], "\uc5b8\uc5b4\uc758": 43, "\ud55c\uacc4\uc5d0": 43, "\uc811\uadfc\ud560": 43, "\ub2e8\uc11c\ub97c": 43, "\uc0ac\uc6a9\uac00\ub2a5\ud55c": 43, 
"\uad6c\ud604\ub428": 43, "\uc758\uc874\ud558\uc9c0": [43, 46, 56], "\uac70\uae30\uc5d0\uc11c": 43, "preserav": 43, "\ud5a5\uc0c1\ub420": 43, "08818": 44, "\ubaa8\ub378\ub9c1\uc758": 44, "\ubd80\uc871\ud558\uba70": 44, "\uc774\uc720\uac00": 44, "temproal": 44, "\uc0d8\ud50c\ub4e4": 44, "\ub07c\ub9ac\uc758": 44, "\uac70\uce58\uac8c": 44, "\uc2dc\ud000\uc2a4\uc758": 44, "\uc2dc\uac04\ucd95\uc5d0": 44, "1280x2048": 44, "\uc778\ucf54\ub529\ud574": 44, "\uc815\ub82c\ud558\uc5ec": 44, "\uc77c\uad00\uc801\uc778": 44, "\uc790\uc728": 44, "\uc8fc\ud589\uc758": 44, "\uc2dc\ubbac\ub808\uc774\uc158": 44, "\uc5d4\uc9c4": 44, "512x1024": 44, "creation": [20, 44, 55], "\uac1c\ubcc4\uc758": 44, "\ub80c\ub354\ub9c1\ud574": 44, "\uc5f0\uad00\ub418\uba70": 44, "\uc815\ub82c\ud560": [36, 44], "\uc778\uc2dd\ud560": 44, "einop": 44, "\uad6c\ud604\ud588\uc73c\uba70": 44, "\ubc30\uce58x\uc2dc\uac04": 44, "\uc778\ucf54\ub529\uc774": 44, "\ubc30\uce58": 44, "ii": 44, "\uac00\uc911\ud569\uc744": 44, "\uc2dc\ud000\uc2a4\ub85c": 44, "flickering\uc774": 44, "\ubc1c\uc0dd\ud558\ub294": [44, 58], "\uad6c\ucd95\ub41c": 44, "patch": [44, 57], "\uc608\uce21\ud558\uac8c\ub054": 44, "\ud504\ub808\uc784\ub4e4\uc740": 44, "\uc785\ub825\ub41c\ub2e4": 44, "\uc7ac": 44, "\ub3c4\uc785\ud574": 44, "\ud0a4": 44, "\uc81c\uc57d\uc73c\ub85c": 44, "\uc50c\uc6b4\ub2e4": 44, "16t": 44, "\ud574\uc0c1\ub3c4\uae4c\uc9c0": 44, "\uc601\uac10\ubc1b\uc544": 44, "4\ubc30": 44, "\ud0a4\uc6e0\ub2e4": 44, "\uad6c\ucd95\ud558\uae30": 44, "\ub2e8\uc704\ub85c": 44, "\uc5f0\uc0b0\ud558\uace0": 44, "\ubaa8\ub378\ub9c1\uc774": 44, "\uc218\ud589\ub41c\ub2e4": 44, "\uadf8\ub85c": 44, "\uc9c4\ud589\ud558\uae30\uc5d0": 44, "rd": 44, "683": 44, "060": 44, "8\ucd08": 44, "dai": [44, 56, 59], "night": [44, 56], "crowded": 44, "7m": 44, "52k": 44, "hour": [44, 55], "320": [44, 59], "1280": [44, 59], "\uac00\ub2a5\ud574\uc84c\ub2e4": 44, "113": 44, "24fp": 44, "7\ucd08": 44, "30fp": 44, "\uc81c\ud55c\uc801\uc778": 44, 
"\ud559\uc2b5\ud588\uc9c0\ub9cc": 44, "\uc14b\uacfc": 44, "entirely\ud558\uac8c": 44, "\uc704\ucabd\uc758": 44, "16203": 45, "\ubaa8\ub378\ub85c\ubd80\ud130": 45, "\ub098\uc058\uc9c0": 45, "reason": 45, "\ud6cc\ub96d": 45, "\uc0b4\ud3b4\ubcf4\uae30": 45, "\uc77c\ub2e8": [45, 54], "\ub3d9\ubb3c\uc758": 45, "37\uac1c\uc758": 45, "pet": 45, "\uce58\uc790": 45, "\ud638\ub791\uc774": 45, "\uadf8\ub7fc": 45, "\ud68d\ub4dd\ud560": 45, "\uc218\ud589\ud574\uc11c": 45, "\ud310\ubcc4\ud55c\ub2e4": 45, "\ud074\ub798\uc2a4\uc774\ub2e4": 45, "n_sampl": 45, "\uc9c0\uc815\ub41c": 45, "\uc0d8\ud50c\ub9c1\ud574": 45, "\ud310\ubcc4\uc774": 45, "\ucd9c\ub825\ud55c\ub2e4": 45, "n_trial": 45, "\uc2dc\ub3c4\ud574\uc11c": 45, "\ud3c9\uade0\ub0bc": 45, "\ucd94\ub860\ud55c\ub2e4": 45, "\ud310\uc815\ud55c\ub2e4": 45, "\ucd94\ub860\ud560": 45, "\ub4e4\uc5b4\uc11c": [45, 53], "\ud559\uc2b5\ud558\uc9c0\ub294": 45, "\uc815\uc758\ub418\uc5b4": 45, "\uad6c\ud558\uace0": 45, "\uc18c\ubaa8\ub428": 45, "\uc904\uc778\ub2e4": 45, "\uac78\ub7ec\ub0b8\ub2e4": 45, "\ub0a8\uc558\ub2e4\uba74": 45, "\uc774\uc81c\ub294": 45, "oxford": 45, "iiit": 45, "bash": 45, "python": 45, "eval_prob_adapt": 45, "to_keep": 45, "prompt_path": 45, "pets_prompt": 45, "csv": 45, "\uc774\ub807\uac8c\uae4c\uc9c0": 45, "\uc904\uc774\ub824\uace0": 45, "\uc2a4\ud06c\ub9bd\ud2b8": 45, "rtx": 45, "3090": 45, "\ub3cc\ub9ac\uba74": 45, "\ud558\ub824\uba74": 45, "all_nois": 45, "randn": [45, 50], "max_n_sampl": 45, "eval_error": 45, "ts": 45, "noise_idx": 45, "text_emb": 45, "text_embed_idx": 45, "float32": 45, "pred_error": 45, "cpu": 45, "idx": 45, "inference_mod": 45, "tqdm": 45, "trang": 45, "batch_t": 45, "noised_lat": 45, "alphas_cumprod": 45, "t_input": 45, "float16": 45, "text_input": 45, "noise_pr": [45, 59], "encoder_hidden_st": [45, 50, 59], "mse_loss": [45, 50], "l1_loss": 45, "huber": 45, "huber_loss": 45, "notimplementederror": 45, "\ucd94\ub860\ud558\uac8c": 45, "\ub420\ud150\ub370": 45, "\uc0ac\uc6a9\ud574\uc57c": 45, 
"\ubcc0\uc218\uc5d0": 45, "\ub2ec\ub77c\uc9c0\uae30": 45, "\ub2ec\ub77c\uc84c\ub2e4": 45, "\uc62c\ub77c\uac00\ub294\uc9c0": 45, "\uc2e4\ud5d8\ud574\ubcf4\uc558\ub2e4": 45, "\ucd94\ucd9c\ud574\ub0b4\ub294": 45, "\ub6f0\uc5b4\ub0ac\ub2e4": [36, 45], "\uc0dd\uc131\ud574": 45, "\uad6c\ucd95\ud558\uace0": 45, "\ud559\uc2b5\uc2dc\ucf1c\uc11c": [45, 57], "\uc218\ud589\ud55c": 45, "\ucd94\ucd9c\ud574": 45, "\uc804\ub2ec\ud574\uc11c": 45, "\ubaa8\ub378\ubcf4\ub2e4\ub3c4": 45, "\ub192\uc740\uc9c0": 45, "aesthet": [45, 54, 59], "\ud55c\uc9c0": 45, "\ud55c\uc9c0\uc5d0": 45, "cifar10": 45, "flower": 45, "stl10": 45, "\ud544\ud130\ub9c1\uc774": 45, "\uc548\ub41c": 45, "\uc62c\ub77c\uac08": 45, "winoground": 45, "visio": 45, "linguist": 45, "\ub9e4\uce58\uc2dc\ud0a4\ub294": 45, "\uba85\uc0ac\uc808\ub07c\ub9ac": 45, "\ub4a4\ubc14\ub010": 45, "\ub3d9\uc0ac\ub07c\ub9ac": 45, "\ud615\uc6a9\uc0ac\ub07c\ub9ac": 45, "\ubd80\uc0ac\ub07c\ub9ac": 45, "\ud488\uc0ac\ub07c\ub9ac": 45, "\uc5ec\ub290": 45, "\ub9cc\uc744": [45, 57], "\ud559\uc2b5\ud588\uc74c\uc5d0\ub3c4": 45, "\uc774\uc790": 45, "\ubcc0\ubaa8": 45, "dit": 45, "101": 45, "79": 45, "\uae30\ub85d\ud558\uba70": 45, "\ub2a5\uac00": 45, "\ub2a5\uac00\ud588\ub2e4": 45, "\uc2e0\ub8b0\uad6c\uac04": 45, "\ucc0d\ud600": 45, "\ubaa8\uc591\uc758": 45, "\ud68d\ub4dd\ud55c": 45, "\uae30\ub300\ub418\ub294": 45, "ood": 45, "\uc6b0\uc218\ud568\uc744": 45, "\ub370\uc774\ud130\ub3c4": 45, "\uac1c\uc120\ub420": 45, "\ud65c\uc6a9\ud588\uc74c": 45, "\ub6f0\uc5b4\ub0a0": 45, "01469": 46, "consistency_model": 46, "audio": 46, "\uc654\uc2b5\ub2c8\ub2e4": 46, "2000\ubc30": 46, "\uc5f0\uc0b0\uc791\uc5c5\uc744": 46, "\uc0ac\uc9c4\ucc98\ub7fc": [46, 59], "ordinari": [46, 54], "trajectori": 46, "\ub9e4\ud551\ub418\ub3c4\ub85d": 46, "\ub9cc\uc871\uc2dc\ud0ac": 46, "\ubc29\uc2dd\uc73c\ub85c\ub294": [46, 61], "\ud55c\ubc88\uc758": [46, 54], "\ub9cc\uc73c\ub85c\ub3c4": [46, 54, 56, 58], "\ub450\ubc88\uc9f8": [46, 55, 58, 61], "\uac1c\uc120\ub418\uace0": 46, 
"\ubaa8\ub378\ub85c\uc11c\ub3c4": 46, "stroke": 46, "\ubcf4\uc5ec\uc900\ub2e4\ub294": 46, "\ud655\uc778\ud558\uc600\uc2b5\ub2c8\ub2e4": 46, "p_t": [36, 46], "solut": [46, 51], "\ubd84\ud3ec\ud569\ub2c8\ub2e4": 46, "\uc218\uc2dd\uc5d0\uc11c": 46, "\uc815\uc758\ud558\uace0": [46, 51, 56, 58, 61], "\ub300\uc785\ud558\uba74": 46, "\uacfc\uc815\uc73c\ub85c\ub294": 46, "euler": [46, 54, 58], "\uc5ed\ubc29\ud5a5\uc73c\ub85c": 46, "\ud480\uc5b4": 46, "\uadfc\uc0ac\uac12\uc774\ub77c\uace0": [46, 49], "\uba48\ucd94\uace0": 46, "\uadfc\uc0ac\uac12\uc73c\ub85c": 46, "\uac04\uc8fc\ud569\ub2c8\ub2e4": 46, "80": 46, "002": 46, "\uc124\uc815\ud569\ub2c8\ub2e4": [46, 55, 60, 61], "\uc18c\uac1c\ub4dc\ub9b0": [46, 58], "\uc9c4\ud589\ub418\uc5c8\uc9c0\ub9cc": 46, "\ud65c\uc6a9\ud574\ub3c4": 46, "10\ubc88": 46, "\uac70\uccd0\uc57c\ub9cc": 46, "\ubcf4\uc5ec\uc900\ub2e4\uace0": [46, 49, 50, 56, 58], "\uae30\ubc95\ub4e4\uc5d0": 46, "\uc81c\uc678\ud558\uace0\ub294": [46, 56], "\ub300\ub7c9\uc758": 46, "\uc218\uc9d1\ud574\uc57c\ud55c\ub2e4\ub294": 46, "\ub9cc\uc871\ud569\ub2c8\ub2e4": 46, "\uc608\uce21\ud558\uae30": 46, "\ub370\uc774\ud130\ub85c\ubd80\ud130": 46, "bilo": 46, "invert": [46, 56], "\ubd80\uc5ec\ud558\uc9c0\ub294": 46, "boundari": 46, "\ub9cc\uc871\ud558\uae30": 46, "\ub9cc\uc871\uc2dc\ud0a4\ub294": 46, "\uc720\uc0ac\ud558\uc5ec": 46, "leverag": [46, 52], "\ud0dd\ud569\ub2c8\ub2e4": 46, "\ub354\ubd88\uc5b4": [46, 47, 52], "\ud558\ub2e8": [46, 55, 59], "multistep": 46, "\uc720\uc5f0\uc131\ub3c4": 46, "\ub4e4\uac04\uc758": 46, "\ubcf4\uc644\ud558\uba74\uc11c": 46, "\uc608\uc2dc\ub4e4\uc744": 46, "\ud06c\ub2e4\uba74": 46, "\uc9c4\ud589\uc2dc\ucf1c": 46, "ts_": 46, "\uc790\uc138\ud558\uac8c\ub294": [46, 50, 55, 56, 58, 59, 60, 61], "2_": 46, "\ub85c\ubd80\ud130\uc758": [46, 47], "\ucd9c\ub825\uac12": 46, "\ub85c\ub294": [46, 58], "\ud655\uc778\ud574\ubcf8": [46, 58], "equiv": 46, "\ud559\uc2b5\ud558\uc600\ub2e4\uace0": 46, "stopgrad": 46, "\uc124\uc815\ud560\ub54c\ubcf4\ub2e4": 46, 
"\uc548\uc815\uc801\uc73c\ub85c": [46, 61], "\uc131\ub2a5\uc5d0\ub3c4": 46, "\uac1c\uc120\uc774": 46, "\uc808\ucc28\ub294": 46, "\uc815\ub9ac\ud560": [46, 54], "\uc218\ub834\ud560": 46, "\ub458\uc740": 46, "\uc77c\uce58\ud558\uac8c": 46, "onlin": 46, "\uc758\uc874\ud588\ub2e4\uba74": 46, "\uadfc\uc0ac\ud560": [46, 54], "\uc0ac\uc2e4\uc744": 46, "\uc788\uc2b5\ub2e4": 46, "t_nz": 46, "\uc815\uc758\ud558\uac8c": [46, 58, 59, 60, 61], "\ubc30\uacbd\uc740": 46, "\ube44\uad50\ud588\uc744\ub54c": [36, 46, 50], "\uc99d\uac00\ud558\uac8c": [46, 61], "converg": [46, 51, 52, 58], "\uc774\ub974\ub294\ub370": 46, "\uc6a9\uc774\ud569\ub2c8\ub2e4": 46, "\uac10\uc18c\ud558\uac8c": 46, "\ubc14\ub78c\uc9c1\ud558\ub2e4\uace0": 46, "\uc2e4\ud5d8\ud558\uc600\uace0": 46, "\uc9c0\ud45c\ub294": [46, 54, 58], "\ub370\uc774\ud130\uc14b\uc5d0\ub294": [46, 58], "ncsn": [46, 57], "\uc9c4\ud589\ud558\uc600\uc2b5\ub2c8\ub2e4": [46, 56, 61], "\uc88b\uc558\uace0": 46, "\uce21\uc815\ud558\ub294\ub370": 46, "\ud2b9\ud654\ub418\uc5b4": 46, "\uc124\uc815\ud588\uc744\ub54c": 46, "\uc88b\uc558\uc2b5\ub2c8\ub2e4": 46, "\ube68\ub9ac": 46, "\uc218\ub834\ud558\uc9c0\ub9cc": 46, "vice": [46, 49], "versa": [46, 49], "\uc810\ucc28\uc801\uc73c\ub85c": [46, 58], "\uc99d\uac00\uc2dc\ud0a4\uba74\uc11c": 46, "\ubcc0\ud654\uc2dc\ucf30\uc744\ub54c": 46, "pd": [46, 54], "\uacac\uc904\ub9cc\ud55c": 46, "\uc0dd\uc131\ud568\uc73c\ub85c\uc368": 46, "\uc131\uc9c8\ub3c4": 46, "unpair": 47, "1703": 47, "10593": 47, "tensorflow": 47, "\ub17c\ubb38\ub9ac\ubdf0": 47, "cyclegan\uc744": 47, "\ud55c\uad6d\uc778\uc774\ub77c\uace0": 47, "\ub72f\uc5b4\ubcf4\uae30": 47, "kwangsu": [47, 62], "changhwan": [47, 51, 60, 62], "\ub3c4\uba54\uc778\uc744": 47, "\ub3c4\uba54\uc778\uc73c\ub85c": 47, "\ubcc0\ud658\uc2dc\ud0a4\ub294": 47, "input\uacfc": 47, "\uc9dd\uc774": 47, "\uc9c0\uc5b4\uc9c4": 47, "\ud559\uc2b5\ud558\uc9c0\ub9cc": 47, "\uc5b4\ub835\uc2b5\ub2c8\ub2e4": [47, 60], "\uc9dd\uc9c0\uc5b4\uc9c4": 47, "\ubc14\uafb8\ub294": [47, 50], 
"\uad6c\ubd84\uc774": 47, "\ubd88\uac00\ub2a5\ud558\ub3c4\ub85d": 47, "\ub85c\uc758": 47, "\uac00\ud574\uc11c": 47, "\uac15\uc81c\ud558\uae30": 47, "\uc5ed\ubc29\ud5a5": 47, "\uc9c4\ud589\ud569\ub2c8\ub2e4": [47, 51, 55, 61], "\uc720\uc0ac\ud574\uc9c0\ub3c4\ub85d": 47, "\uac15\uc81c\ud558\ub294": [47, 57], "\ub3c4\uc785\ud588\uc2b5\ub2c8\ub2e4": 47, "transfigur": 47, "season": 47, "\ubcf4\uc5ec\uc92c\ub2e4\uace0": 47, "\ub123\uc73c\uba74": 47, "\ucc38\uc870\ud558\uba74": 47, "\ud559\uc2b5\ud574\uc11c": 47, "\ub098\uc624\ub3c4\ub85d": 47, "\ucc98\uc74c\uc758": 47, "\uc6d0\ubcf8\uc73c\ub85c": 47, "\uc21c\ud658": 47, "\uc0ac\uc774\ud074": 47, "\uc548\uc815\uc801\uc774\uac8c": 47, "gram": 47, "\uc77c\uce58\ub97c": 47, "\uadf8\ub9bc\uc73c\ub85c": 47, "\ubcc0\ud658\ud55c\ub2e4\uac70\ub098": 47, "\ub0ae\uc5d0": 47, "\ubc24\uc5d0": 47, "\uc788\uc5c8\ub294\ub370\uc694": 47, "\ube44\uc2fc": 47, "\uc77c\uc774": 47, "\uc77c\ub300\uc77c\ub85c": 47, "\uc9dd\uc9c0\uc5b4\uc9c0\uc9c0": 47, "\ubaa8\uc74c\uc758": 47, "\ucea1\uccd0\ud558\uace0": 47, "\ubaa8\uc74c\uc73c\ub85c": 47, "\uc81c\uacf5\ub418\uace0": 47, "\uc5c6\ub3c4\ub85d": 47, "\ubb34\uc870\uac74": 47, "\uc774\ub8ec\ub2e4\ub294": 47, "\ub73b\ud558\uc9c0\ub294": 47, "\ubb34\ud55c\ud55c": 47, "\uc77c\uc5b4\ub098\uae30\ub3c4": 47, "dl": 47, "blogspot": 47, "\ub4e0": 47, "\ub9e4\ud551\ud558\uba74\uc11c": 47, "\ud604\uc0c1\uc785\ub2c8\ub2e4": 47, "\ud604\uc0c1\uc740": [47, 56], "\uc785\uc7a5\uc5d0\uc11c": 47, "\uc0ac\uc9c4\uc774": [47, 49, 55], "\uac00\uc9dc\uc778": 47, "\uad6c\ubcc4\ud558\ub294": 47, "\uc18d\uc774\uae30\ub9cc": 47, "\uc6b0\ub9ac\uc758": [47, 57], "\ubaa9\uc801\uacfc": 47, "\uc0c1\uad00\uc774": 47, "\ub9cc\ub4e4\ub354\ub77c\ub3c4": 47, "\uc54a\uc544\uc11c": 47, "\ubc1c\uc0dd\ud569\ub2c8\ub2e4": [47, 51, 53], "\uc774\uc288\ub85c": 47, "\ud544\uc694\ud574\uc84c\uc2b5\ub2c8\ub2e4": 47, "\uc601\uc5b4": 47, "\ud504\ub791\uc2a4\uc5b4": 47, "\uc601\uc5b4\ub85c": 47, "\ubc88\uc5ed\ud588\uc744": 47, 
"\ub3c4\ub2ec\ud558\ub294": 47, "\uac19\uc544\uc57c": 47, "\uc758\ubbf8\uc758": 47, "\uc774\uc6a9\ud569\ub2c8\ub2e4": 47, "\ubaa9\uc801\uc2dd\uc744": 47, "\uc815\ubc29\ud5a5": 47, "\ub3c4\uc2dd\ud654": 47, "\uc6a9\uc5b4": 47, "\ud45c\uae30": 47, "d_x": [47, 56], "d_y": [47, 56], "\uad6c\ubd84\ud558\uace0": 47, "\ubaa9\uc801\uc2dd\uc73c\ub85c": 47, "\uc77c\uce58\uc2dc\ud0a4\uae30": [47, 58], "\ubaa8\uc21c\ub418\ub294": 47, "\ubaa9\uc801\uc2dd\uc740": 47, "\uac08": [36, 47, 49], "\uc218\uc2dd\uc774": 47, "\ub098\uc635\ub2c8\ub2e4": 47, "\uc81c\ud55c\uc744": 47, "\uc608\ube44": 47, "\ub300\uccb4\ud574\ubd24\ub294\ub370": 47, "\uad00\ucc30\ud560": 47, "\uc640\uc758": [47, 56, 61], "\uc0c1\ub300\uc801": 47, "\uc911\uc694\ub3c4\uc5d0": 47, "\uacb0\uc815\ub429\ub2c8\ub2e4": 47, "\ub85c\uc11c": 47, "\ubcf4\uc5ec\uc900": [47, 59], "1603": 47, "08155": 47, "\ucc44\ud0dd\ud569\ub2c8\ub2e4": 47, "sever": 47, "fraction": 47, "\uc548\uc815\ud654\uc2dc\ud0a4\uae30": 47, "\ud14c\ud06c\ub2c9\uc744": [47, 54], "50\uac1c\ub97c": 47, "\uc800\uc7a5\ud574": 47, "\ud55c\uaebc\ubc88\uc5d0": 47, "\uc9c4\ub3d9\uc744": 47, "sjinu": 47, "ysbsb": 47, "lsgan": 47, "\ucc38\uace0\ud588\uc73c\uba70": 47, "\uc5c5\ub370\uc774\ud2b8\uc2dc": 47, "\uc774\ubcf4\ub2e4": 47, "\uace0\ucc28\uc6d0\uc774\uc9c0\ub9cc": 47, "\uac04\ub7b5\ud788": [47, 58], "2\ucc28\uc6d0\uc744": 47, "\ud45c\ubc29\ud558\uba74": 47, "\uacb0\uc815\uacbd\uacc4\ub97c": 47, "\ucabd\uc774": 47, "\uac00\uc9dc": [47, 51], "\uc601\uc5ed\uc785\ub2c8\ub2e4": 47, "\uc544\ub798\uc5d0": 47, "\uc0ac\uc6a9\ud55c\ub2e4\uba74": 47, "\uc785\uc7a5\uc5d0\uc11c\ub294": 47, "\uc18d\uc774\uace0": 47, "vanish": [47, 51], "\uc77c\uc5b4\ub098\uae30": [47, 56], "\uc18d\uc778\ub2e4\ub294": 47, "\uc774\uc720\ub9cc\uc73c\ub85c": 47, "\uc2e4\ud5d8\uc5d0": 47, "\ub3d9\uc548\uc5d0\ub294": 47, "0002": 47, "\uc124\uc815\ud588\uace0": 47, "\uc870\uae08\uc2dd": 47, "\uc218\ub834\ud558\uac8c": [47, 57, 61], "\ud558\uc600\uc2b5\ub2c8\ub2e4": [47, 61], "cogan": 47, 
"simgan": 47, "\ucc38\uac00\uc790\ub4e4\uc740": 47, "\uc0ac\uc9c4\uc774\ubbf8\uc9c0": 47, "\uac00\uc9dc\uc774\ubbf8\uc9c0\uc5d0": 47, "\ub178\ucd9c\ub41c": 47, "\uc9c4\uc9dc\ub77c\uace0": 47, "\uc0dd\uac01\ub418\ub294": 47, "\uc120\ud0dd\ud558\uac8c": 47, "fcn": 47, "\ud14c\uc2a4\ud2b8\uc5d0": 47, "\uae30\uc900\uc784\uc5d0\ub3c4": 47, "\uc2e4\ud5d8\uc774": 47, "\uc591\uc801\uc778": 47, "\uae30\uc900\uc744": 47, "\uc0ac\uc9c4\uc5d0": 47, "\ub808\uc774\ube14": 47, "\ub9f5\uc744": 47, "\ub9f5\uc740": 47, "\ub3c4\ub85c": 47, "\uc790\ub3d9\ucc28": 47, "\uac10\uc9c0\ud558\uba74": 47, "\uc131\uacf5\ud55c": 47, "cityscap": 47, "aginst": 47, "\uc88b\uc744": 47, "\uc9c0\ub3c4\uc5d0\uc11c": 47, "\ud56d\uacf5": 47, "\uc0ac\uc9c4\uc5d0\uc11c": 47, "\uacb0\uacfc\uc5d0\uc11c": 47, "\ucc38\uac00\uc790\ub97c": 47, "\uc18d\uc77c": 47, "\uc5c6\uc5c8\uc2b5\ub2c8\ub2e4": 47, "\ub3c4\uc2dc": 47, "\ud48d\uacbd\uc5d0": 47, "\ub2a5\uac00\ud569\ub2c8\ub2e4": 47, "\uc800\ud558\ub418\ub294": [47, 53, 59], "\ud55c\ucabd": 47, "\ub3cc\ub838\uc744": 47, "\uc720\ubc1c\ud558\ub294": 47, "reconctruct": 47, "\uc608\uc2dc\ub4e4\uc785\ub2c8\ub2e4": 47, "\ub3c4\uba54\uc778\uc774": 47, "\uc7ac\uad6c\uc131\ub41c": 47, "\ub9ce\uc558\uc2b5\ub2c8\ub2e4": 47, "cmp": 47, "facad": 47, "databas": [47, 51], "\uac74\ucd95": 47, "ut": 47, "zapoos50k": 47, "\uc2e0\ubc1c": 47, "\ube44\ub86f\ud558\uc5ec": 47, "shallow": 47, "\uc595\uc740": 47, "\ucd08\uc810": 47, "\ucd08\uc810\uc774": 47, "\ub9de\uc740": 47, "\ud750\ub9bf\ud558\uac8c": 47, "\uc791\ud488": 47, "\uad6c\ubaa9\ud558\uace0\uc790": 47, "\uac15\uc870\ud558\uae30": 47, "domain\uc740": 47, "\uc2a4\ub9c8\ud2b8\ud3f0\uc758": 47, "\uc870\ub9ac\uac1c\ub85c": 47, "target\uc740": 47, "\uc870\ub9ac\uac1c\uac00": 47, "discuss": 47, "\uc544\ub2c8\uc5c8\uc2b5\ub2c8\ub2e4": 47, "\ubcc0\ud654\ub9cc": 47, "\ud615\uccb4\uac00": 47, "\uc560\ub9e4\ud574\uc9c4": 47, "\ucf54": 47, "\uc785": 47, "\uad6c\ud604\ud558\ub294\ub370": 47, "\ub9d0": 47, "\uc5bc\ub8e9\ub9d0": 47, 
"\uc608\uc81c\uc758": 47, "\ud0c0\ub294": 47, "\ub9ce\uc558\ub294\ub370": 47, "\uc5bc\ub8e9\ub9d0\uc758": 47, "\uc5c6\ub2e4\ubcf4\ub2c8": 47, "\ubc30\uacbd\ub3c4": 47, "\uc5bc\ub8e9": 47, "\uadf8\ub9ac\uac70\ub098": 47, "\uc5bc\ub8e9\ub9d0\uc5d0\uc11c": 47, "\ub178\ub797\uac8c": 47, "\uce60\ud55c": 47, "\ub098\ubb34\uc640": 47, "\uac74\ubb3c\uc758": 47, "\ubaa8\ud638\uc131\uc744": 47, "\ud574\uacb0\ud558\ub824\uba74": 47, "\ub298\ub9ac\ub294\ub370": 47, "\uae30\uc5ec\ud569\ub2c8\ub2e4": 47, "12092": 48, "unoffici": 48, "donggeun": [8, 48, 49, 52, 62], "sean": [8, 48, 49, 52, 62], "ko": [8, 48, 49, 52, 62], "\ubaa8\ub378\uc774\uba70": 48, "120\uc5b5\uac1c": 48, "5\uc5b5": 48, "\ud1b5\ud558\uc5ec": 48, "2021\ub144": 48, "diverse\ud55c": 48, "3\uc640": 48, "transformer\uc744": 48, "architecture\uc744": [48, 49], "model\uba70": 48, "\uc218\ub294": 48, "\ubd80\ubd84\ub9cc": [48, 49], "1750\uc5b5": 48, "\uac1c\uc218\uc758": 48, "2005": 48, "14165": 48, "jalammar": 48, "categor": 48, "\uac16\ub294\ub2e4\uace0": 48, "cnn": 48, "d\ucc28\uc6d0\uc758": 48, "\uadf8\ub9ac\ub4dc\ub85c": 48, "\ud835\udc52_1": 48, "\ud835\udc52_\ud835\udc58": 48, "code\ub85c": 48, "e_j": 48, "\ucc3e\uc544\uc11c": 48, "\ubd80\uc5ec\ud568": 48, "p2yeong": 48, "explain": 48, "pixel\uc744": 48, "\uc774\ubbf8\uc9c0\uc77c\uc218\ub85d": 48, "\uba54\ubaa8\ub9ac\ub7c9\uc774": 48, "\ud544\uc694\ud574\uc11c": 48, "short": 48, "model\ub4e4": 48, "dependency\ub97c": 48, "\uac83\uc774\uba70": 48, "detail\uc5d0": 48, "\uc9d1\uc911\ud558\uac8c": 48, "recognizable\ud574\uc11c": 48, "\uadf9\ubcf5\ud558\uace0\uc790": 48, "192\uac1c\uc758": 48, "\ubc30\uc815": [8, 48], "size\ub97c": 48, "\ub4e4\uacfc": [48, 52, 58], "\uc5f0\uc18d\uc801\uc73c\ub85c": 48, "\uc785\ub825\ud568": 48, "jiho": 48, "ml": [36, 48, 60], "weekli": 48, "nlp": 48, "40": 48, "cqom0r2kmvi": 48, "1729": 48, "\ud835\udc5e": 48, "\u03c6": 48, "dvae": 48, "token\ub97c": 48, "\ud835\udf03": 48, "token\uc5d0\uc11c": 48, "decoder\uc5d0\uc11c": 48, 
"\u03c8": 48, "purpl": 48, "text\uc640": [48, 54], "token\ub4e4\uc758": 48, "\ud835\udc5e_\u03c6": 48, "\ud835\udc5d_\ud835\udf03": 48, "elb": 48, "bound\ub97c": 48, "elb\ub97c": 48, "continuous\ub97c": 48, "\ubc14\uafd4\uc57c": 48, "\ud559\uc2b5\uc2dc\uc5d0\ub294": 48, "argmax\ub97c": 48, "\uc778\ub371\uc2a4\ub97c": 48, "argmax": 48, "gumbel": 48, "g_i": 48, "e_i": [48, 56], "relaxation\ub97c": 48, "temperatur": 48, "relaxation\uc744": 48, "tight\ud558\uac8c": 48, "\uc7a1\uc544\uc90c": 48, "120\uc5b5\uac1c\uc758": 48, "logit\uc5d0\uc11c": 48, "\uc18c\ubb38\uc790\ud654": 48, "384": 48, "vocabulary\ub97c": 48, "\ud55c\ubc88\uc5d0": 48, "row": 48, "column": 48, "n\uac1c\ub294": 48, "n\uac1c": 48, "\uace8\ub77c\uc11c": 48, "\uace0\ub974\uae30": 48, "\ubc88\uc9f8\ub85c": 48, "\uc120\ud0dd\ud568": 48, "best\ub97c": 48, "\uace0\ub97c\ub54c": 48, "prompt\ub791": 48, "\ub098\uc634": [48, 49], "score\uc774": 48, "\uc54c\ub9de\uc740": 48, "df": 48, "five": 48, "vote": 48, "\ucc28\uc774\ub85c": 48, "\ud22c\ud45c": 48, "\ubc1b\uc558\uc74c": 48, "\ub0ae\uc744\uc218\ub85d": [48, 49], "\uc88b\uc73c\uba70": 48, "\ub192\uc744\uc218\ub85d": [48, 49], "\ub791": 48, "cub": 48, "\uc0c8": 48, "coco\uc5d0\uc11c\ub294": 48, "\ubcf4\uc5ec\uc92c\uc74c": 48, "cub\uc5d0\uc11c\ub294": 48, "\ucc0d\uc9c0": 48, "\ubabb\ud558\uc600\uace0": 48, "score\uc5d0\uc11c\ub294": 48, "cub\uc5d0": 48, "\uacc4\uc120\uc744": 48, "\uc0dd\uac01\ud568": 48, "\uacb0\uacfc\uac12": 48, "parameter\uacfc": 48, "\ub6f0\uc5b4\ub098\uac8c": 48, "\uc900\uc218\ud55c": 48, "\uc2f6\uc740": 48, "\ud3ec\ud568\ub418\uba74": 48, "\uacaa\uc74c": 48, "\uace0\uc2b4\ub3c4\uce58\uac00": 48, "2\ub9c8\ub9ac\uac70\ub098": 48, "\uace0\uc2b4\ub3c4\uce58": 48, "\ud06c\ub9ac\uc2a4\ub9c8\uc2a4": 48, "\uc2a4\uc6e8\ud130\ub97c": 48, "\uc785\uace0": 48, "\uc544\uc26c\uc6b4": 48, "limitation\uc744": 48, "2105": 49, "05233": 49, "\ub6f0\uc5b4\ub118\uc74c": 49, "\ubd80\ubd84\uc5d0\uc11c\ub3c4": 49, "\uc8fc\uc7a5\ud568": [8, 49], "diversity\uc640": 
49, "fidelity\uc758": 49, "off\uc5d0": 49, "model\ub4e4\uc774\uba70": 49, "\uc0dd\uc131\ud574\ub0b4\ub294\ub370\uc5d0": 49, "\uc131\uacf5": 49, "deep\uc5d0": 49, "\ub0ae\uc73c\uba70": 49, "\uac1c\uc120\uc0ac\ud56d\uc774": 49, "model\ub4e4\uc758": 49, "\ub04c\uc5b4\uc62c\ub9ac\uba70": 49, "\ub0ae\ucd94\uaca0\ub2e4\uace0": 49, "\uc124\uba85\ub418\uc788\uc73c\ubbc0\ub85c": 49, "\ub17c\ubb38\ub4e4\uc758": 49, "\uac00\uc815\ud558\uba70": 49, "\ubd88\uac00\ub2a5\ud55c": 49, "\ub9e4\uac1c\ubcc0\uc218\ub85c": 49, "\uc124\uc815\ub418\uba70": 49, "ddpm\uc5d0\uc120": 49, "\uc9c0\ud45c\uac00": 49, "\ub0ae\uc558\ub2e4": 49, "scheduling\uc744": 49, "\uc8fc\uc7a5\ud588\ub2e4": 49, "\ub04a\uace0": 49, "\ubc14\uafc8": 49, "iteration\uc73c\ub85c": 49, "\ucc44\ud0dd\ud588\uc9c0\ub9cc": 49, "parameter\uc744": 49, "\ucc44\ud0dd\ud568": 49, "\uc77c\uc815\ud558\uac8c": 49, "\uac00\uc838\uac00\uba74\uc11c": 49, "\ubcf4\uae30": 49, "\uc2dc\ucf1c\ubcf4\uae30": 49, "head\uc5d0": 49, "8x8": 49, "\ud574\ubcf4\uae30": 49, "\uc77c\ubc18": 49, "block\uc774": 49, "biggan\uc758": 49, "32\uc77c\ub54c": 49, "\ub0ae\ub2e4": 49, "160": 49, "block\ub9c8\ub2e4": 49, "adain\uc774\ub791": 49, "adagn": 49, "\uc18c\uac1c\ud588\ub2e4": 49, "\ubc29\ubc95\ub860\uc778\uc9c0\ub294": 49, "normalization\uc744": 49, "adpative\ud558\uac8c": 49, "embedding\uacfc": 49, "adain": 49, "\uacf1\ud558\uace0": 49, "\ub354\ud568": 49, "y_b": 49, "adagn\uc758": 49, "adagn\uacfc": 49, "additon": 49, "normalization\ubcf4\ub2e4": 49, "layer\uc744": 49, "\uc0ac\uc6a9\ud588\ub294\ub370": 49, "\uc8fc": 49, "de": 49, "y\ub97c": 49, "\uc90c\uc73c\ub85c\uc368": 49, "zp_": 49, "\uc0c1\uc218": 49, "log_": 49, "\uace1\ub960\uc774": 49, "\ubb34\ud55c\uc73c\ub85c": 49, "rightarrow0": 49, "\ud14c\uc77c\ub7ec": 49, "\uae09\uc218\ub97c": 49, "\uc7ac\uc804\uac1c": 49, "\uc720\ub3c4\ub294": 49, "\ubcf8\ubb38\uc758": 49, "\ubc88\uc2dd\uc774\ubbc0\ub85c": 49, "\ub611\uac19\uc774": 49, "sample\ud55c\ub2e4": 49, "output\uacfc": 49, "gradient\uc758": 
49, "\ube7c": 49, "score\uc744": 49, "scaling\uc758": 49, "classifier\uac00": 49, "scaling\uc774": 49, "\uc8fc\uba74": 49, "\uc6f0\uc2dc\ucf54\uae30\ub77c\ub294": 49, "\uc6f0\uc2dc\ucf54\uae30\uc2a4\ub7ec\uc6b4": 49, "\uac15\uc544\uc9c0\uac00": 49, "\ub418\uc9c0\ub294": 49, "\uc6f0\uc2dc\ucf54\uae30": 49, "class\ub77c\ub294": 49, "\ubd84\uc704\uae30\uc758": 49, "\uac15\uc544\uc9c0\uc758": 49, "epsilon\uc774\ub77c\ub294": 49, "\ubc1b\ub294\uc9c0": 49, "sampling\ud560": 49, "scale\uc774": [36, 49], "recall\uc740": 49, "\ub0ae\uc9c0\ub9cc": 49, "\uc0dd\uae30\ub294\ub370": 49, "recall\uc774": 49, "diveristy\uac00": 49, "\ub0ae\ub2e4\ub294": [49, 60], "\ub192\ub2e4\ub294": 49, "\ub73b\uc774\ub2e4": 49, "\ub192\uc77c\uc218\ub85d": 49, "label\ucabd\uc73c\ub85c": 49, "guide\uac00": 49, "\uc0dd\uae30\ubbc0\ub85c": 49, "\uc77c\uc815\ud55c": 49, "sfid\ub294": 49, "\ub3c4\ucd9c\ub418\ub294": 49, "\uac12\uc774\ubbc0\ub85c": 49, "\uc9c0\uc810\uc5d0\uc11c": 49, "\ub098\uc654\ub2e4": 49, "adm\uc740": 49, "\uc57d\uc790\uc774\uba70": 49, "g\ub294": 49, "guidance\uc758": 49, "\uc57d\uc790\uc774\ub2e4": 49, "\uc8fc\uc5c8\uc744": 49, "fid\uac12\uc774": [49, 52], "\ub098\uc654\uc73c\uba70": 49, "\ub450\ubc88\uca30": 49, "\ud50c\ub77c\ubc0d\uace0": 49, "\ubcfc\ub54c": 49, "biggan\uc740": 49, "\uc774\ubbf8\uc9c0\uac04\ub4e4\uc758": 49, "\ud50c\ub77c\ubc0d\uace0\uac00": 49, "\ub290\ub08c\uc758": 49, "\ub2e4\ucc44\ub85c\uc6b4": 49, "\ud55c\ub9c8\ub9ac\ub9cc": 49, "\uc0ac\uc9c4\ub3c4": 49, "\ubc95\uc744": 49, "label\uc774": 49, "data\uc5d0\ub294": 49, "\ud655\uc7a5\uc774": 49, "\ubd88\uac00\ub2a5\ud558\ub2e4": [49, 54], "unlabel": 49, "cluster": 49, "\ud558\ub824": 49, "12242": 50, "\ub4f1\uc7a5\ud558\uc600\uc9c0\ub9cc": 50, "\uba74\ub4e4\uc744": 50, "\uc18c\uac1c\ub418\uc5c8\uace0": 50, "5\uc7a5\uc758": 50, "\uc815\ub3c4\ubc16\uc5d0": 50, "\uc18c\uc694\ub418\uc9c0": 50, "\uc54a\ub294\ub2e4\uace0": 50, "\uc54c\uc544\ubcf4\uae30": 50, "\uc815\ub9ac\ub97c": 50, 
"\uc785\ub825\ubc1b\uc544\uc11c": 50, "\uc218\uc2dd\uc801\uc73c\ub85c": [50, 58, 60], "alpha_tx": 50, "\ub54c\ub85c\ub294": 50, "\uace0\uc815\uc2dc\ud0a8\ub2e4\uace0": 50, "\uc55e\uc368": [50, 53, 58, 59], "\uc124\uba85\ub4dc\ub838\ub358": 50, "\ub0b4\uc6a9\ub4e4\uc744": 50, "blob": 50, "text_encoder_cl": 50, "import_model_class_from_model_name_or_path": 50, "noise_schedul": 50, "ddpmschedul": 50, "from_pretrain": [50, 56], "subfold": [50, 56], "text_encod": [50, 56], "autoencoderkl": [50, 56], "unet2dconditionmodel": [50, 56], "first_epoch": 50, "num_train_epoch": 50, "train_dataload": 50, "until": 50, "reach": 50, "resum": 50, "resume_from_checkpoint": 50, "resume_step": 50, "progress_bar": [50, 59], "pixel_valu": 50, "weight_dtyp": 50, "latent_dist": 50, "scaling_factor": 50, "offset_nois": 50, "bsz": 50, "randint": 50, "num_train_timestep": 50, "accord": 50, "magnitud": 50, "noisy_lat": 50, "add_nois": 50, "get": 50, "input_id": 50, "model_pr": 50, "prediction_typ": 50, "v_predict": 50, "get_veloc": 50, "model_pred_prior": 50, "target_prior": 50, "float": 50, "prior_loss": 50, "sync_gradi": 50, "params_to_clip": 50, "itertool": 50, "clip_grad_norm_": 50, "max_grad_norm": 50, "zero_grad": [50, 51], "set_to_non": 50, "set_grads_to_non": 50, "\ub2f4\ub294": 50, "rare": [50, 53], "unicod": 50, "\ucd94\uac00\ud568\uc73c\ub85c\uc368": 50, "\uc720\uc9c0\ud558\uac8c": 50, "\uc774\ub85c\uc368": [50, 60], "\uccab\ubc88\uc9f8\ub85c\ub294": [36, 50, 58], "\uc0dd\uc131\ub418\uae30": 50, "\uc120\ud638\ub41c\ub2e4\uace0": 50, "\uacc4\uc0b0\ub429\ub2c8\ub2e4": 50, "pairwis": 50, "\uc801\uc6a9\ub428\uc73c\ub85c\uc368": 50, "\uc18c\uac1c\ub4dc\ub838\ub358": 50, "div": 50, "\ud574\uacb0\ub418\ub294": 50, "\uc785\ub825\ud588\uc744\ub54c\uac00": 50, "\uc124\uba85\ud569\ub2c8\ub2e4": [50, 61], "backpack": 50, "famou": 50, "painter": 50, "sculptor": 50, "\ud615\ud0dc\ub3c4": 50, "\uc0dd\uc131\ub3c4": [50, 52], "modif": 50, "speci": 50, "\uace0\uc720": 50, "\ud55c\uacc4\uc810\ub3c4": 
50, "\ub098\ud0c0\ub098\uc9c0": 50, "\ubcf8\ubb38\uc5d0": 50, "\uc18c\uac1c\ub418\uace0": 50, "\uc788\uc9c0\ub294": 50, "\ubd80\ubb38\uc5d0\uc11c\ub3c4": 50, "\ud559\uc2b5\uacb0\uacfc\ub97c": 50, "\ubcf4\uc5ec\uc8fc\ub294\ub370": 50, "\uc7a5\ub9cc\uc73c\ub85c\ub3c4": 50, "\uc0ac\ub840\ub4e4\uc744": 50, "nip": 51, "2014": [51, 60], "1406": 51, "2661": 51, "eriklindernoren": 51, "smart": [51, 60], "lab": [51, 59, 60], "kaist": [51, 60], "\ub525\ub7ec\ub2dd": [51, 60], "chp": 51, "editor": [51, 60], "\ub098\ub269\ub2c8\ub2e4": 51, "\uacc4\uc0b0\ud55c\ub2e4\ub294": 51, "tractabl": 51, "\uadfc\uc0ac\ud654\uc2dc\ucf1c": 51, "pixelcnn": 51, "pixelrnn": 51, "boltzmann": 51, "\ud655\ub960\ubd84\ud3ec\ub97c": 51, "\uc815\uaddc\ud654\ud558\ub294": 51, "\uacc4\uc0b0\ud558\uc9c0": 51, "\uc644\uc804\uadf8\ub798\ud504": 51, "\ub9ce\uc544\uc11c": 51, "\uc644\uc804\uadf8\ub798\ud504\uc774\uae30": 51, "\ub178\ub4dc\uac00": 51, "\ub298\uc5b4\ub0a0\uc218\ub85d": 51, "\uac04\uc120": 51, "\uae09\uc99d\ud558\ub294": 51, "restrict": 51, "rbm": 51, "\uc81c\uc548\ub418\uae30\ub3c4": 51, "\uc815\uc758\ud558\uc9c0": 51, "\ub300\ud45c\uc801\uc73c\ub85c\ub294": 51, "ian": 51, "goodfellow": 51, "2014\ub144\uc5d0": 51, "\uc18c\uac1c\ub418\uae30": 51, "\uc804\uae4c\uc9c0": 51, "\ub144": 51, "\uc790\ub9ac\uc7a1\uc558\uc5c8\uc2b5\ub2c8\ub2e4": 51, "taxonomi": 51, "\uc7a0\uc7ac\ubcc0\uc218": [51, 60], "\uadf8\ub85c\ubd80\ud130": 51, "\uad6c\ubd84\ud558\ub294": 51, "\ub9d0\ud574\uc11c": 51, "\ub4e4\uc5b4\uc624\uba74": 51, "\uac00\uc9dc\ub85c": 51, "\ucf54\ub4dc\ub3c4": 51, "in_feat": 51, "out_feat": 51, "batchnorm1d": 51, "leakyrelu": 51, "inplac": 51, "opt": 51, "latent_dim": 51, "np": 51, "prod": 51, "img_shap": 51, "tanh": 51, "img_flat": 51, "d\ub97c": 51, "g\ub97c": 51, "min_g": 51, "max_d": 51, "logd": 51, "p_z": 51, "\uc54c\uace0\ub9ac\uc998\uacfc": 51, "\ube44\uad50\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 51, "n_epoch": 51, "fill_": 51, "real_img": 51, "optimizer_g": 51, "gen_img": 51, "measur": 
51, "fool": 51, "g_loss": 51, "adversarial_loss": 51, "optimizer_d": 51, "real_loss": 51, "fake_loss": 51, "d_loss": 51, "print": 51, "item": 51, "batches_don": 51, "sample_interv": 51, "save_imag": 51, "nrow": 51, "\ucd5c\ub300\ud654\ud558\uace0": 51, "\uc9c4\ud589\ud558\uac8c": 51, "\uc0c1\ud669\uc774": 51, "\ucd5c\uc18c\ud654\ud558\uc9c0": 51, "\ucd5c\ub300\ud654\ud558\ub294": 51, "\uae30\ubc95\ub3c4": [51, 61], "\uc644\ubcbd\ud788": 51, "\ubcf5\uc6d0\ud558\uace0": 51, "\uc5b8\uc81c\ub098": 51, "\ub0b4\ubc49\uac8c": 51, "proposit": 51, "p_g": 51, "\uc99d\uba85\ud558\uc790\uba74": 51, "int_x": 51, "dx": [36, 51], "int_z": 51, "dz": [51, 60], "\uc77c\ub54c": [36, 51], "\uc131\ub9bd\ud558\uace0": 51, "\uac19\uace0": 51, "ast": 51, "jsd": 51, "\ucd5c\uc19f\uac12\uc740": 51, "\uc131\ub9bd\ud569\ub2c8\ub2e4": 51, "mnist": [51, 60], "toronto": 51, "tfd": 51, "\ud3c9\uac00\uc2dc\uc5d0\ub294": 51, "parzen": 51, "estimation\uc744": 51, "\ud45c\ub97c": 51, "vae\ub294": 51, "\ud750\ub9bf\ud558\ub2e4\ub294": 51, "\ucc28\uc6d0\ucd95\uc18c\ub85c": 51, "\ud65c\uc6a9\ub418\uace0": [36, 51], "\ud65c\uc6a9\ub418\uc5c8\ub2e4\uace0": 51, "11487": 52, "learning\uc774": 52, "\ub3c5\ucc3d\uc801\uc778": 52, "\ub9d0\ubb49\uce58": 52, "corpu": 52, "llm\ub4e4\uc758": 52, "embedding\ub4e4\uc740": 52, "\ud6a8\uacfc\uc801\uc774\ub77c\uace0": 52, "\uc0ac\uc774\uc988\ub97c": [52, 55], "\uc911\uc694\ud558\ub2e4\ub294": 52, "\uc81c\uc2dc\ud558\uc5ec": 52, "weight\uc744": 52, "palett": [52, 53], "\uad6c\uc870\ubcf4\ub2e4": 52, "\uc81c\uc2dc\ud568": 52, "\ub2ec\uc131\ud568": 52, "evaluation\uc6a9": 52, "encoder\uc744": 52, "generation\uc774": [36, 52], "\uc77c\uc815\ud558\uc9c0": 52, "\ubabb\ubc1b\uc544\uc11c": 52, "class\ub098": 52, "object\uc774": 52, "\uc77c\uc815\ud558\uace0": 52, "\ubb34\uc5c7\uc744": 52, "\uc0dd\uc131\ud558\ub294\uac83\uc778\uc9c0": 52, "\uc790\uc138\ud558\uac8c": 52, "guide\uc758": 52, "\ub192\uc774\uba74": 52, "\uac00\uc911\uce58\uc758": 52, "\uc774\ub3d9\uc2dc\ucf1c": 52, 
"\ube57\ub098\uac00": 52, "\ub35c\ud55c": 52, "\ub40c": 52, "\ubc31\ubd84\uc704\uc218": 52, "\uc808\ub300": 52, "\uc9c0\uc815\ud558\uace0": 52, "among": 52, "net\uc774\ub77c\ub294": 52, "modification\uc744": 52, "effu": 52, "net\uc740": 52, "\uc758\ub8cc\ucabd\uc73c\ub85c": 52, "\uc788\ub294\uac78\ub85c": 52, "\uc544\ub294\ub370": 52, "remov": 52, "keep": 52, "block\uc5d0\uc11c": 52, "blocks\ub97c": 52, "\ubca4\uce58\ub9c8\ud06c": 52, "categori": 52, "\uc774\ub8e8\uc5b4\uc84c\ub2e4": 52, "\uae43\ud5c8\ube0c\uc5d0\uc11c": 52, "\ub2e4\uc6b4": 52, "\uac17\ub2e4": 52, "25\uba85\uc758": 52, "\ud3c9\uac00\uc790": 52, "a\uc5d0\uc11c": 52, "\ud3c9\uac00\uc790\ub294": 52, "\uc9c8\ubb38\uc744": 52, "\uae30\uc900\uc810\uc73c\ub85c": 52, "q1": 52, "q2": 52, "repres": 52, "\uae30\uc900\uc810": 52, "\ub2f5\ubcc0": 52, "\uc120\ud0dd\ud574\uc57c\ud568": 52, "am": 52, "indiffer": 52, "screenshot": 52, "drawbench\uc5d0\uc11c": 52, "\uccb4\ub9ac\ud53c\ud0b9": 52, "\uce74\ud14c\uace0\ub9ac\uc5d0\uc11c\ub3c4": 52, "\uc8fc\uc7a5\uc778": 52, "peopl": 52, "\uc62c\ub77c\uac10": 52, "people\uc744": 52, "\uc0dd\uc131\ud558\uae30\uc5d0": 52, "rater": 52, "xxl\ub85c": 52, "\uc120\ud638\ud568": 52, "evaul": 52, "\uc911\uc694\ud568": 52, "boost\uc5d0": 52, "thresholding\uc744": 52, "\ub04c\uc5b4": 52, "\uc62c\ub9b4": 52, "usag": 52, "much": 52, "editbench": 53, "06909": 53, "\uc18c\uac1c\ud558\ub294": [53, 55, 58, 59], "\ud3c9\uac00\uae30\ubc95": 53, "\uc608\uc815\uc785\ub2c8\ub2e4": [53, 58, 59], "\uc9c0\uc815\ud558\uc5ec": 53, "\ucc38\uc870\ud558\uc9c0": 53, "\uc624\ub85c\uc9c0": 53, "\uc720\ub3c4\ud558\ub294": 53, "\ubaa9\ud45c\uc785\ub2c8\ub2e4": 53, "mobilenet": 53, "detector": 53, "\uc810\uc785\ub2c8\ub2e4": 53, "sr3": 53, "\uac00\uc9c4\ub2e4\uace0": 53, "\uc785\ub825\ud569\ub2c8\ub2e4": [53, 59, 61], "\ub0b4\uae30": [53, 54], "\ucd94\uac00\ub418\ub294": 53, "\ucd08\uae30\ud654\ud574\uc11c": 53, "\uc18c\uac1c\ub418\uc5c8\ub358": 53, "1\ubd80\ud130": 53, "\ubcc0\ud654\uc2dc\ud0a4\ub294": 
53, "oscil": 53, "\uc0c1\uc2b9\ub418\ub294": 53, "240\uac1c\uc758": 53, "\uad6c\ucd95\ub418\uc5b4\uc788\uace0": 53, "\uc30d\ub9c8\ub2e4": 53, "3\uac00\uc9c0\uc758": 53, "\uce21\uc815\ud558\uac8c": 53, "\uc73c\ub85c\ub294": [53, 59], "clipscor": 53, "prec": 53, "\uc808\ubc18\uc740": 53, "\ub370\uc774\ud130\uc14b\uc73c\ub85c\ubd80\ud130": 53, "\uc218\uc9d1\ub418\uc5c8\uace0": 53, "\uc0dd\uc131\ud574\uc11c": 53, "\uad6c\ucd95\ud588\uc2b5\ub2c8\ub2e4": 53, "\uc694\uc18c\ub4e4\uc744": [36, 53], "\uac16\ucd94\ub3c4\ub85d": 53, "common": 53, "outdoor": [53, 58], "metal": 53, "\ubb38\uad6c\ub97c": 53, "farm": 53, "\ud574\ub2f9\uc0ac\uc9c4\ucc98\ub7fc": 53, "\ud06c\uae30\ub3c4": 53, "\uce21\uc815\ud574\ubcf8": 53, "medium": 53, "\uc131\ub2a5\uc801\uc73c\ub85c": 53, "\uc18d\uc131\ubcf4\ub2e4": 53, "\uc18d\uc131\uc5d0": 53, "\uc0ac\uc9c4\uc785\ub2c8\ub2e4": [53, 59], "maskrich": 53, "2310": [54, 61], "04378": 54, "luosiallen": 54, "\uc131\uacfc\ub97c": 54, "\uac70\ub450\uc5c8\uc9c0\ub9cc": 54, "\uac00\uc9c0\uae30": 54, "\uadf9\ubcf5\ud558\uae30": 54, "solver\uc758": 54, "\uc131\ub2a5\uac1c\uc120\uc744": 54, "lu": 54, "\ucd94\ub860\ud560\uc218": 54, "models\uc740": 54, "trajectory\uc5d0": 54, "\uac16\ub3c4\ub85d": [36, 54], "\ubaa8\ub378\ub85c\uc11c": 54, "2\uac00\uc9c0\uc758": 54, "model\uc774\uae30": 54, "\uc801\ud569\ud558\uc9c0": [54, 56, 58], "text2img": 54, "\uc81c\uc548\uc810\uc740": 54, "3\uac00\uc9c0\ub2e4": 54, "lcm": [54, 56], "32\uc2dc\uac04": 54, "\ubc16\uc5d0": 54, "\uac78\ub9ac\uc9c0": 54, "\uc8fc\uc785\ud558\uace0": 54, "sampling\ud558\ub294": 54, "\uae30\ubc95\uc774\ub2e4": 54, "forwad": 54, "\ud655\ub960\ubd84\ud3ec\uc778": 54, "0t": 54, "scheduler\ub97c": 54, "timestep\uc758": 54, "\ud655\ub960\ubbf8\ubd84\ubc29\uc815\uc2dd": 54, "q_t": [36, 54], "ptobabl": 54, "\uc0c1\ubbf8\ubd84\ubc29\uc815\uc2dd": 54, "\ub9cc\uc871\ud558\ub294\ub370": 54, "dx_t": 54, "nabla_x": [54, 57], "\uadfc\uc0ac\uce58\ub97c": 54, "sampling\ud558\ub294\ub370": 54, "ode\ub77c": 54, 
"\uacbd\ud5d8\uc801": 54, "sampling\uc758": 54, "cfg\uc758": [36, 54], "prediction\uc740": 54, "\ub300\uccb4\ub41c\ub2e4": 54, "consistenct": 54, "cm\uc758": 54, "\uada4\uc801\uc5d0": 54, "point\uc640": 54, "mapping\ub418\ub294": 54, "\uc591\uc218\uac12\uc744": 54, "\uc790\uae30": 54, "\uc790\uc2e0\uc5d0": 54, "\ub9cc\uc871\ud574\uc57c\ud55c\ub2e4": 54, "\ud568\uc218\uc774\uba70": 54, "\uc2ec\uce35": 54, "cm\uc740": 54, "\ud559\uc2b5\ud558\uba70": 54, "ode\uc5d0": 54, "solver\ub85c": 54, "method\ub4f1\uc758": 54, "\uc218\uce58\uc801\uc778": 54, "\uc601\uc0c1\uc5d0": 54, "generation\ub9cc": 54, "\uc7a0\uc7ac\uc131\uc774": 54, "\ud0d0\uad6c\ub418\uc9c0": 54, "\ubc1c\ud718\ud558\uc5ec": 54, "\ub3c4\uc804\uc801\uc778": 54, "lcd": 54, "\uc124\uacc4\ub418\uc5c8\uae30": 54, "vector\ub85c": 54, "\uc784\ubca0\ub529\ud558\uace0": 54, "\ubcf5\uc6d0\ud55c\ub2e4": 54, "\uc0c1\uc5d0\uc11c": [36, 54], "\uc774\ub904\uc9c0\uae30": 54, "laptop": 54, "\uc815\uc758\ub41c\ub2e4": 54, "dz_t": 54, "c\ub294": 54, "ode\uc0c1\uc5d0\uc11c": 54, "trick\uc778": 54, "\ubcc0\ud615\ud558\uc5ec": 54, "\ub300\uc785\ud55c": 54, "\uce58\ud658": 54, "cm\uacfc": 54, "teacher": [54, 57, 58], "pd\uc5d0\uc11c": 54, "8\uc758": 54, "\uc6b0\ud56d\uc744": 54, "\uadfc\uc0ac\ud55c": 54, "solver\uc774\uae30": 54, "distillation\uc2dc\uc5d0\ub9cc": 54, "edm\uc744": 54, "\ud1a0\ub300\ub85c": 54, "cm\uc5d0\uc11c": 54, "\uac04\uaca9\uc73c\ub85c": 54, "\uc5b4\ub5a0\ud55c\uac04\uaca9\uc744": 54, "8\uc744": 54, "\uc801\ubd84": 54, "clasifi": 54, "\ud6c8\ub828\ud574\uc57c\ud558\uae30": 54, "\ud6a8\uc728\uc801\uc774\uc9c0": 54, "\ubabb\ud558\uba70": 54, "lcms\uc640": 54, "method\uc5d0": 54, "cfg\ub97c": [36, 54], "\ud1b5\ud569\ud558\uc600\ub2e4": 54, "distill\uc758": 54, "sampling\uc5d0": 54, "\ud1b5\ud569\ud558\uc600\uc73c\ub098": 54, "\ud559\uc2b5\uc2dc\uac04\uc774": 54, "\uae38\uace0": 54, "2\ub2e8\uacc4\ub97c": 54, "\ub204\uc801\ub418\uae30": 54, "t\uc774\uc5d0": 54, "cfg\uc5d0": 54, "varnoth": 54, 
"\ubcc0\ud615\ub418\ubbc0\ub85c": 54, "ode\ub294": 54, "function\ub3c4": 54, "\ubcc0\uc218\ub85c": 54, "\ubc1b\uc544\uc624\uae30": 54, "omega_": 54, "sampling\ub41c\ub2e4": 54, "\uc774\uc804\uacfc": 54, "cfg\uac00": 54, "\uc608\uce21\ubaa8\ub378": 54, "11\ucc98\ub7fc": 54, "\ubcf4\ud1b5\uc758": 54, "\uc7a1\uace0": 54, "\uc774\uac19\uc774": 54, "\ucd18\ucd18\ud55c": 54, "\uac10\uc18c\uc2dc\ud0a4\uae30": 54, "loss\ub3c4": 54, "\uc791\uc544\uc9c0\uac8c": 54, "\uc218\ub834\uc18d\ub3c4\ub3c4": 54, "\ub290\ub824\uc9c0\uac8c": 54, "\uc218\ub834\uc758": 54, "\uc218\ucc9c\uc5d0\uc11c": 54, "\uc218\uc2ed\uc73c\ub85c": 54, "\ub2e8\ucd95\uc2dc\ud0a4\ub294": 54, "scheduler\ub85c": 54, "solver\ub3c4": 54, "\uc99d\uba85\ud588\ub2e4": 54, "\ube44\uad50\ud558\ub294\uac83\uc774": 54, "step\ub9cc\ud07c": 54, "\ub108\ubb34\uc791\uc73c\uba74": 54, "\uac16\uac8c\ub418\uba70": 54, "\uac12\uc77c": 54, "\uc624\ucc28\uac00": 54, "\ucee4\uc9c8\uc218": 54, "14\uc5d0": 54, "k\uac12\uc744": 54, "\uc218\uc2dd\ub3c4": 54, "\ubcc0\uacbd\ud560": 54, "foundat": 54, "cunstom": 54, "\ub54c\uac00": 54, "lcf": 54, "dataset\ub3c4": 54, "\uc885\uc18d\uc5c6\uc774": 54, "inference\ub97c": 54, "lcm\uc740": 54, "\ubc14\ub85c\ubc14\ub85c": 54, "\uc788\ub294\uac83\uc740": 54, "\uc544\ub2c8\uace0": 54, "consisteni": 54, "\uc0ac\uc6a9\ud558\uae30\ub9cc\ud558\uba74": 54, "diffuson": 54, "\ubc14\ub85c\ud559\uc2b5\uc774": 54, "650k": 54, "\uc55e\uc11c\ub9d0\ud55c\uac83\ucc98\ub7fc": 54, "768x768\uc758": 54, "solver\ub85c\ub294": 54, "20\uc758": 54, "lcm\uacfc": 54, "\uc131\ub2a5\ube44\uad50\ub97c": 54, "distill\uc740": 54, "\uc624\ud508\uc18c\uc2a4": 54, "\ucf54\ub4dc\uac00": 54, "\uc218\ub834\ud558\uace0": 54, "\uc0dd\uc131\ud558\uc600\ub2e4": 54, "distillation\uc774\uc9c0\ub9cc": 54, "\ubcf4\uc5ec\uc92c\ub2e4": 54, "\ud478\ub294": 54, "solver\ub4e4": 54, "lcm\uc5d0": 54, "\ube44\uad50\uc640": 54, "schedule\uc758": 54, "iteration\uc5d0\uc11c\uc758": 54, "\uace0\uc815\ud574\uc11c": 54, "\uc62c\ub838\uc744": 54, 
"\ud6e8\uc52c\ub354": 54, "\uc218\ub834\ud558\uba70": [54, 60], "dpm\uacfc": 54, "50\uc77c": 54, "ddim\ubcf4\ub2e4": 54, "error\ub97c": 54, "ddim\uc5d0": 54, "\uc801\uae30": [54, 61], "\uc88b\uc544\uc9c0\uc9c0\ub9cc": 54, "quality\uc640": 54, "diversity\uc5d0": 54, "off\uac00": 54, "inference\ub294": 54, "\uac00\uc9c0\uc9c0\ub294": 54, "\uc54a\ub294\uac83\uc73c\ub85c": 54, "\ud655\uc778\ub41c\ub2e4": 54, "\uac1c\uc120\uc758": 54, "\uc788\ub294\uac83": 54, "\ubd24\uc744": 54, "\ud655\uc5f0\ud558\uac8c": 54, "\ub4e4\uc5b4\ub09c\ub2e4": 54, "\uc2dc\uc5d0\ub3c4": 54, "\uc99d\uba85\ud55c\ub2e4": 54, "\ud3ec\ucf13\ubaac": 54, "\uc2ec\uc2a8": 54, "lcf\ub97c": 54, "\ub54c\ub97c": 54, "\uc644\ubcbd\ud558\uc9c4": 54, "catch\ud55c": 54, "\uc0c1\uc5d0": 54, "dataset\uc5d0\ub3c4": 54, "step\uc73c\ub85c\ub3c4": 54, "2211": 55, "10440": 55, "\uba85\uc2dc\ud569\ub2c8\ub2e4": [55, 56], "extrem": 55, "softwar": 55, "\uc720\ub3d9\uc801\uc73c\ub85c": 55, "\uc804\ud658\ud558\uc5ec": 55, "faster": 55, "\uad6c\uc131\ub418\uc5b4\uc788\ub2e4\uace0": 55, "backpropag": [8, 55, 60], "\uacc4\uc0b0\ud558\ub294\ub370": 55, "\uc18c\uc694\ub41c\ub2e4\uace0": 55, "scratch": [55, 56], "\ub2f4\uc544\ub0b4\ub294\ub370": 55, "\uc720\uc6a9\ud558\ub2e4\ub294": 55, "\ubcf4\uc600\uc9c0\ub9cc": 55, "\ub192\ub2e4\uace0": [55, 60], "instantngp": 55, "octre": 55, "\uc904\uc600\ub2e4\uace0": 55, "\uc608\uc2dc\ucc98\ub7fc": [55, 59], "\uba54\ubaa8\ub9ac\uc640": 55, "\uc5f0\uc0b0\uc801\uc778": 55, "\uc81c\ud55c\uc774": 55, "\ub0b4\uae30\uac00": 55, "v_t": 55, "\ud45c\ud604\ud569\ub2c8\ub2e4": 55, "vertic": 55, "tetrahedra": 55, "surfac": 55, "\uc5c5\ub370\uc774\ud2b8\ud558\uba70": 55, "\uc5c5\ub370\uc774\ud2b8\ub9c8\ub2e4": 55, "environ": [55, 61], "\uc791\uac8c": 55, "10\ubc30": 55, "\uc99d\uac00\uc2dc\ucf30\ub2e4\uace0": 55, "\uc9c4\ud589\ud558\uae30": 55, "\ucc28\uac10\ud568\uc73c\ub85c\uc368": 55, "\uc804\ud658\ud558\uace0": 55, "\uc9c4\ud589\ud560\ub54c": 55, "\ucd5c\uc801\ud654\ud558\uac8c": 55, 
"\ucd94\uc801\ud558\uc5ec": 55, "397": 55, "speed": [55, 57], "\ubd84": 55, "3000": 55, "\uac1d\uccb4\uc5d0\uc11c\uc758": 55, "\uc798\ud558\ub294": 55, "\uac1d\uccb4\ub4e4\uc5d0": 55, "\uc124\ubb38\uc870\uc0ac\ud574\ubcf8": 55, "61": 55, "\uc720\uc800\ub4e4\uc774": 55, "\uc6b0\uc138\ud558\uac8c": 55, "\ud3c9\uac00\ud558\uc600\uc2b5\ub2c8\ub2e4": 55, "\ucd5c\uc801\ud654\ud560": 55, "\uc9c4\ud589\ud574\ubcf8": 55, "\uc5b4\ub835\uc9c0\ub9cc": 55, "\uc88c\uce21": 55, "\uc2e4\uc0ac\uc801\uc73c\ub85c": 55, "3\ub2e8\uacc4\ub85c": 55, "\uc218\uc815\ud55c": 55, "\ud65c\uc6a9\ud568\uc73c\ub85c\uc368": 55, "\ubcf4\uc5ec\uc904": 55, "40\ubd84": 55, "\uadf8\ub798\ud53d": 55, "\uc18c\ud504\ud2b8\uc6e8\uc5b4\uc640": 55, "\ud638\ud658\uc774": 55, "gaparmar": 56, "turbo": 56, "\uc54a\ub2e4\uace0": [56, 58, 61], "\uc124\uba85\ud558\uace0": 56, "\uc190\uc2e4\ub41c\ub2e4\uace0": 56, "\uc774\ubbf8\uc9c0\uc77c": 56, "\uce58\uba85\uc801\uc774\ub77c\uace0": 56, "\uc785\ub825\ud568\uc73c\ub85c\uc368": [56, 59], "\ucda9\ub3cc\uc744": 56, "\ubc29\uc9c0\ud569\ub2c8\ub2e4": 56, "\ub450\ubc88\uc9f8\ub85c": [36, 56, 61], "cyclegan_turbo": 56, "pretrained_nam": 56, "pretrained_path": 56, "ckpt_folder": 56, "lora_rank_unet": 56, "lora_rank_va": 56, "autotoken": 56, "cliptextmodel": 56, "sched": 56, "make_1step_sch": 56, "my_vae_encoder_fwd": 56, "__get__": 56, "__class__": 56, "my_vae_decoder_fwd": 56, "skip_conv_1": 56, "kernel_s": [56, 59], "skip_conv_2": 56, "skip_conv_3": 56, "skip_conv_4": 56, "ignore_skip": 56, "\uc544\ucf00\ud14d\uccd0\ub294": 56, "\ud615\ud0dc\ub85c\ub3c4": 56, "\uc131\ub2a5\ubcf4\ub2e4": 56, "\ud559\uc2b5\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 56, "\uc18c\uac1c\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 56, "\uc0ac\ub840\ub4e4\ub3c4": 56, "\uc57d\uc810\uc744": 56, "\ub2e8\ucd95\ud558\uae30": 56, "\uc815\uc758\ud558\uc5ec": 56, "\ucd94\ucd9c\ud569\ub2c8\ub2e4": 56, "\ucda9\ub3cc\uc774": 56, "\uc0dd\uaca8": 56, "\ucd95\uc18c\ud558\uae30": 56, "\ubbfc\uac10\ud55c": 56, 
"\uc544\ud0a4\ud14d\uccd0\uc5d0\uc11c": 56, "\uc804\ud6c4\ub85c": 56, "\uc785\ub825\uc2dc\ud0b5\ub2c8\ub2e4": 56, "\ubcc0\ud615\ub41c": 56, "cycl": 56, "c_y": 56, "\ud568\uc218\uc785\ub2c8\ub2e4": 56, "\ubcc0\ud615\ud558\uac8c": 56, "\uc18c\uac1c\ub418\ub294": 56, "rec": 56, "\uc870\ud569\uc73c\ub85c": 56, "\uad6c\uc131\ub418\uc5b4\uc788\uc2b5\ub2c8\ub2e4": 56, "idt": 56, "e_x": 56, "\ud569\uc73c\ub85c": 56, "\uc77c\uc785\ub2c8\ub2e4": 56, "\uc785\ub825\ubc1b\ub294": 56, "enc": [56, 59], "\uac00\uc911\uce58\uc640": 56, "\ucd9c\ub825\uac12\uc744": 56, "theta_0": 56, "\uac00\uc911\uce58\uc774\uace0": 56, "\ubcc0\ud654\uc2dc\ud0a4\uba74\uc11c": 56, "330mb": 56, "\uc18c\uc694\ub429\ub2c8\ub2e4": 56, "initialize_unet": 56, "return_lora_module_nam": 56, "requires_grad_": 56, "l_target_modules_encod": 56, "l_target_modules_decod": 56, "l_modules_oth": 56, "l_grep": 56, "to_k": 56, "to_q": 56, "to_v": 56, "conv1": 56, "conv2": 56, "conv_in": [56, 59], "conv_shortcut": 56, "conv_out": 56, "proj_out": 56, "proj_in": 56, "proj": 56, "named_paramet": 56, "down_block": 56, "up_block": 56, "lora_conf_encod": 56, "loraconfig": 56, "init_lora_weight": 56, "target_modul": 56, "lora_conf_decod": 56, "lora_conf_oth": 56, "add_adapt": 56, "adapter_nam": 56, "default_encod": 56, "default_decod": 56, "default_oth": 56, "set_adapt": 56, "initialize_va": 56, "constant_": 56, "l_vae_target_modul": 56, "vae_lora_config": 56, "vae_skip": 56, "hors": 56, "leftrightarrow": 56, "zebra": 56, "yosemit": 56, "summer": 56, "winter": 56, "\uc8fc\ud589": 56, "bdd100k": 56, "clear": [36, 56], "foggi": 56, "\uc2e4\ud5d8\ud558\uc600\uc2b5\ub2c8\ub2e4": 56, "protocol": 56, "\ub9cc\uc871\uc2dc\ucf1c\uc57c": 56, "\ubd84\ud3ec\uc640\uc758": 56, "\uc77c\uce58": 56, "struct": 56, "\ub0b4\uc9c0\ub294": 56, "\uc0ac\uc9c4\ub4e4\uacfc": 56, "cut": 56, "cyclediffus": 56, "ddib": 56, "\ud6fc\uc190\uc2dc\ud0a4\ub294": 56, "\ube48\ubc88\ud558\uac8c": 56, "\ubc1c\uc0dd\ud558\uace0": 56, "\uac00\uc911\uce58\ub85c": 56, 
"\uc77c\uc5b4\ub098\ub294": 56, "\ube44\uad50\ud558\uc600\uc744": [56, 61], "\ubbf8\uc138\ud558\uac8c": 56, "\ub192\uc74c\uc744": 56, "\ubcf4\uc5ec\uc90c\uc744": 56, "\uc735\ud569\ud574\uc11c": 56, "\uc124\uc815\ud560": 56, "\uba54\ubaa8\ub9ac\uc5d0": 56, "\ud07d\ub2c8\ub2e4": 56, "18828": 57, "tianweii": 57, "dmd2": 57, "joongwon": 57, "ha": 57, "revolution": 57, "\ubaa9\uc801\uc73c\ub85c": 57, "\uc654\uc74c": 57, "solv": 57, "\ud2c0\uc740": 57, "instaflow": 57, "cfm": 57, "\uc904\uc774\uac8c": 57, "\uc5f0\uad6c\ub418\uc5b4\uc9c0\uace0": 57, "\uc0bc\uc544": 57, "\uc2dc\ud0a8\ud6c4": 57, "\uc2dc\ud0a4\uba70": 57, "\ub300\uc751\uc744": 57, "indistinguish": 57, "newcommand": 57, "laplacianx": 57, "\uac10\uc18c\uc2dc\ud0a4\ub294": 57, "\uc5c4\ubc00\ud558\uac8c": 57, "\ubcf8\ub2e4\uba74": 57, "\ubcf4\ub2e4\ub294": 57, "\ucca0\ud559\uacfc": 57, "\ub17c\ub9ac\uc804\uac1c\ub97c": 57, "\uad81\uadf9\uc801\uc73c\ub85c": 57, "\ubfd0\uc774\uace0": 57, "langevin": 57, "scheme": 57, "\uc5c4\ubc00\ud558\uac8c\ub294": 57, "\uc5f0\uc0c1\ub418\ub294": 57, "adeverseri": 57, "ae": 57, "aae": 57, "\ub300\uccb4\ud55c": 57, "\ub098\ub258\uc5b4\uc838": 57, "generator\uc774": 57, "\uc2dc\ud0a4\ub3c4\ub85d": 57, "\uc2dc\ucf1c\uc57c\ud558\ub294": 57, "objective\uc774\uace0": 57, "\uc50c\uc5ec\uc9c8": 57, "substack": 57, "\uc9c0\ub9cc": 57, "\ubd84\ud3ec\uc0ac\uc774\uc758": 57, "\ubbf8\ubd84\uac12\ub9cc": 57, "\uc54c\uba74": 57, "\ucda9\ubd84\ud568": 57, "paramt": 57, "\ud574\uc8fc\uba74": 57, "\uc720\ub3c4\ub418\ub294\ub370": 57, "\uc791\ub3d9\ud558\ub294": 57, "\uc774\uc5ec\uc57c\ud568": 57, "\uad6c\ud560\uc9c0\uac00": 57, "\ub418\ub294\ub370": 57, "\ubcf5\uc7a1\ud574\uc9c0\ub294\ub370": 57, "\ub77c\uc11c": 57, "rvert_2": 57, "update\ub97c": 57, "\ubc18\ub300\ubc29\ud5a5\uc73c\ub85c": 57, "\ubc00\uc5b4\ub0b4\uc11c": 57, "most": 57, "\ud558\ub294\uac83\uc744": 57, "\uc54a\uc544\uc9d0": 57, "theoret": [57, 62], "\ub0ae\uc74c\uc5d0": 57, "\uc9c4\ud589\ub41c\ub2e4\uba74": 57, 
"\ucee4\ubc84\ud558\uc9c0": 57, "\uc0dd\uae38": 57, "\ud558\ub098\ub9cc\uc73c\ub85c\ub294": 57, "\ubd88\uac00\ub2a5\ud55c\uac00": 57, "\uac1c\uc778\uc801": [36, 57], "\uadfc\ubcf8\uc801": 57, "\uc544\ub2c8\ub77c\ub294": 57, "\uc5d0\uc11c\ubd80\ud130": 57, "pair\ub4e4\uc744": 57, "ell": 57, "\uc900\ud558\ub294": [57, 58], "\uc0ac\uc774\uc5d0\ub294": 57, "\uadfc\ubcf8\uc801\uc778": 57, "\uc874\uc7ac\ud568": 57, "\uc885\uc18d\ub41c\ub2e4": 57, "00512": 58, "\ubcf4\uc5ec\uc8fc\uba70": 58, "\uac01\uad11\uc744": 58, "\uc18c\uac1c\ud558\uac8c": 58, "\uc124\uba85\ud558\uc790\uba74": 58, "\uc218\ucc9c\ubc88\uc758": 58, "\uc774\ubbf8\uc9c0\ub4e4\uacfc": 58, "\ud568\uc218\ub4e4\uc740": 58, "lambda_t": 58, "monoton": 58, "decreas": 58, "\uc124\uc815\ub429\ub2c8\ub2e4": 58, "\uc774\ub4e4\uc744": 58, "\uc18c\uac1c\ud588\ub358": 58, "\ucd94\uac00\ud560\uc9c0": 58, "2021c": 58, "probabiil": 58, "\ud45c\ud604\ud574\uc11c": 58, "d\u03c3_t": 58, "nabla_z": 58, "rung": 58, "kutta": 58, "\uc801\uc6a9\ud588\uc744\ub54c": 58, "probabilt": 58, "\ud574\uc11d\ud558\uba74": 58, "\uc55e\uc73c\ub85c": 58, "\ubcf4\uac8c": 58, "\uae30\ubc95\uc740": 58, "\uc808\ucc28\ub85c": 58, "\ubcf5\uc0ac": 58, "\uacf5\uc2dd\uc740": 58, "\uc18c\uac1c\ub418\ub294\ub370": 58, "\uacf5\uc2dd\uc785\ub2c8\ub2e4": 58, "\ud575\uc2ec\uc785\ub2c8\ub2e4": 58, "\uc9c4\ud589\ub418\ub294": 58, "\uac12\ub4e4\uc5d0": 58, "\ubaa8\ub378\uc774\ub77c\uace0": 58, "sharp": 58, "\uc904\uc5b4\ub4dc\ub294": 58, "\ud655\uc778\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": [58, 60], "progresss": 58, "\uc124\uc815\uac12\uc5d0": 58, "\uc54c\uc544\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 58, "\uc77c\ubc18\uc131\uc744": 58, "\uc783\uc9c0": 58, "\ub300\ub2e4\uc218\uc758": 58, "\uc190\uc2e4\ud568\uc218\uc5d0": 58, "\ub300\uc785\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4": 58, "\ub3d9\uc77c\ud558\uba70": 58, "\uae30\ubc95\uc5d0\uc11c\ub294": 58, "\ub0b4\uc5d0\uc11c\uc758": 58, "\ud559\uc2b5\ub418\uc9c0\ub9cc": 58, "\uc9c4\ud589\ub420\uc218\ub85d": 58, 
"\uac10\uc18c\ud55c\ub2e4\ub294": 58, "\ud655\uc778\ud558\uac8c": 58, "\uac00\uae4c\uc6cc\uc9c0\uac8c": 58, "\ucee4\uc9c0\uac8c": 58, "\uc5ec\ub7ec\ubc88\uc758": 58, "\uac70\uce60": 58, "\uc0c1\uad00\uc5c6\uc9c0\ub9cc": 58, "\uc904\uc5b4\ub4e4\uc218\ub85d": 58, "\uce58\uba85\uc801\uc774\uac8c": 58, "\uc785\ub825\uc73c\ub85c\ub294": 58, "\uc0ac\ub77c\uc9c0\uac8c": 58, "\ubc29\uc548\uc73c\ub85c": 58, "\ubc29\uc9c0\ud558\ub3c4\ub85d": 58, "\uc124\uc815\ub418\uc5c8\ub2e4\uace0": 58, "\ud655\uc778\ud588\uc2b5\ub2c8\ub2e4": 58, "\ubd80\uac00\uc801\uc73c\ub85c": 58, "\uae30\ubc95\ub4e4\ub85c": 58, "\ube44\uad50\ud574\ubcf8": 58, "snr": [58, 61], "\uc870\ud569\uc744": 58, "\ud604\uc0c1\ub3c4": 58, "\uc2e4\ud5d8\uacb0\uacfc\ub97c": [36, 58], "\uc9c4\ud589\uc2dc": 58, "\ub370\uc774\ud130\uc14b\uc5d0\uc11c\ub294": 58, "\ucc38\uc870\ud558\uc2dc\uba74": 58, "8192": 58, "\uc2dc\uc791\ud558\uc600\uace0": 58, "\uc9c4\ud589\ud574\ub3c4": 58, "undistil": 58, "\uc9c4\ud589\ud558\uba74\uc11c": 58, "\uc0dd\uac01\ud588\uc744\ub54c": 58, "\uc7a5\uc810\uc774\ub77c\uace0": 58, "\uc798\ub418\ub294": 58, "\ud69f\uc218\ub97c": 58, "\ube44\uad50\ud574\ubcf4\uace0": 58, "4\ubc30\uc529": 58, "\uc904\uc5ec\uac00\uba74\uc11c": 58, "\uc904\uc784\uc5d0\ub3c4": 58, "\uc904\uc9c0": 58, "\ud559\uc2b5\ubc29\uc2dd\uc73c\ub85c\ub294": 58, "dig": 59, "more": 59, "08453": 59, "arc": 59, "\ub09c\ud574\ud55c": 59, "car": 59, "fly": 59, "wing": 59, "iron": 59, "bunni": 59, "ear": 59, "\uc785\ub825\ubc1b\uc744": 59, "\ub9cc\uc73c\ub85c\ub294": 59, "\ud544\uc694\ud558\ub2e4\uace0": 59, "\uc11c\uc220\ud569\ub2c8\ub2e4": 59, "extern": 59, "77m": 59, "\uc5f0\uc0b0\uc791\uc5c5\uc774": 59, "\uc2e4\ud589\ub429\ub2c8\ub2e4": 59, "\uac00\uc838\uc624\uae30": 59, "compos": 59, "\ubc14\uafb8\uace0": 59, "unshuffl": 59, "1\uac1c\uc758": 59, "4\ubc88": 59, "\uac70\uce58\uace0": 59, "f_c": [8, 59], "\uc0dd\uc131\ub418\uace0": 59, "\ub354\ud574\uc9c0\uac8c": 59, "\ub3d9\uc77c\ud558\ub3c4\ub85d": 59, "\uc124\uc815\ud588\uae30": 
59, "\ub367\uc148": 59, "\uc5f0\uc0b0\ud558\ub294\ub370": 59, "fulladapt": 59, "in_channel": 59, "640": 59, "num_res_block": 59, "downscale_factor": 59, "adapterblock": 59, "total_downscale_factor": 59, "out_channel": 59, "downsample2d": 59, "in_conv": 59, "adapterresnetblock": 59, "act": 59, "adapter_st": 59, "adapter_input": 59, "adapter_conditioning_scal": 59, "num_images_per_prompt": 59, "repeat": 59, "do_classifier_free_guid": 59, "num_warmup_step": 59, "order": 59, "latent_model_input": 59, "scale_model_input": 59, "prompt_emb": 59, "cross_attention_kwarg": 59, "down_block_additional_residu": 59, "noise_pred_uncond": 59, "noise_pred_text": 59, "previou": 59, "extra_step_kwarg": 59, "prev_sampl": 59, "\uc885\ub958\ub85c\ub294": 59, "\ubd84\ub958\ud560": 59, "keypos": 59, "bicub": 59, "\uc81c\uc678\uc2dc\ud0a4\uace0": 59, "\ubd80\ubd84\ucc98\ub7fc": 59, "t2": 59, "\uc2dc\uc640": 59, "\ub123\uc73c\uba74\uc11c": 59, "expens": 59, "late": 59, "\uc2e4\ud5d8\ud574\ubcf8": 59, "\ud06c\ub2e4\uace0": 59, "\ud3ec\ud568\ub418\ub3c4\ub85d": 59, "\uc218\uc2dd\ucc98\ub7fc": 59, "uniformli": 59, "\uc9c4\ud589\ud588\uace0": 59, "cubic": 59, "\uc0c1\uc138\uc0ac\ud56d\uc740": 59, "4x": 59, "tesla": 59, "32g": 59, "\uc2e4\ud5d8\ubcc4": 59, "coco17": 59, "164k": 59, "pidinet": 59, "stuff": 59, "keypoint": 59, "\ub370\uc774\ud130\uc14b\ub85c\ubd80\ud130": 59, "600k": 59, "mm": 59, "mida": 59, "\ubaa8\ub378\ub4e4\uacfc": 59, "\uc815\ub7c9\uc801\uc778": 59, "\uc218\uce58\ub85c": 59, "\ube44\uad50\ud558\ub294\ub370": 59, "\uc88b\uc2b5\ub2c8\ub2e4": 59, "comparisoin": 59, "\uc608\uc2dc\ub4e4\uc740": 59, "\uc9c0\uc5ed\uc744": 59, "\ubabb\ud558\ub2e4\uace0": 59, "\uac83\ub85c": 59, "\uc704\uc5d0\uc11c\ubd80\ud130": 59, "\uc7a5\uc810\ub4e4": 59, "\uba85\uc2dc\ub418\uc5c8\ub358": 59, "\uc0ac\ub840\uc785\ub2c8\ub2e4": 59, "\uc644\ub8cc\ud55c": 59, "\uc801\uc6a9\ud558\uba74\uc11c": 59, "4\ubcf4\ub2e4": 59, "\uacbd\ub7c9\ud654\ub41c": 59, "\uc22b\uc790\ub97c": 59, "\ubc14\uafd4\uac00\uba70": 
59, "tini": 59, "x4": 59, "x8": 59, "1312": 60, "6114": 60, "gunhochoi": 60, "fastcampu": 60, "ch": 60, "\ubb38\uad6c\uac00": 60, "\uc801\ud600\uc788\ub294\ub370\uc694": 60, "bayesian": 60, "vb": 60, "involv": 60, "aevb": 60, "\ub274\ub7f4": 60, "\uadfc\uc0ac\ud568\uc73c\ub85c\uc368": 60, "\uc774\uac00": 60, "\ubc14\uac00": 60, "\ub9cc\ub4e4\uc5b4\ub0b4\uace0": 60, "\ubcf5\uc6d0\ud558\uac8c": 60, "\ub461\ub2c8\ub2e4": 60, "assumpt": 60, "\ub0b4\ub9bd\ub2c8\ub2e4": 60, "\ud558\ub2e4\ub294": 60, "\uc131\uc9c8\uc5d0": 60, "bernoulli": 60, "\ucd5c\ub300\ud654\uc2dc\ud0a4\ub294": 60, "\ub4f1\uc7a5\ud558\uac8c": 60, "\ub3c4\uc2dd\ud654\ud55c": 60, "fc1_1": 60, "784": 60, "hidden_s": 60, "fc1_2": 60, "log_var": 60, "reparametr": 60, "std": 60, "mul": 60, "exp_": 60, "ep": 60, "floattensor": 60, "add_": 60, "reparam": 60, "fc1": 60, "\ucc3e\uc73c\uba74": 60, "\ubd84\ud560\ud560": 60, "\uc7a0\uc7ac\ubcc0\uc218\uc758": 60, "\uc800\ud76c\uac00": 60, "\ubd80\uc5ec\ud55c": 60, "\uac00\uae5d\ub3c4\ub85d": 60, "mont": 60, "carlo": 60, "\uadfc\uc0ac\uac12\uc744": 60, "\uc5f0\uc0b0\ub7c9\uc774": 60, "\ub9ce\uc73c\ubbc0\ub85c": 60, "\ubcc0\ud658\ud558\uc5ec": 60, "\ud3c9\uade0\uc801\uc73c\ub85c": 60, "\uc6d0\ud65c\ud788": 60, "\uc0d8\ud50c\ub9c1\ud558\uc9c0": 60, "\ub354\ud558\uace0": 60, "\uacf1\ud558\uac8c": 60, "\ub530\ub978\ub2e4\uace0": 60, "\uc124\uc815\ud588\uc744": 60, "\ub54c\uc774\uace0": 60, "\uac00\uc815\ud558\uc5ec": 60, "\uc2dc\ub3c4\ud560": 60, "\uba85\uc2dc\ub418\uc5b4": 60, "\ud558\ub2e8\uc5d0\ub294": 60, "\uc67c\ucabd\uc5d0\ub294": 60, "trick\uc774": 60, "\uace0\uc815\ub418\uc5b4": 60, "\uc788\uc5b4\ub3c4": 60, "\uc0d8\ud50c\ub9c1\ud558\ubbc0\ub85c": 60, "\ubbf8\ubd84\ud560": 60, "\uc801\uc6a9\ud558\uae30\uac00": 60, "\uc624\ub978\ucabd\ucc98\ub7fc": 60, "\ubcc0\uc218\ub85c\ub3c4": 60, "\uac00\ub2a5\ud574\uc9c0\uae30": 60, "frei": 60, "wake": 60, "\uc54c\uace0\ub9ac\uc998\ub97c": 60, "\uc801\uc6a9\ud574\uc11c": 60, "\uc2e4\ud5d8\uacb0\uacfc\ub294": 60, 
"\ucd5c\uc801\ud654\ud558\ub294\ub370": 60, "\uc54c\uace0\ub9ac\uc998\uc774": 60, "\uc131\ub2a5\uc801\uc73c\ub85c\ub3c4": 60, "em": 60, "\ud560\uc6a9\ud558\uc5ec": 60, "\ub9ce\uc73c\uba74": 60, "\uc9c0\uc815\ud574\uc92c\ub2e4\uba74": 60, "\ud30c\ub77c\ubbf8\ud130\ub4e4\uacfc": 60, "\uc0ac\uc6a9\ud574\ubcf4\uba74": 60, "15110": 61, "sudo": 61, "zero123plu": 61, "dec": [20, 36, 61], "convers": 61, "\uc18c\uac1c\ud558\uc600\uc2b5\ub2c8\ub2e4": 61, "\ubcf4\uc5ec\uc8fc\uac8c": 61, "\uc5ec\uc12f\uac1c\uc758": 61, "distirbut": 61, "\ud55c\uacc4\uc810\uc774": [8, 61], "\ubd88\uc548\uc815\ud558\uac8c": 61, "\uc904\uc778": 61, "\ubd84\uc11d\ud558\uba70": 61, "\uc0c1\uad00\uad00\uacc4\ub97c": 61, "objavers": 61, "graviti": 61, "\ub3d9\uc77c\ud558\uc9c0\ub9cc": 61, "\uac1d\uccb4\ub4e4\uc774": 61, "canon": 61, "\uc808\ub300\uc801\uc778": 61, "\ud65c\uc6a9\ud55c\ub2e4\uba74": 61, "\uc54c\uc544\uc57c": 61, "\ud6c4\uc18d\uc801\uc73c\ub85c": 61, "dreamgaussian": 61, "\uc624\ucc28\uc728\ub3c4": 61, "3x2": 61, "\uc0ac\uc6a9\ub418\uc5c8\ub358": 61, "\uadf9\ud788": 61, "\ub4dc\ubb45\ub2c8\ub2e4": 61, "\uad6c\uac04\uc5d0\uc11c": 61, "\uc801\uc73c\uba74": 61, "\ubcc0\ud615\uc774": 61, "\uc720\uc6a9\ud558\uc9c0\ub9cc": 61, "\uc8fc\uc785\ub418\uc5c8\uc744\ub54c": 61, "\ud559\uc2b5\ud558\uc600\uc744": 61, "\ubd88\uc548\uc815\ud55c": 61, "\ubcf4\uc5ec\uc8fc\uc5c8\ub358": 61, "\uc6d0\uc778": 61, "\ubcc0\ud654\ub41c": 61, "\ud559\uc2b5\ub418\uc5c8\ub2e4\uace0": 61, "\uc9c4\ud589\ud558\uc600\ub2e4\uace0": 61, "\ubcf4\uc644\ud558\uc5ec": 61, "\ubaa8\ub4c8\uc5d0\uc11c\uc758": 61, "\uc8fc\uc785\ub41c": 61, "\uc801\uc6a9\ud558\uc600\ub2e4\uace0": 61, "\uc9c4\ud589\ud558\uc9c0": 61, "\ubc18\uc601\ub418\uc5c8\uc2b5\ub2c8\ub2e4": 61, "\ud558\uc600\uc744\ub54c": 61, "5x": 61, "\ubcf4\uc5ec\uc8fc\uc5c8\ub2e4\uace0": 61, "\uc120\uc5d0\uc11c": 61, "\ud06c\uae30\uc785\ub2c8\ub2e4": 61, "\uc5bb\uac8c": 61, "\uc720\uc9c0\ud55c\ucc44": 61, "hdri": 61, "kv": 61, "\ud589\ub82c\ub9cc": 61, 
"\ud65c\uc6a9\ud558\uc600\uc2b5\ub2c8\ub2e4": 61, "syncdream": [8, 61], "\ubaa8\ub378\uacfc\uc758": 61, "\uc6d4\ub4f1\ud558\uac8c": 61, "\uce21\uc815\ud558\uc600\ub2e4\uace0": 61, "mvdream": 61, "repositori": 62, "team": 62, "aim": 62, "them": 62, "conduct": 62, "pseudolab": 62, "\ub9e4\uc8fc": 62, "\uc218\uc694\uc77c": 62, "\uc624\ud6c4": 62, "9\uc2dc": 62, "\uac00\uc9dc\uc5f0\uad6c\uc18c": 62, "discord": 62, "room": 62, "dh": 62, "\uc785\uc7a5": 62, "diffinject": 62, "revisit": 62, "debia": 62, "dongjun": 62, "namjun": 62, "jaekwang": 62, "workshop": 62, "preliminari": 62, "\uc870\uc0c1\uc6b0": 62, "linkedin": 62, "\ubb38\uad11\uc218": 62, "\uae40\uc9c0\uc218": 62, "\ubc15\ubc94\uc218": 62, "\uc9c0\uc2b9\ud658": 62, "\uace0\ub3d9\uadfc": 62, "\uc870\ub0a8\uacbd": 62, "\uae40\uc120\ud6c8": 62, "\uc774\uc900\ud615": 62, "\uc870\ud615\uc11c": 62, "\uc720\uc815\ud654": 62, "\ubc15\uc138\ud658": 62, "\uc1a1\uac74\ud559": 62, "\ud55c\ub3d9\ud604": 62, "\uc774\ucc3d\ud658": 62, "\uc720\uacbd\ubbfc": 62, "linkdedin": 62, "\uc774\uc815\uc778": 62, "jsonc": [], "foo": [], "hello": [], "20157a380d107c80debae0d42ab32bd4da": [], "201": [], "splat": [], "td": [], "202": [], "203": [], "204": [], "205": [], "3dg": 20, "206": [], "207": [], "208": [], "towardsdatasci": [], "e7d570081362": [], "4cd8": [], "patapom": [], "shportal": [], "3dvar": [], "green2003spher": [], "209": [], "wiki": [], "table_of_spherical_harmon": [], "2011": [], "rotating_spherical_harmon": [], "gif": [], "2012": [], "2013": [], "2016": [], "marching_cub": [], "nvdiffrast": 20, "03277": [], "nvlab": [], "tab": [], "readm": [], "ov": [], "file": [], "zero123": 8, "columbia": [], "notion": [], "05eb365e0ece43c0bc55ef21a8d4c6f0": [], "pv": [], "2025": 8, "2026": [], "meshlab": [], "2027": [], "2028": [], "2029": [], "2030": [], "2031": [], "2032": [], "epd": [], "gov": [], "hk": [], "eia": [], "regist": [], "eiareport": [], "eia_2522017": [], "2033": [], "2034": [], "2035": [], "2036": [], "2037": [], 
"2038": [], "2039": [], "2040": [], "360\uc758": 20, "strongzero": [], "image_24": [], "400px": [], "image_25": [], "\ucf54\ub108\uc5d0\uc11c": [], "\uc591": 20, "image_7": [], "image_8": [], "gs": [], "novemb": [], "dm\uc774": 36, "representaiton\uc758": 36, "\ub9de\ucd98\ub2e4": [], "\uc804\ub2ec": 8, "spotlight": 36, "16213": 36, "thu": 36, "vsd": 36, "appendix\ub97c": 36, "\ud074\ub54c": 36, "vsd\uc640": 36, "vsd\uac00": 36, "sds\uc5d0": 36, "\ud45c\ud604\ub825\uc774": 36, "\uc88b\uc744\uac83\uc73c\ub85c": 36, "\ubbff\ub294\ub2e4\uace0": 36, "\ud488\uc774": 36, "\uc790\ub3d9\ud654\ud558\ub294": 36, "\uc720\ub9dd\ud558\uba70": 36, "encompass": 36, "game": 36, "paradigm\uc758": 36, "\uc77c\uc73c\ud0ac": 36, "\uc0dd\uc131\ub825\uc744": 36, "fusion\uc740": 36, "\ubc29\ud5a5\uc5d0\uc11c": 36, "\uc774\ubbf8\uc9c0\ub77c\ub3c4": 36, "\ud55c\uac83\uc73c\ub85c\uc368": 36, "evaluate\ub418\uc5c8\ub2e4": 36, "satuat": 36, "\ubc1c\uc0dd\ud588\ub2e4": 36, "\ud30c\uc545\uc774": 36, "systemat": 36, "\ucde8\uae09\ud558\uba70": 36, "\ucde8\uae09\ud55c": 36, "sds\uc640\ub294": 36, "vsd\ub294": 36, "\ucd5c\uc801\ud654\ud558\uba70": 36, "images\uc758": 36, "divergence\uc5d0": 36, "formulation\uc5d0\uc11c": 36, "vs\ub294": 36, "\uc6d0\ubb38\ucc38\uace0": 36, "optimizesa": 36, "induc": 36, "close": 36, "see": 36, "formul": 36, "character": 36, "phenomenon": 36, "potenti": 36, "formulation\uc744": 36, "\ud480\uae30": 36, "particl": 36, "\ub3c4\uc785\ud558\uace0": 36, "particles\ub85c": 36, "\ud45c\ud604\ud558\uc600\ub2e4": 36, "wasserstein": 36, "rule\uc744": 36, "\ub0c8\ub2e4": 36, "particles\uac00": 36, "sample\ub41c": 36, "\uac83\uc784\uc744": 36, "\ud544\uc694\ud55c\ub370": 36, "particles\uacfc": 36, "sds\ub294": 36, "dirac": 36, "vsd\ub77c\uace0": 36, "sds\uac00": 36, "\uc774\uc720\ub97c": 36, "particle\ub9cc\uc73c\ub85c\ub3c4": 36, "\uc7a0\uc7ac\uc801\uc73c\ub85c": 36, "sds\ubcf4\ub2e4": 36, "\uc81c\uacf5\ud560": 36, "vsd\ub97c": 36, "\uc694\uc18c\ub9cc": 36, 
"\ub2f4\uc558\ub2e4": 36, "\uace0\uc804": 36, "3d\uc5f0\uad6c\uacfc": 36, "smoothing\ubb38\uc81c\ub97c": 36, "4\ub294": 36, "\uc694\uc18c\ub4e4\uc5d0": 36, "\ub2f4\uace0": 36, "\ud6c8\ub828\uacfc\uc815": 36, "cene": 36, "study\ub294": 36, "vsd\uc5d0": 36, "\ud6a8\uacfc\uc801\uc784\uc744": 36, "\uacb0\ub860\uc801\uc73c\ub85c": 36, "\uc218\uc788\uc73c\uba70": 36, "prolificdreamer\uc758": 36, "\ub2a5\ub825\uacfc": 36, "structure\uc640": 36, "effects\ub97c": 36, "prolificdreamer\ub294": 36, "\uc804\ubc29\ud5a5\uc744": 36, "\uc131\uacf5\ud588\ub2e4": 36, "nerf\ub85c": 36, "prolificdreamer\ub85c": 36, "\uc138\uc138\ud558\uace0": 36, "mesh\ub4e4\uc744": 36, "\uc218\uc788\ub2e4": 36, "gradual": 36, "p_0": 36, "alpha_tx_0": 36, "q_0": 36, "dx_0": 36, "hyperparameter\ub85c": 36, "sigma_0": 36, "alpha_1": 36, "clean": 36, "sample\ub4e4\uc744": 36, "\uadf8\ub9b4": 36, "approximating\ud558\ub294": 36, "\ub370\uc5d0\ub3c4": 36, "\uc0ac\uc6a9\uac00\ub2a5\ud558\ub2e4": 36, "generation\uc73c\ub85c": 36, "guidence\ub97c": 36, "\uc62c\ub77c\uac00\uc9c0\ub9cc": 36, "sjc": 36, "\ub77c\uace0\ub3c4": 36, "\ubd88\ub9ac\uba70": 36, "magic3d": 36, "fantasia3d": [8, 36], "metric\uc740": 36, "space\uc774\ub2e4": 36, "\uc8fc\uc5b4\uc9c4\ub2e4\uace0": 36, "multilay": 36, "perceptron\uc744": 36, "rays\ub85c": 36, "\uc815\uc758\ub418\uba70": 36, "points\uc758": 36, "\uac00\uc911\ud569\ud558\uc5ec": 36, "\uacb0\uc815\ud55c\ub2e4": 36, "nerf\ub294": 36, "\uc720\uc5f0\ud558\uace0": 36, "\uc7a5\uba74\ub3c4": 36, "\ud45c\ud604\uac00\ub2a5\ud558\ub2e4": 36, "mesh\uc640": 36, "color\ub85c": 36, "meshes\uc758": 36, "ray\uac00": 36, "\uc9c0\ub098\ub294": 36, "intersection\uc758": 36, "\uacc4\uc0b0\ud568\uc73c\ub85c\uc368": 36, "\ub80c\ub354\ub9c1\uc774": 36, "represetation\uc5d0": 36, "\ud655\ub960\ubc00\ub3c4": 36, "distribution\uc774\ub2e4": 36, "dm\uc744": 36, "sample\uacfc": 36, "\uc881\ud788\ub294": 36, "prob\uc744": 36, "\ud478\ub294\uac83\uc740": 36, "\ube44\ud6a8\uc728\uc801\uc774\uae30": 36, 
"problem\uc744": 36, "\uc810\uc810": 36, "\ucee4\uc9c8\ub54c": 36, "distribution\uc774": 36, "\uac00\uae4c\uc6cc": 36, "\uc9c0\uba70": 36, "\uc26c\uc6cc\uc9c4\ub2e4": 36, "\ubc14\ub010": 36, "argmin": 36, "tag5": 36, "\ud480\uae30\uc704\ud574": 36, "\ud480": 36, "\ubcf5\uc7a1\ud574\uc9c4\ub2e4": 36, "particles\ub97c": 36, "particles\uc744": 36, "\uc218\ub834\ub418\uace0": 36, "\ucd5c\uc801": 36, "alpha_tg": 36, "triangleq": 36, "special": 36, "vsd\uc758": 36, "case\uc5d0": 36, "mutlipl": 36, "particles\uc77c": 36, "particle\uc744": 36, "particle\uc774": 36, "\ubb58\uae4c": 36, "\ub290\ub08c\uc801\uc778": 36, "\ubc1b\uc544\ubcf4\uc790": 36, "friendli": [8, 36], "\ucd94\ucd9c\ud558\uace0\uc790": 36, "sampling\uc5d0\uc11c": 36, "2d\uc774\ubbf8\uc9c0": 36, "solver\uc5d0\uc11c": 36, "\uc870\uc808\ud558\uba74\uc11c": 36, "sds\ub3c4": 36, "\uc774\uc810\uc740": 36, "\ub9c8\ucc2c\uac00\uc9c0": 36, "\ud074\ub54c\ub9cc": 36, "\uc720\ud6a8\ud55c": 36, "isol": 36, "\ud45c\ud604\ub825\ub9cc": 36, "appendix\uc758": 36, "\uc0dd\uc131\uc73c\ub85c": 36, "\uc138\ubd80\ud45c\ud604\uc774": 36, "visualization\uc5d0\uc11c\ub3c4": 36, "\ucc28\uc774\uc810\uc744": 36, "approch": 36, "\uac1c\uc120\ud558\uace0\uc790": 36, "1st": 36, "prolificdreamer\uc5d0\uc11c\ub294": 36, "density\ub294": 36, "coordinate\uc774\ub2e4": 36, "scene\uc5d0\uc11c\ub294": 36, "magic3d\uc758": 36, "\ube44\uc5b4\uc788\ub3c4\ub85d": 36, "camera\ub97c": 36, "\ub458\ub7ec": 36, "\uc2f8\ub3c4\ub85d": 36, "2\ub2e8\uacc4": 36, "annealing\uc744": 36, "objective\uc5d0": 36, "sds\ub098": 36, "\uc801\uc6a9\uac00\ub2a5\ud558\ub2e4": 36, "\uc2a4\ud15d\uc5d0\uc11c\ub294": 36, "t\uac00": 36, "divergence\uac00": 36, "\ud559\uc2b5\ucd08\uae30\uc5d0": 36, "\uc870\uc815\uc774": 36, "\ucc28\ub97c": 36, "2nd": 36, "fantasia3d\uc758": 36, "\ub530\ub790\ub294\ub370": 36, "geometry\uc640": 36, "geometry\ub97c": 36, "\uc2e4\ud5d8\uacb0\uacfc\uc5d0\uc11c": 36, "\uc774\ub2e8\uacc4\uc5d0\uc11c\ub294": 36, "\ud06c\uc9c0\uc54a\uc544": 36, 
"fantasia3d\uc640": 36, "sds\uc5d0\uc11c": 36, "\ucd94\uac00\ud558\uba70": 36, "\uc0c1\uc2b9": 36, "\ud558\ub77d\ud558\ub294": 36, "\uc791\uc744\ub54c\uc5d0\ub3c4": 36, "\uc218\uc788\uae30": 36, "diversity\uc758": 36, "\ucee4\uc57c\ub9cc": 36, "\ud558\ub77d\ud560": 36, "\uc218\ubc16\uc5d0": 36, "\uac10\uc0c1": 36, "wgan": 36, "geco": 36, "dreamer\uc758": 36, "\ubcf4\uc644\ud574": 36, "image_9": [], "prolificdreamer_9": [], "image_10": [], "prolificdreamer_10": [], "image_11": [], "prolificdreamer_11": [], "\ub9de\ucdb0\uac00\ub294": 36, "fusion\uacfc": [], "\uac1c\uc120\ub41c": 36, "\uc81c\uc548\ud558\uace0\uc790\ud55c": 36, "dreamer\uc640": 36, "siggraph": 8, "2405": 8, "08054": 8, "zju3dv": 8, "januari": 8, "\uce5c\ud654\uc801\uc778": 8, "\uc81c\uc5b4": 8, "\uac00\uc838\uc57c": 8, "interactive\ud558\uac8c": 8, "ui": 8, "\uac04\ub2e8": 8, "respons": 8, "cad": 8, "wonder3d": 8, "\ubc29\ubc95\ub4e4\uc758": 8, "\uc57c\ub204\uc2a4": 8, "\uc804\ucc98\ub9ac": 8, "preprocess": 8, "n_v": 8, "pose\uc5d0": 8, "voxelize\ub97c": 8, "f_v": 8, "candidates\ub97c": 8, "condition\ub3c4": 8, "mv": 8, "fusion\uc744": 8, "f_l": 8, "vp": 8, "mvconv": 8, "3dconv": 8, "vm": 8, "\uc644\uc131": 8, "volume\uc744": 8, "f_u": 8, "proxies\ub85c": 8, "sampling\ud558\uace0": 8, "\uc0d8\ud50c\ub9c1\ud568": 8, "timestamp": 8, "noise\ub3c4": 8, "epsilon_i": 8, "conditionin\uc774": 8, "\uace0\ub824\ud574\uc11c": 8, "\ud3b8\uc9d1\uc744": 8, "\uac04\ub2e8\ud558\uc9c0": 8, "pathwai": 8, "\uad6c\uc131\ud568": 8, "preview\ub97c": 8, "\ud655\uc778\ud558\uace0": 8, "\uac80\uc0ac": 8, "\uce90\uc2f1\ud568": 8, "\ud504\ub9ac\ubdf0": 8, "\ub80c\ub354\ub9c1\ud560": 8, "cache\ub97c": 8, "cache\uc5d0\uc11c": 8, "\uc800\uc7a5\ub41c": 8, "reconstruction\uc740": 8, "viewpoint\uac00": 8, "unexpect": 8, "\ub9cc\ub4e4\uc5b4\uc838": 8, "\ubb49\uac1c\uc9c0\uac70\ub098": 8, "\uc62c\ub9bc": 8, "\uc758\uacac": 8, "multiview\uac00": 8, "\ub9cc\ub4e4\uc5b4\uc9c4\ub2e4": 8, "\ubcf4\uc5ec\uc9d0": 8, "delta_x": 8, 
"wonder3d\uc640": 8, "\uc65c\uace1\uc774\ub098": 8, "\ucd5c\uc18c\ud654\ub428": 8, "syncdreamer\ub294": 8, "\uac70\ubd81\uc774\uc640": 8, "\ubd88\uc77c\uce58\ub97c": 8, "ours\ub294": 8, "\ud604\uc2e4\uc801\uc774\uace0": 8, "\uc7ac\uad6c\uc131\ud558\uba70": 8, "\uc804\ud658\uacfc": 8, "\uc815\ubc00\ud55c": 8, "wonder3d\ub294": 8, "\ub098\ud0c0\ub098\uace0": 8, "\uac15\ud654\ud568": 8, "\uc758\uc790\uac00": 8, "\ud718\uac70\ub098": 8, "\uc77c\uadf8\ub7ec\uc9d0": 8, "ours\uc758": 8, "\ubbf8\uc801": 8, "\ub3c4\ub11b": 8, "\ub3cb\ubcf4\uc784": 8, "\uc778\uacf5\uc801\uc774\uac70\ub098": 8, "\uc0ac\ud56d\uc774": 8, "\ub9ce\uc74c": 8, "\ucf54\uc2a4": 8, "\uc250\uc774\ud504\uc5d0\uc11c": 8, "\uc2dc\uc791\ud558\uc9c0\ub9cc": 8, "\uac1c\uc120\ud568": 8, "\uba85\ud655\ud558\uace0": 8, "\uacbd\uc7c1": 8, "\ubc29\ubc95\ub860\ubcf4\ub2e4": 8, "\ud488\uc9c8\uc5d0\uc11c": 8, "\uc815\ubcf4\uc5d0\uc11c": 8, "\ub2e8\uc21c\ud654\ub418\uace0": 8, "\ubd88\uc77c\uce58\ud568": 8, "richardson": 8, "guidelin": 8, "30\uba85": 8, "35\uac1c\uc758": 8, "case\ub97c": 8, "3\uc810": 8, "\ucd94\uac00\uc2dc": 8, "artifacts\uac00": 8, "\uc2a4\ubb34\uc2a4\ud558\uace0": 8, "\ud14d\uc2a4\uccd0\ub97c": 8, "dilation\uc774": 8, "rendering\uc774": 8, "proxy\uc640": 8, "dilation\uc744": 8, "block\ub9cc": 8, "comfyui": 8}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"inform": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61], "synthet": [0, 3, 42], "data": [0, 4, 7, 14, 17, 18, 42], "stabl": [0, 4, 5, 21, 27, 44, 59], "diffus": [0, 4, 5, 8, 11, 14, 15, 16, 18, 19, 21, 23, 25, 26, 27, 28, 29, 35, 36, 37, 39, 40, 42, 43, 44, 45, 46, 49, 50, 52, 54, 55, 57, 58, 59], "foliar": 0, "diseas": 0, "classif": [0, 42], "1": [0, 3, 4, 5, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 39, 40, 42, 44, 46, 48, 49, 54, 55, 56, 58, 59, 61], "\uac1c\uc694": 0, "2": [0, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 49, 54, 55, 56, 58, 59, 61], "baselin": [0, 47], "\uad6c\ucd95": 0, "3": [0, 3, 4, 5, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 49, 54, 55, 56, 58, 59, 61], "fine": [0, 7, 11, 23, 27, 42, 44, 50, 54, 55], "tune": [0, 7, 11, 23, 27, 42, 44, 50, 54], "4": [0, 3, 4, 5, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 49, 54, 55, 56, 58, 59, 61], "\uc131\ub2a5": 0, "\ube44\uad50": [0, 8, 15, 43], "5": [0, 3, 4, 5, 8, 11, 13, 16, 17, 18, 20, 21, 25, 29, 30, 31, 33, 34, 35, 36, 38, 39, 40, 42, 46, 49, 55, 56, 58, 61], "discuss": [0, 11, 20, 34, 56], "6": [0, 4, 8, 13, 16, 21, 25, 34, 35, 39, 42, 46, 49, 55], "appendix": [0, 1, 34, 50], "train": [1, 4, 5, 7, 8, 10, 11, 14, 15, 17, 23, 31, 32, 33, 36, 38, 42, 46, 47, 48, 51, 56, 58, 60], "dreambooth": [1, 18, 24, 44, 50], "naver": 1, "webtoon": 1, "face": [1, 40], "dataset": [1, 3, 21, 31, 34, 35, 39, 47, 54], "introduct": [1, 3, 4, 5, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 42, 43, 46, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61], "ablat": [1, 4, 5, 8, 14, 20, 21, 26, 32, 34, 35, 36, 47, 50, 52, 56, 57, 59], "studi": [1, 3, 4, 5, 8, 20, 21, 26, 32, 34, 36, 47, 50, 52, 54, 56, 57, 59], "prior": [1, 4, 48, 55], "preserv": [1, 56], "loss": [1, 8, 14, 47, 57, 58], "neg": [1, 4, 9], "prompt": [1, 26, 35], "instanc": 1, "guidanc": [1, 7, 21, 23, 33, 49, 52, 54], "scale": [1, 14, 15, 22, 25, 35, 42, 54, 61], "3d": [2, 8, 18, 19, 20, 31, 35, 36, 39, 55], "gaussian": [2, 20], "splat": [2, 20], "real": [2, 42], "time": [2, 54, 58], "radianc": [2, 34, 39], "field": [2, 34, 39], "render": [2, 
19, 34, 39], "overview": [2, 21, 47, 48, 57, 59], "differenti": 2, "optim": [2, 18, 34, 55, 59], "adapt": [2, 4, 9, 11, 26, 27, 49, 59], "densiti": [2, 20], "control": [2, 4, 10, 55], "fast": [2, 24, 58], "diffenrenti": 2, "raster": 2, "result": [2, 4, 5, 7, 8, 9, 10, 18, 21, 23, 26, 31, 33, 34, 35, 36, 39, 42, 47, 48, 49, 51, 52, 54, 57], "evalu": [2, 3, 7, 9, 15, 28, 31, 32, 39, 47, 52], "limit": [2, 5, 9, 11, 18, 20, 21, 32, 35, 39, 43, 47, 48, 49, 50, 56, 57], "A": [3, 27, 31, 33, 34, 35], "gener": [3, 4, 5, 7, 8, 9, 11, 13, 16, 18, 20, 29, 35, 36, 37, 42, 44, 46, 54, 55, 61], "model": [3, 4, 5, 7, 11, 14, 15, 16, 17, 18, 19, 21, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 49, 50, 52, 54, 55, 56, 57, 58, 59], "\ud559\uc2b5": 3, "\uc790\ub8cc": 3, "0": [3, 4, 18, 30, 34, 39], "abstract": [3, 4, 5, 7, 11, 13, 15, 16, 17, 18, 19, 20, 22, 23, 25, 28, 30, 34, 35, 37, 38, 39, 40, 43, 44, 47, 49], "background": [3, 13, 14, 16, 21, 23, 35, 36, 39, 42, 47, 48, 49, 55, 58], "kl": 3, "diverg": 3, "kullback": 3, "leibler": 3, "incept": 3, "score": [3, 19, 36, 37, 42], "IS": [3, 42], "fid": [3, 42], "fr\u00e9chet": 3, "distanc": [3, 39], "kernel": 3, "clean": 3, "benchmark": [3, 32], "comparison": [3, 4, 5, 9, 16, 20, 21, 24, 25, 26, 32, 34, 35, 36, 39, 47, 56, 57, 59, 61], "between": 3, "metric": [3, 31], "Is": 3, "all": 3, "we": [3, 19], "need": 3, "animatediff": 4, "relat": [4, 5, 8, 9, 11, 15, 18, 20, 21, 24, 26, 27, 29, 34, 35, 37, 39, 40, 42, 43, 47, 56], "work": [4, 5, 8, 9, 11, 15, 18, 20, 21, 24, 26, 27, 29, 31, 32, 34, 35, 36, 37, 39, 40, 42, 43, 47, 48, 49, 56], "text": [4, 7, 11, 19, 20, 22, 23, 26, 31, 33, 36, 43, 44, 50, 54, 56, 61], "imag": [4, 5, 7, 10, 11, 15, 16, 20, 21, 22, 23, 26, 29, 31, 32, 44, 46, 47, 49, 50, 54, 56, 61], "person": [4, 18, 44], "t2i": [4, 18, 59], "anim": [4, 5, 21], "preliminari": [4, 5, 18, 26, 54, 57, 59], "low": [4, 27, 30], "rank": [4, 24, 27, 30], "lora": [4, 27, 30], "allevi": 4, "effect": [4, 
54], "from": [4, 13, 35, 36, 42, 52], "domain": 4, "learn": [4, 11, 48], "motion": [4, 17], "modul": [4, 27], "new": 4, "pattern": 4, "motionlora": 4, "practic": 4, "infer": [4, 13, 17, 30, 33, 36], "experi": [4, 5, 9, 11, 13, 14, 16, 18, 19, 20, 21, 22, 24, 26, 29, 30, 31, 32, 34, 36, 37, 40, 44, 46, 50, 51, 54, 55, 56, 58, 59, 60], "qualit": [4, 5, 9, 20, 21, 31, 32, 35, 52], "quantit": [4, 7, 8, 9, 16, 20, 21, 31], "design": [4, 15, 16, 36, 59], "effici": [4, 15, 20, 27], "conclus": [4, 8, 9, 15, 16, 18, 20, 21, 24, 26, 27, 35, 39, 40, 42, 48, 52, 54, 55, 57], "7": [4, 21, 33, 35, 39, 49, 55], "\uc2e4\uc2b5": 4, "anyon": 5, "video": [5, 21, 31, 44], "human": [5, 31, 52], "method": [5, 8, 9, 11, 20, 21, 22, 24, 26, 30, 31, 32, 35, 37, 39, 40, 43, 47, 56, 57, 59], "network": [5, 31, 41, 47], "architectur": [5, 15, 17, 21, 33, 47, 49], "strategi": [5, 7, 32], "implement": [5, 10, 20, 21, 32, 34, 47, 59], "bbdm": 6, "cm3leon": 7, "pretrain": [7, 27, 52], "token": [7, 33], "retriev": 7, "augment": [7, 54], "object": [7, 9, 14, 47, 58], "function": [7, 14, 39, 47], "To": [7, 31, 54], "import": 7, "decod": [7, 14, 15, 33, 39], "temperatur": 7, "sampl": [7, 13, 14, 18, 19, 25, 29, 34, 36, 42, 58], "topp": 7, "classifi": [7, 21, 23, 33, 45, 49, 52], "free": [7, 21, 23, 33, 52], "cfg": 7, "contrast": 7, "topk": 7, "cd": 7, "k": 7, "supervis": 7, "instruct": 7, "guid": [7, 8, 23, 43, 54], "edit": [7, 8, 11, 16, 32, 46], "ground": [7, 28], "spatial": 7, "caption": 7, "visual": [7, 15, 48], "question": 7, "answer": 7, "task": [7, 27], "conceptlab": 9, "prelimiari": [9, 24], "The": [9, 19, 54], "constraint": 9, "regular": [9, 41], "evolutionari": 9, "creativ": 9, "concept": [9, 11], "mix": [9, 41], "setup": [9, 15, 26, 43], "controlnet": [10, 61], "addit": [10, 30, 34, 47], "base": [10, 33, 37], "condit": [8, 10, 15, 21, 23, 29, 38, 56, 61], "block": [10, 15, 17], "zero": [10, 32, 45, 46], "convolut": [10, 29, 31], "custom": [11, 54], "deep": 11, "transfer": 11, "singl": 11, 
"multipl": [11, 21], "composit": 11, "detail": [11, 20, 21, 32, 34, 47, 48, 56, 59], "dall": [12, 43, 48], "e": [12, 35, 39, 43, 48], "ddim": [13, 16, 49], "ddpm": [13, 14, 16, 25, 49, 58], "variat": [13, 36, 41, 60], "For": 13, "non": 13, "markovian": 13, "forward": [13, 14], "process": [13, 14, 16], "code": 13, "q": [14, 33], "mathbf": 14, "x": 14, "_t": 14, "_": 14, "t": [14, 30], "revers": [14, 16], "p": 14, "l": 14, "denois": [14, 16, 25], "encod": [14, 21, 29, 33, 34, 39], "l_t": 14, "l_": 14, "l_0": 14, "simplifi": 14, "qualiti": [14, 16, 42, 47], "parameter": [14, 30, 58], "dit": 15, "complex": [15, 35], "latent": [15, 16, 27, 29, 39, 43, 44, 54], "transform": [15, 29, 30, 48], "space": [15, 16, 19, 20, 36, 54], "patchifi": 15, "experiment": [15, 26], "set": [15, 31], "class": 15, "genert": 15, "size": [15, 25], "patch": 15, "gflop": 15, "ar": 15, "critic": 15, "improv": [15, 25, 38, 42, 49, 61], "perform": 15, "larger": 15, "more": [15, 26], "comput": 15, "alreadi": 16, "have": 16, "semant": [16, 33], "probabl": [16, 58], "implicit": 16, "manipul": 16, "clip": [16, 21, 23], "discov": 16, "In": 16, "problem": [16, 30], "asymmetr": 16, "asyrp": 16, "h": 16, "neural": [16, 19, 34, 39], "direct": 16, "With": 16, "boost": 16, "stochast": [16, 41, 60], "nois": [16, 23, 61], "inject": 16, "overal": [8, 16, 32], "versatil": 16, "analysi": [16, 21, 32, 47], "dreamov": 17, "collect": 17, "preprocess": 17, "content": [17, 62], "guider": 17, "dream": 18, "booth": 18, "approach": 18, "goal": 18, "\ud14d\uc2a4\ud2b8": 18, "\ud504\ub86c\ud504\ud2b8\uc5d0": 18, "\ucda9\uc2e4\ud558\uba74\uc11c": 18, "\uc8fc\uc5b4\uc9c4": 18, "subject": 18, "\uc758": 18, "ident": 18, "\uae30\ud558": 18, "\ud615\ud0dc": 18, "\ubc0f": 18, "\uc678\uad00": 18, "\uc744": 18, "\ubc18\uc601\ud558\ub294": 18, "asset": 18, "\uc0dd\uc131": [8, 18], "dreamfus": [18, 19, 55], "failur": 18, "naiv": 18, "fusion": 18, "dreambooth3d": 18, "stage": [18, 38, 48, 54], "partial": 18, "multi": [18, 38, 61], 
"view": [18, 34, 35, 61], "stage3": 18, "final": 18, "nerf": [18, 34, 39], "applic": [18, 50, 59], "distil": [19, 27, 36, 46, 54, 57, 58], "how": 19, "can": 19, "paramet": [19, 27, 38, 42, 49], "pixel": 19, "algorithm": [19, 49, 57, 60], "synthesi": [19, 21, 22, 34, 35, 43, 44, 49], "dreamgaussian": 20, "creation": [], "represent": [20, 34, 36], "3dg": [], "arxiv": 35, "march": [], "cube": [], "nvdiffrast": [], "github": [], "project": 20, "page": [], "mesh": [20, 35], "extract": 20, "local": [20, 61], "queri": 20, "color": 20, "back": 20, "uv": 20, "textur": [20, 39], "refin": [20, 38], "dreampos": 21, "fashion": 21, "via": [21, 36, 46], "still": 21, "mechan": 21, "split": 21, "vae": [21, 48, 60], "modifi": 21, "unet": 21, "finetun": [21, 24, 33, 44], "pose": 21, "input": [21, 56], "futur": [21, 35, 36, 39, 49], "8": [21, 33, 39, 49], "up": [22, 24], "gan": [22, 25, 43, 49, 51], "\uc8fc\uc694": [8, 22], "\uc9c8\uc758\uc751\ub2f5": 22, "glide": [23, 35], "inpaint": [23, 29], "hyperdreambooth": 24, "contribut": [24, 31, 33, 52], "lightweight": 24, "lidb": 24, "hypernetwork": 24, "relax": 24, "follow": 24, "i": 25, "probabilist": 25, "log": 25, "likelihood": 25, "improc": 25, "speed": 25, "ip": 26, "larg": [26, 52], "exist": [26, 30], "lcm": 27, "univers": 27, "acceler": [27, 54], "propos": 27, "\uae30\uc874": 27, "\uc5f0\uad6c\uc758": 27, "\ud55c\uacc4\uc810": 27, "\uae30\ubc18": [8, 27], "\uc5f0\uad6c": 27, "consist": [27, 46, 47, 54, 61], "cms\uacfc": 27, "\ucc28\uc774\uc810": 27, "arithmet": 27, "llm": 28, "summari": [28, 32, 51, 60], "On": 29, "perceptu": 29, "compress": 29, "tradeoff": 29, "ldm": [29, 43], "beyond": 29, "256x256": 29, "super": [29, 33, 52], "resolut": [29, 33, 44, 52, 55], "terminolog": 30, "convent": 30, "statement": 30, "aren": 30, "solut": 30, "good": 30, "enough": 30, "our": 30, "updat": [30, 36], "matric": 30, "No": 30, "latenc": 30, "appli": 30, "empir": 30, "ia3": 30, "aa": 30, "\uc0ac\uc6a9\ubc95": 30, "refer": [30, 32, 61], "make": 31, 
"\uc81c\uc548": 31, "\ubc30\uacbd": 31, "\ud2b9\uc131": 31, "previou": [31, 48], "spatiotempor": 31, "layer": 31, "pseudo": [31, 43], "psuedo": 31, "attent": [31, 61], "frame": [31, 44], "interpol": [31, 44], "automat": 31, "evaluaton": 31, "msr": 31, "vtt": 31, "evluat": 31, "ucf": 31, "101": 31, "\uacb0\ub860": 31, "mimicbrush": 32, "shot": [32, 45, 46], "imit": 32, "three": 32, "line": 32, "pipelin": [8, 32, 48], "structur": 32, "other": [32, 35, 39], "review": 32, "muse": 33, "mask": [8, 33], "pre": 33, "us": [33, 62], "vqgan": 33, "iter": 33, "parallel": 33, "repres": 34, "scene": [34, 55], "volum": [8, 34], "posit": 34, "hierarch": 34, "point": [35, 39], "system": 35, "cloud": 35, "2022": 35, "upsampl": 35, "produc": 35, "sdedit": 37, "sde": 37, "smld": 37, "sdxl": 38, "micro": 38, "crop": 38, "aspect": 38, "autoencod": [38, 44], "put": [38, 61], "everyth": [38, 61], "togeth": [38, 61], "shap": 39, "sign": 39, "stf": 39, "acknowledg": 39, "styo": 40, "styliz": 40, "framework": 40, "stylegan": 41, "map": 41, "style": 41, "adain": 41, "\uc2e4\ud5d8": 41, "\uacb0\uacfc": [41, 43], "imagenet": 42, "imagen": [42, 52, 53], "protocol": 42, "accuraci": 42, "differ": 42, "merg": 42, "textual": 43, "invers": 43, "cf": 43, "\uc774\ud574": 43, "\ubabb\ud568": 43, "embed": 43, "\uc131\ub2a5\ud3c9\uac00": 43, "2\uc640": 43, "word": 43, "\ub450": 43, "\uac1c": 43, "\uc0ac\uc6a9": 43, "bia": 43, "reduct": 43, "\uc815\ub7c9\ud3c9\uac00": 43, "\ud3c9\uac00": 43, "\uc8fc\ubaa9\ud560": 43, "\uc810": 43, "\uc0ac\uc6a9\uc790\ud3c9\uac00": 43, "\ub9c8\ubb34\ub9ac": 43, "videoldm": 44, "turn": 44, "tempor": 44, "predict": 44, "long": 44, "term": 44, "high": [36, 44, 55], "rate": 44, "sr": 44, "drive": 44, "your": 45, "secretli": 45, "isol": 46, "few": 46, "step": [46, 54, 56, 57], "cyclegan": 47, "translat": [47, 56], "mode": 47, "collaps": 47, "adversari": 47, "cycl": 47, "full": [47, 57], "\ucc38\uace0": 47, "least": 47, "squar": 47, "\ucd94\uac00": [20, 47], "\uc124\uba85": 47, 
"against": 47, "reconstruct": [8, 47], "pair": 47, "discusss": 47, "gpt": 48, "vq": 48, "methodolog": [48, 52], "an": 48, "autoregress": 48, "\uc608\uc2dc": 48, "equat": [8, 48], "\ud559\uc2b5\uacfc\uc815": 48, "codebook": 48, "beat": 49, "group": 49, "normal": 49, "impact": 49, "s": 49, "9": 49, "procedur": 51, "theoret": 51, "t5": 52, "xxl": 52, "cascad": 52, "weight": 52, "sampler": 52, "static": 52, "threshold": 52, "dynam": 52, "drawbench": 52, "tabl": 52, "editor": 53, "One": [54, 56, 57], "solv": 54, "pf": 54, "od": [54, 58], "skip": 54, "abul": 54, "solver": 54, "schedul": [54, 61], "omega": 54, "downstream": 54, "magic3d": 55, "coars": 55, "ad": 56, "unpair": 56, "extens": 56, "distribut": [36, 57], "match": 57, "regress": 57, "main": 57, "unacceler": 57, "progress": [8, 58], "continu": 58, "definit": 58, "ancestr": 58, "flow": 58, "parametr": 58, "intract": 60, "sgvb": 60, "gradient": 60, "bay": 60, "reparameter": 60, "trick": 60, "zero123": 61, "stabil": 61, "global": 61, "flexdiffus": 61, "state": 61, "art": 61, "depth": 61, "welcom": 62, "pseudodiffus": 62, "about": 62, "public": 62, "tech": 62, "blog": 62, "contributor": 62, "\ucc38\uace0\uc790\ub8cc": 20, "prolificdream": 36, "2023": [], "05": [], "fidel": 36, "divers": 36, "dm": 36, "rule": 36, "sd": [8, 36], "prolif": 36, "dreamer": 36, "coin3d": 8, "proxi": 8, "conditiong": 8, "awar": 8, "interact": 8, "workflow": 8, "bound": 8, "part": 8, "preview": 8, "cach": 8, "\ud504\ub85d\uc2dc": 8, "\ubc29\ubc95": 8, "\uad00\ucc30\uc810": 8, "\uc694\uc57d": 8, "dilat": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx": 56}})
\ No newline at end of file