diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..2e7790e --- /dev/null +++ b/config.yaml @@ -0,0 +1,15 @@ +vae: + target: direct3d.models.vae.VAE + params: {} +dit: + target: direct3d.models.dit.DiT + params: {} +semantic_encoder: + target: direct3d.models.condition.SemanticEncoder + params: {} +pixel_encoder: + target: direct3d.models.condition.PixelEncoder + params: {} +scheduler: + target: direct3d.models.condition.Scheduler + params: {} diff --git a/direct3d.egg-info/PKG-INFO b/direct3d.egg-info/PKG-INFO new file mode 100644 index 0000000..3c8a0fc --- /dev/null +++ b/direct3d.egg-info/PKG-INFO @@ -0,0 +1,11 @@ +Metadata-Version: 2.1 +Name: direct3d +Version: 1.0.0 +Summary: Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer +Requires-Python: >=3.10 +License-File: LICENSE +Requires-Dist: torch +Requires-Dist: numpy +Requires-Dist: cython +Requires-Dist: trimesh +Requires-Dist: diffusers diff --git a/direct3d.egg-info/SOURCES.txt b/direct3d.egg-info/SOURCES.txt new file mode 100644 index 0000000..47f821b --- /dev/null +++ b/direct3d.egg-info/SOURCES.txt @@ -0,0 +1,8 @@ +LICENSE +README.md +setup.py +direct3d.egg-info/PKG-INFO +direct3d.egg-info/SOURCES.txt +direct3d.egg-info/dependency_links.txt +direct3d.egg-info/requires.txt +direct3d.egg-info/top_level.txt \ No newline at end of file diff --git a/direct3d.egg-info/dependency_links.txt b/direct3d.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/direct3d.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/direct3d.egg-info/requires.txt b/direct3d.egg-info/requires.txt new file mode 100644 index 0000000..aa3b459 --- /dev/null +++ b/direct3d.egg-info/requires.txt @@ -0,0 +1,5 @@ +torch +numpy +cython +trimesh +diffusers diff --git a/direct3d.egg-info/top_level.txt b/direct3d.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/direct3d.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/direct3d/models/dit.py b/direct3d/models/dit.py index a584b25..34902d7 100644 --- a/direct3d/models/dit.py +++ b/direct3d/models/dit.py @@ -68,7 +68,8 @@ def get_2d_sincos_pos_embed( grid = np.stack(grid, axis=0) grid = grid.reshape([2, 1, grid_size[0], grid_size[1]]) - pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) + grid = torch.from_numpy(grid) + pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid, output_type='pt') if cls_token and extra_tokens > 0: pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) return pos_embed @@ -110,7 +111,7 @@ def __init__( pos_embed = get_2d_sincos_pos_embed( embed_dim, (self.height, self.width), base_size=(self.height, self.width), interpolation_scale=self.interpolation_scale ) - self.register_buffer("pos_embed", torch.from_numpy(pos_embed).float().unsqueeze(0), persistent=False) + self.register_buffer("pos_embed", pos_embed.float().unsqueeze(0), persistent=False) def forward(self, latent): height, width = latent.shape[-2] // self.patch_size, latent.shape[-1] // self.patch_size diff --git a/direct3d/pipeline.py b/direct3d/pipeline.py index a80fd60..53a6364 100644 --- a/direct3d/pipeline.py +++ b/direct3d/pipeline.py @@ -156,4 +156,3 @@ def __call__( outputs = {"meshes": meshes, "latents": latents} return outputs - \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 74c5f41..0e61ca2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,7 @@ einops numpy transformers==4.40.2 diffusers -rembg \ No newline at end of file +rembg +omegaconf +onnxruntime +torchvision