Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
vae:
target: direct3d.models.vae.VAE
params: {}
dit:
target: direct3d.models.dit.DiT
params: {}
semantic_encoder:
target: direct3d.models.condition.SemanticEncoder
params: {}
pixel_encoder:
target: direct3d.models.condition.PixelEncoder
params: {}
scheduler:
target: direct3d.models.condition.Scheduler
params: {}
11 changes: 11 additions & 0 deletions direct3d.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Metadata-Version: 2.1
Name: direct3d
Version: 1.0.0
Summary: Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer
Requires-Python: >=3.10
License-File: LICENSE
Requires-Dist: torch
Requires-Dist: numpy
Requires-Dist: cython
Requires-Dist: trimesh
Requires-Dist: diffusers
8 changes: 8 additions & 0 deletions direct3d.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
LICENSE
README.md
setup.py
direct3d.egg-info/PKG-INFO
direct3d.egg-info/SOURCES.txt
direct3d.egg-info/dependency_links.txt
direct3d.egg-info/requires.txt
direct3d.egg-info/top_level.txt
1 change: 1 addition & 0 deletions direct3d.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

5 changes: 5 additions & 0 deletions direct3d.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
torch
numpy
cython
trimesh
diffusers
1 change: 1 addition & 0 deletions direct3d.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

5 changes: 3 additions & 2 deletions direct3d/models/dit.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def get_2d_sincos_pos_embed(
grid = np.stack(grid, axis=0)

grid = grid.reshape([2, 1, grid_size[0], grid_size[1]])
pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
grid = torch.from_numpy(grid)
pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid, output_type='pt')
if cls_token and extra_tokens > 0:
pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0)
return pos_embed
Expand Down Expand Up @@ -110,7 +111,7 @@ def __init__(
pos_embed = get_2d_sincos_pos_embed(
embed_dim, (self.height, self.width), base_size=(self.height, self.width), interpolation_scale=self.interpolation_scale
)
self.register_buffer("pos_embed", torch.from_numpy(pos_embed).float().unsqueeze(0), persistent=False)
self.register_buffer("pos_embed", pos_embed.float().unsqueeze(0), persistent=False)

def forward(self, latent):
height, width = latent.shape[-2] // self.patch_size, latent.shape[-1] // self.patch_size
Expand Down
1 change: 0 additions & 1 deletion direct3d/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,3 @@ def __call__(
outputs = {"meshes": meshes, "latents": latents}

return outputs

5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ einops
numpy
transformers==4.40.2
diffusers
rembg
rembg
omegaconf
onnxruntime
torchvision