Skip to content

Commit 469bd32

Browse files
committed
Update MVs to Texture projection workflow
1 parent 3b752e6 commit 469bd32

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+12276
-20
lines changed

Checkpoints/CRM_T2I_V3/Put CRM_T2I_V3 model here.txt

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
model:
2+
target: CRM_T2I_V2.imagedream.ldm.interface.LatentDiffusionInterface
3+
params:
4+
linear_start: 0.00085
5+
linear_end: 0.0120
6+
timesteps: 1000
7+
scale_factor: 0.18215
8+
parameterization: "eps"
9+
zero_snr: true
10+
11+
unet_config:
12+
target: CRM_T2I_V2.imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModelStage2
13+
params:
14+
image_size: 32 # unused
15+
in_channels: 8
16+
out_channels: 4
17+
model_channels: 320
18+
attention_resolutions: [ 4, 2, 1 ]
19+
num_res_blocks: 2
20+
channel_mult: [ 1, 2, 4, 4 ]
21+
num_head_channels: 64 # need to fix for flash-attn
22+
use_spatial_transformer: True
23+
use_linear_in_transformer: True
24+
transformer_depth: 1
25+
context_dim: 1024
26+
use_checkpoint: False
27+
legacy: False
28+
camera_dim: 16
29+
with_ip: True
30+
ip_dim: 16 # ip token length
31+
ip_mode: "local_resample"
32+
33+
vae_config:
34+
target: CRM_T2I_V2.imagedream.ldm.models.autoencoder.AutoencoderKL
35+
params:
36+
embed_dim: 4
37+
monitor: val/rec_loss
38+
ddconfig:
39+
#attn_type: "vanilla-xformers"
40+
double_z: true
41+
z_channels: 4
42+
resolution: 256
43+
in_channels: 3
44+
out_ch: 3
45+
ch: 128
46+
ch_mult:
47+
- 1
48+
- 2
49+
- 4
50+
- 4
51+
num_res_blocks: 2
52+
attn_resolutions: []
53+
dropout: 0.0
54+
lossconfig:
55+
target: torch.nn.Identity
56+
57+
clip_config:
58+
target: CRM_T2I_V2.imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
59+
params:
60+
freeze: True
61+
layer: "penultimate"
62+
ip_mode: "local_resample"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
model:
2+
target: CRM_T2I_V3.imagedream.ldm.interface.LatentDiffusionInterface
3+
params:
4+
linear_start: 0.00085
5+
linear_end: 0.0120
6+
timesteps: 1000
7+
scale_factor: 0.18215
8+
parameterization: "eps"
9+
zero_snr: true
10+
11+
unet_config:
12+
target: CRM_T2I_V3.imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModelHyper
13+
params:
14+
image_size: 32 # unused
15+
in_channels: 4
16+
out_channels: 4
17+
model_channels: 320
18+
attention_resolutions: [ 4, 2, 1 ]
19+
num_res_blocks: 2
20+
channel_mult: [ 1, 2, 4, 4 ]
21+
num_head_channels: 64 # need to fix for flash-attn
22+
use_spatial_transformer: True
23+
use_linear_in_transformer: True
24+
transformer_depth: 1
25+
context_dim: 1024
26+
use_checkpoint: False
27+
legacy: False
28+
camera_dim: 16
29+
with_ip: True
30+
ip_dim: 16 # ip token length
31+
ip_mode: "local_resample"
32+
33+
vae_config:
34+
target: CRM_T2I_V3.imagedream.ldm.models.autoencoder.AutoencoderKL
35+
params:
36+
embed_dim: 4
37+
monitor: val/rec_loss
38+
ddconfig:
39+
#attn_type: "vanilla-xformers"
40+
double_z: true
41+
z_channels: 4
42+
resolution: 256
43+
in_channels: 3
44+
out_ch: 3
45+
ch: 128
46+
ch_mult:
47+
- 1
48+
- 2
49+
- 4
50+
- 4
51+
num_res_blocks: 2
52+
attn_resolutions: []
53+
dropout: 0.0
54+
lossconfig:
55+
target: torch.nn.Identity
56+
57+
clip_config:
58+
target: CRM_T2I_V3.imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
59+
params:
60+
freeze: True
61+
layer: "penultimate"
62+
ip_mode: "local_resample"
63+
64+
sampler:
65+
target: CRM_T2I_V3.libs.sample.ImageDreamDiffusion
66+
params:
67+
mode: pixel
68+
num_frames: 7
69+
camera_views: [1, 2, 3, 4, 5, 0, 0]
70+
ref_position: 6
71+
random_background: false
72+
offset_noise: true
73+
resize_rate: 1.0
74+
input_branch_size: 3

Gen_3D_Modules/CRM_T2I_V2/model/crm/sampler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def stage1_sample(
6767

6868
# remove reference views
6969
mask = torch.arange(1, stage1_images.shape[0]+1, 1) % 7 != 0
70-
return stage1_images[mask] # (N, H, W, 3) in [0, 255]
70+
return stage1_images[mask] # (N, H, W, 3) in [0, 1]
7171

7272
@classmethod
7373
def stage2_sample(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .model_zoo import build_model
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import numpy as np
2+
import torch
3+
4+
5+
def create_camera_to_world_matrix(elevation, azimuth):
    """Build a 4x4 camera-to-world matrix for a camera on the unit sphere.

    The camera is placed at the point given by ``elevation`` and ``azimuth``
    (both in degrees, converted with ``np.radians``), looks at the origin,
    and uses +Y as the reference up direction.

    Returns:
        A ``(4, 4)`` numpy array whose rotation columns are the camera's
        right, up and backward (negated forward) axes and whose last column
        is the camera position.
    """
    elev_rad = np.radians(elevation)
    azim_rad = np.radians(azimuth)

    # Spherical -> Cartesian on the unit sphere (Y is the vertical axis).
    eye = np.array(
        [
            np.cos(elev_rad) * np.sin(azim_rad),
            np.sin(elev_rad),
            np.cos(elev_rad) * np.cos(azim_rad),
        ]
    )
    look_at = np.array([0, 0, 0])
    world_up = np.array([0, 1, 0])

    # Orthonormal camera basis derived from the viewing direction.
    # NOTE(review): at elevation = +/-90 the viewing direction is (nearly)
    # parallel to world_up; the cross product is then tiny and only non-zero
    # thanks to floating-point rounding of cos(pi/2).
    view_dir = look_at - eye
    view_dir = view_dir / np.linalg.norm(view_dir)
    side = np.cross(view_dir, world_up)
    side = side / np.linalg.norm(side)
    cam_up = np.cross(side, view_dir)
    cam_up = cam_up / np.linalg.norm(cam_up)

    pose = np.eye(4)
    pose[:3, 0] = side
    pose[:3, 1] = cam_up
    pose[:3, 2] = -view_dir
    pose[:3, 3] = eye
    return pose
29+
30+
31+
def convert_opengl_to_blender(camera_matrix):
    """Left-multiply a camera matrix by the fixed OpenGL -> Blender axis swap.

    The swap matrix keeps row 0, replaces row 1 with the negated row 2 and
    replaces row 2 with row 1 of ``camera_matrix``.

    Args:
        camera_matrix: a numpy array, or otherwise a torch tensor. A 3-D
            tensor is treated as a batch and the swap matrix is given a
            leading batch axis before the matmul.

    Returns:
        The transformed matrix, of the same library type as the input.
    """
    axis_swap = [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]

    if isinstance(camera_matrix, np.ndarray):
        return np.dot(np.array(axis_swap), camera_matrix)

    swap = torch.tensor(axis_swap)
    if camera_matrix.ndim == 3:
        swap = swap.unsqueeze(0)
    # `.to(camera_matrix)` matches the input's dtype and device.
    return torch.matmul(swap.to(camera_matrix), camera_matrix)
45+
46+
47+
def normalize_camera(camera_matrix):
    """Normalize the camera location onto a unit sphere.

    The input is viewed as a batch of 4x4 matrices and each translation
    column (``[:3, 3]``) is divided by its Euclidean norm plus ``1e-8`` (the
    epsilon avoids division by zero for a camera at the origin).

    The normalization is applied to a copy, so the caller's array/tensor is
    left untouched; ``reshape`` alone usually returns a view, which would
    make the in-place assignment below modify the input.

    Args:
        camera_matrix: numpy array, or otherwise a torch tensor, reshapeable
            to ``(-1, 4, 4)``.

    Returns:
        A new array/tensor of shape ``(N, 16)`` with normalized translations.
    """
    if isinstance(camera_matrix, np.ndarray):
        poses = camera_matrix.reshape(-1, 4, 4).copy()
        length = np.linalg.norm(poses[:, :3, 3], axis=1, keepdims=True)
    else:
        poses = camera_matrix.reshape(-1, 4, 4).clone()
        length = torch.norm(poses[:, :3, 3], dim=1, keepdim=True)

    poses[:, :3, 3] = poses[:, :3, 3] / (length + 1e-8)
    return poses.reshape(-1, 16)
64+
65+
66+
def get_camera(
    num_frames,
    elevation=15,
    azimuth_start=0,
    azimuth_span=360,
    blender_coord=True,
    extra_view=False,
):
    """Return flattened camera matrices evenly spaced in azimuth.

    Args:
        num_frames: number of cameras to generate.
        elevation: elevation in degrees shared by all cameras.
        azimuth_start: azimuth in degrees of the first camera.
        azimuth_span: total azimuth range in degrees; consecutive cameras
            are ``azimuth_span / num_frames`` apart.
        blender_coord: if true, each matrix is passed through
            ``convert_opengl_to_blender``.
        extra_view: if true, one extra all-zero row is appended.

    Returns:
        A float torch tensor with one flattened camera matrix per row
        (``num_frames`` rows, plus one when ``extra_view`` is set).
    """
    angle_gap = azimuth_span / num_frames
    # Derive azimuths from an integer index instead of
    # np.arange(start, stop, float_step): with a non-integer step the length
    # of a float arange is subject to rounding and may not equal num_frames.
    azimuths = azimuth_start + angle_gap * np.arange(num_frames)

    cameras = []
    for azimuth in azimuths:
        camera_matrix = create_camera_to_world_matrix(elevation, azimuth)
        if blender_coord:
            camera_matrix = convert_opengl_to_blender(camera_matrix)
        cameras.append(camera_matrix.flatten())

    if extra_view:
        # All-zero placeholder row with the same length as a real camera.
        cameras.append(np.zeros(len(cameras[0])))
    return torch.tensor(np.stack(cameras, 0)).float()
86+
87+
88+
def get_camera_for_index(data_index):
    """Return the single camera associated with a view index.

    Per the current data format, with view 000 being the one facing us:
        000 is the front,  elevation 0,   azimuth 0
        001 is the left,   elevation 0,   azimuth -90
        002 is the bottom, elevation -90, azimuth 0
        003 is the back,   elevation 0,   azimuth 180
        004 is the right,  elevation 0,   azimuth 90
        005 is the top,    elevation 90,  azimuth 0

    Args:
        data_index: index into the table above.

    Returns:
        The result of ``get_camera`` for one frame at that elevation and
        starting azimuth.
    """
    # (elevation, azimuth) pairs, ordered by view index.
    view_angles = [
        (0, 0),
        (0, -90),
        (-90, 0),
        (0, 180),
        (0, 90),
        (90, 0),
    ]
    selected = view_angles[data_index]
    return get_camera(1, *selected)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
model:
2+
target: imagedream.ldm.interface.LatentDiffusionInterface
3+
params:
4+
linear_start: 0.00085
5+
linear_end: 0.0120
6+
timesteps: 1000
7+
scale_factor: 0.18215
8+
parameterization: "eps"
9+
10+
unet_config:
11+
target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel
12+
params:
13+
image_size: 32 # unused
14+
in_channels: 4
15+
out_channels: 4
16+
model_channels: 320
17+
attention_resolutions: [ 4, 2, 1 ]
18+
num_res_blocks: 2
19+
channel_mult: [ 1, 2, 4, 4 ]
20+
num_head_channels: 64 # need to fix for flash-attn
21+
use_spatial_transformer: True
22+
use_linear_in_transformer: True
23+
transformer_depth: 1
24+
context_dim: 1024
25+
use_checkpoint: False
26+
legacy: False
27+
camera_dim: 16
28+
with_ip: True
29+
ip_dim: 16 # ip token length
30+
ip_mode: "local_resample"
31+
32+
vae_config:
33+
target: imagedream.ldm.models.autoencoder.AutoencoderKL
34+
params:
35+
embed_dim: 4
36+
monitor: val/rec_loss
37+
ddconfig:
38+
#attn_type: "vanilla-xformers"
39+
double_z: true
40+
z_channels: 4
41+
resolution: 256
42+
in_channels: 3
43+
out_ch: 3
44+
ch: 128
45+
ch_mult:
46+
- 1
47+
- 2
48+
- 4
49+
- 4
50+
num_res_blocks: 2
51+
attn_resolutions: []
52+
dropout: 0.0
53+
lossconfig:
54+
target: torch.nn.Identity
55+
56+
clip_config:
57+
target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
58+
params:
59+
freeze: True
60+
layer: "penultimate"
61+
ip_mode: "local_resample"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
model:
2+
target: imagedream.ldm.interface.LatentDiffusionInterface
3+
params:
4+
linear_start: 0.00085
5+
linear_end: 0.0120
6+
timesteps: 1000
7+
scale_factor: 0.18215
8+
parameterization: "eps"
9+
10+
unet_config:
11+
target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel
12+
params:
13+
image_size: 32 # unused
14+
in_channels: 8
15+
out_channels: 8
16+
model_channels: 320
17+
attention_resolutions: [ 4, 2, 1 ]
18+
num_res_blocks: 2
19+
channel_mult: [ 1, 2, 4, 4 ]
20+
num_head_channels: 64 # need to fix for flash-attn
21+
use_spatial_transformer: True
22+
use_linear_in_transformer: True
23+
transformer_depth: 1
24+
context_dim: 1024
25+
use_checkpoint: False
26+
legacy: False
27+
camera_dim: 16
28+
with_ip: True
29+
ip_dim: 16 # ip token length
30+
ip_mode: "local_resample"
31+
32+
vae_config:
33+
target: imagedream.ldm.models.autoencoder.AutoencoderKL
34+
params:
35+
embed_dim: 4
36+
monitor: val/rec_loss
37+
ddconfig:
38+
#attn_type: "vanilla-xformers"
39+
double_z: true
40+
z_channels: 4
41+
resolution: 256
42+
in_channels: 3
43+
out_ch: 3
44+
ch: 128
45+
ch_mult:
46+
- 1
47+
- 2
48+
- 4
49+
- 4
50+
num_res_blocks: 2
51+
attn_resolutions: []
52+
dropout: 0.0
53+
lossconfig:
54+
target: torch.nn.Identity
55+
56+
clip_config:
57+
target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
58+
params:
59+
freeze: True
60+
layer: "penultimate"
61+
ip_mode: "local_resample"

0 commit comments

Comments
 (0)