|
| 1 | +""" |
| 2 | +pose_format_converter |
| 3 | +--------------------- |
| 4 | +A utility to convert poses using Siren neural networks and visualize the results. |
| 5 | +
|
| 6 | +Modules: |
| 7 | +- numpy |
| 8 | +- pose_format |
| 9 | +- pose_format.utils.siren |
| 10 | +- pose_format.numpy |
| 11 | +- pose_format.pose_visualizer |
| 12 | +
|
| 13 | +Functions: |
| 14 | +- pose_to_siren_to_pose(p: Pose, fps=None) -> Pose |
| 15 | +
|
| 16 | +Example usage: |
| 17 | +$ python pose_format_converter.py |
| 18 | +""" |
| 19 | + |
1 | 20 | import numpy as np
|
2 | 21 | from numpy import ma
|
3 | 22 |
|
|
8 | 27 |
|
9 | 28 |
|
def pose_to_siren_to_pose(p: Pose, fps=None) -> Pose:
    """
    Reconstruct a Pose by fitting a Siren network to it and sampling it back.

    Parameters
    ----------
    p : Pose
        Input pose to be converted.
    fps : int, optional
        Frames per second for the reconstructed pose. Defaults to the
        input pose's own fps when omitted.

    Returns
    -------
    Pose
        The pose re-sampled from the trained Siren network.
    """
    # Fill masked/missing keypoints and normalize before training.
    p.body.zero_filled()
    mu, std = p.normalize_distribution()

    # Fit a Siren network to the normalized pose sequence.
    net = siren.get_pose_siren(p, total_steps=3000, steps_til_summary=100, learning_rate=1e-4, cuda=True)

    # Sample the trained network at the requested frame rate over the
    # original clip duration.
    target_fps = p.body.fps if fps is None else fps
    duration = len(p.body.data) / p.body.fps
    coords = siren.PoseDataset.get_coords(time=duration, fps=target_fps)
    prediction = net(coords).cpu().numpy()

    # Wrap the prediction in a new pose body with full confidence, then
    # undo the normalization applied before training.
    confidence = np.ones(shape=tuple(prediction.shape[:3]))
    reconstructed_body = NumPyPoseBody(fps=target_fps, data=ma.array(prediction), confidence=confidence)
    reconstructed = Pose(header=p.header, body=reconstructed_body)
    reconstructed.unnormalize_distribution(mu, std)
    return reconstructed
24 | 59 |
|
25 | 60 |
|
if __name__ == "__main__":
    # Example usage of the pose_to_siren_to_pose function.
    pose_path = "/home/nlp/amit/PhD/PoseFormat/sample-data/1.pose"

    # Use a context manager so the file handle is closed deterministically;
    # the original `open(...).read()` leaked the handle until GC.
    with open(pose_path, "rb") as f:
        buffer = f.read()
    p = Pose.read(buffer)
    print("Poses loaded")

    p = pose_to_siren_to_pose(p)

    # Normalize by shoulder distance so the skeleton renders at a fixed
    # scale, then crop the frame to the pose extents.
    info = p.header.normalization_info(
        p1=("pose_keypoints_2d", "RShoulder"),
        p2=("pose_keypoints_2d", "LShoulder")
    )
    p.normalize(info, scale_factor=300)
    p.focus()

    # Render the reconstructed pose to video.
    v = PoseVisualizer(p)
    v.save_video("reconstructed.mp4", v.draw(max_frames=3000))
0 commit comments