# grasp_generator.py
import os
from datetime import datetime

import cv2
import numpy as np
import torch
from skimage.filters import gaussian

from network.hardware.device import get_device
from network.utils.data.camera_data import CameraData
from network.utils.visualisation.plot import plot_results
from network.utils.dataset_processing.grasp import detect_grasps


class GraspGenerator:
    """Generates grasp poses from RGB-D images and converts them to the robot base frame."""

    IMG_WIDTH = 224
    IMG_ROTATION = -np.pi * 0.5
    CAM_ROTATION = 0
    PIX_CONVERSION = 277      # pixels per meter at the reference 224-px image width
    DIST_BACKGROUND = 1.115   # camera-to-background distance (m)
    MAX_GRASP = 0.085         # maximum gripper opening (m)
    def __init__(self, net_path, camera, depth_radius, fig,
                 IMG_WIDTH=224, network='GR_ConvNet', device='cpu'):
        if device != 'cpu':
            print("GPU is not supported yet -- continuing experiment on CPU!")
        # Load the trained network; inference always runs on the CPU for now.
        self.net = torch.load(net_path, map_location='cpu')
        self.device = get_device(force_cpu=True)

        self.near = camera.near
        self.far = camera.far
        self.depth_r = depth_radius
        self.fig = fig
        self.network = network
        # Scale the pixel-to-meter conversion with the requested image width
        self.PIX_CONVERSION = 277 * IMG_WIDTH / 224
        self.IMG_WIDTH = IMG_WIDTH
        # Precompute homogeneous transforms: image space -> camera space -> robot base
        img_center = self.IMG_WIDTH / 2 - 0.5
        self.img_to_cam = self.get_transform_matrix(-img_center / self.PIX_CONVERSION,
                                                    img_center / self.PIX_CONVERSION,
                                                    0,
                                                    self.IMG_ROTATION)
        self.cam_to_robot_base = self.get_transform_matrix(
            camera.x, camera.y, camera.z, self.CAM_ROTATION)
    def get_transform_matrix(self, x, y, z, rot):
        """Homogeneous transform: rotation `rot` about the z-axis, then translation (x, y, z)."""
        return np.array([
            [np.cos(rot), -np.sin(rot), 0, x],
            [np.sin(rot),  np.cos(rot), 0, y],
            [0,            0,           1, z],
            [0,            0,           0, 1]
        ])
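
    # For example (a quick check, not from the original code):
    # get_transform_matrix(1, 2, 0, np.pi / 2) maps the homogeneous point
    # (1, 0, 0, 1) to (1, 3, 0, 1): a 90-degree rotation about z followed by
    # the (1, 2, 0) translation.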
    def grasp_to_robot_frame(self, grasp, depth_img):
        """
        Convert a grasp from image space to the robot's frame of reference.
        return: x, y, z, roll, gripper opening length, object height
        """
# Get x, y, z of center pixel
x_p, y_p = grasp.center[0], grasp.center[1]
# Get area of depth values around center pixel
x_min = np.clip(x_p-self.depth_r, 0, self.IMG_WIDTH)
x_max = np.clip(x_p+self.depth_r, 0, self.IMG_WIDTH)
y_min = np.clip(y_p-self.depth_r, 0, self.IMG_WIDTH)
y_max = np.clip(y_p+self.depth_r, 0, self.IMG_WIDTH)
depth_values = depth_img[x_min:x_max, y_min:y_max]
# Get minimum depth value from selected area
z_p = np.amin(depth_values)
        # Convert pixel coordinates to meters
        x_p /= self.PIX_CONVERSION
        y_p /= self.PIX_CONVERSION
        # Linearize the normalized depth-buffer value into a metric distance
        z_p = self.far * self.near / (self.far - (self.far - self.near) * z_p)
# Convert image space to camera's 3D space
img_xyz = np.array([x_p, y_p, -z_p, 1])
cam_space = np.matmul(self.img_to_cam, img_xyz)
# Convert camera's 3D space to robot frame of reference
robot_frame_ref = np.matmul(self.cam_to_robot_base, cam_space)
        # Flip the sign of the angle and rotate by the image rotation offset
        roll = -grasp.angle + self.IMG_ROTATION
        if roll < -np.pi / 2:
            roll += np.pi
        # Convert the grasp width from pixels to a gripper opening in meters
        opening_length = (grasp.length / int(self.MAX_GRASP *
                                             self.PIX_CONVERSION)) * self.MAX_GRASP
obj_height = self.DIST_BACKGROUND - z_p
# return x, y, z, roll, opening length gripper
return robot_frame_ref[0], robot_frame_ref[1], robot_frame_ref[2], roll, opening_length, obj_height
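
    # Worked example (a sketch with assumed values, not from the original code):
    # with PIX_CONVERSION = 277, near = 0.1 and far = 2.0, a center pixel at
    # (138, 138) with a raw depth-buffer value of 0.9 gives
    # x_p = y_p = 138 / 277 ~ 0.498 m and
    # z_p = 2.0 * 0.1 / (2.0 - 1.9 * 0.9) ~ 0.69 m before the frame transforms.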
    def post_process_output(self, q_img, cos_img, sin_img, width_img, pixels_max_grasp):
        """
        Post-process the raw output of the network: convert to numpy arrays and apply filtering.
        :param q_img: Q output of network (as torch Tensor)
        :param cos_img: cos output of network
        :param sin_img: sin output of network
        :param width_img: width output of network
        :param pixels_max_grasp: maximum grasp width in pixels, used to rescale the width map
        :return: filtered Q output, filtered angle output, filtered width output
        """
q_img = q_img.cpu().numpy().squeeze()
ang_img = (torch.atan2(sin_img, cos_img) / 2.0).cpu().numpy().squeeze()
width_img = width_img.cpu().numpy().squeeze() * pixels_max_grasp
q_img = gaussian(q_img, 1.0, preserve_range=True)
ang_img = gaussian(ang_img, 1.0, preserve_range=True)
width_img = gaussian(width_img, 1.0, preserve_range=True)
return q_img, ang_img, width_img
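
    # For instance, a pixel with cos output 0.0 and sin output 1.0 decodes to
    # atan2(1.0, 0.0) / 2 = pi / 4; the half-angle (sin, cos) encoding keeps
    # the predicted grasp angle unique within (-pi/2, pi/2].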
    def predict(self, rgb, depth, n_grasps=1, show_output=False):
        # Normalize the depth image to roughly [-1, 1]
        max_val = np.max(depth)
        depth = depth * (255 / max_val)
        depth = np.clip((depth - depth.mean()) / 175, -1, 1)

        if self.network == 'GR_ConvNet':
            ##### GR-ConvNet #####
            depth = np.expand_dims(np.array(depth), axis=2)
            img_data = CameraData(width=self.IMG_WIDTH, height=self.IMG_WIDTH)
            x, depth_img, rgb_img = img_data.get_data(rgb=rgb, depth=depth)
        else:
            print("The selected network has not been implemented yet -- please choose another network!")
            exit()
        with torch.no_grad():
            xc = x.to(self.device)
            if self.network == 'GR_ConvNet':
                ##### GR-ConvNet #####
                pred = self.net.predict(xc)
                pixels_max_grasp = int(self.MAX_GRASP * self.PIX_CONVERSION)
                q_img, ang_img, width_img = self.post_process_output(pred['pos'],
                                                                     pred['cos'],
                                                                     pred['sin'],
                                                                     pred['width'],
                                                                     pixels_max_grasp)
            else:
                print("You need to add your function here!")
        save_name = None
        if show_output:
            im_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            plot = plot_results(self.fig,
                                rgb_img=im_bgr,
                                grasp_q_img=q_img,
                                grasp_angle_img=ang_img,
                                depth_img=depth,
                                no_grasps=3,
                                grasp_width_img=width_img)
            # Save the visualisation under a timestamped file name
            if not os.path.exists('network_output'):
                os.mkdir('network_output')
            time = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
            save_name = 'network_output/{}'.format(time)
            plot.savefig(save_name + '.png')
            plot.clf()
grasps = detect_grasps(q_img, ang_img, width_img=width_img, no_grasps=n_grasps)
return grasps, save_name
    def predict_grasp(self, rgb, depth, n_grasps=1, show_output=False):
        """Predict grasps and convert each one to the robot's frame of reference."""
        predictions, save_name = self.predict(rgb, depth, n_grasps=n_grasps, show_output=show_output)
        grasps = []
        for grasp in predictions:
            x, y, z, roll, opening_len, obj_height = self.grasp_to_robot_frame(grasp, depth)
            grasps.append((x, y, z, roll, opening_len, obj_height))
        return grasps, save_name
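

# A minimal usage sketch (not part of the original file). The camera object is
# a stand-in: only the attributes GraspGenerator actually reads (near, far,
# x, y, z) are stubbed with assumed values, and the checkpoint path is a
# hypothetical placeholder.
if __name__ == '__main__':
    from types import SimpleNamespace
    import matplotlib.pyplot as plt

    camera = SimpleNamespace(near=0.1, far=2.0, x=0.0, y=0.5, z=1.9)  # assumed values
    fig = plt.figure(figsize=(10, 10))
    generator = GraspGenerator('trained_models/GR_ConvNet/model.pt',  # hypothetical path
                               camera, depth_radius=2, fig=fig)
    rgb = np.zeros((224, 224, 3), dtype=np.uint8)    # placeholder image
    depth = np.ones((224, 224), dtype=np.float32)    # placeholder depth map
    grasps, _ = generator.predict_grasp(rgb, depth, n_grasps=1)
    for x, y, z, roll, opening_len, obj_height in grasps:
        print(x, y, z, roll, opening_len, obj_height)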