-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenLabelCocoFormat.py
152 lines (116 loc) · 4.88 KB
/
genLabelCocoFormat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import json,itertools
from skimage import io, measure
import tifffile as tif
from pycocotools import mask
import argparse
from sklearn.model_selection import KFold
def parse_args(argv=None):
    """Parse the command-line options of the COCO label generator.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so the
            script entry point can keep calling ``parse_args()`` unchanged.

    Returns:
        argparse.Namespace with the attributes ``input_images_path``,
        ``input_labels_path`` and ``output_folder_path``.

    Raises:
        SystemExit: Raised by argparse (exit code 2) when a required option
            is missing or an unknown option is given.
    """
    parser = argparse.ArgumentParser()
    # Kept optional: nothing in the visible script reads this value, so
    # invocations that omit it must keep working.
    parser.add_argument(
        "--input_images_path", type=str, help="input images path"
    )
    # The two paths below are used to read labels and write JSON; without
    # them the script would only fail later on a "None/..." path, so fail
    # early with a clear usage message instead.
    parser.add_argument(
        "--input_labels_path", type=str, required=True, help="input labels path"
    )
    parser.add_argument(
        "--output_folder_path", type=str, required=True, help="output folder path"
    )
    return parser.parse_args(argv)
def read_image(img_path):
    """Load the image stored at ``img_path`` and return the decoded data.

    Paths ending in ``.tif`` or ``.tiff`` are read with ``tifffile``; every
    other path is handed to ``skimage.io.imread``.
    """
    is_tiff = img_path.endswith(('.tif', '.tiff'))
    reader = tif.imread if is_tiff else io.imread
    return reader(img_path)
def coco_structure(train_df, label_folder='TrainLabels', start_image_id=0, start_ann_id=0):
    """Build a COCO-style annotation dictionary from instance label images.

    For every row of ``train_df`` the label image
    ``<label_folder>/<image stem>_label.tiff`` is read, and each distinct
    non-zero value found in it becomes one annotation of the single
    ``cell`` category (category id 1).

    Args:
        train_df: DataFrame providing ``img_name``, ``width`` and ``height``
            for each image.
        label_folder: Directory containing the ``*_label.tiff`` files.
        start_image_id: Id assigned to the first image; following images
            count up by one.
        start_ann_id: Id assigned to the first annotation; following
            annotations count up by one across all images.

    Returns:
        dict with the keys ``categories``, ``images`` and ``annotations``.
    """
    cat_ids = {'cell': 1}
    cats = [{'name': name, 'id': cat_id} for name, cat_id in cat_ids.items()]
    image_id = start_image_id
    # The COCO evaluator needs annotation ids to be integers, hence a
    # running counter rather than a "<image>_<instance>" string.
    ann_id = start_ann_id
    images = []
    annotations = []
    for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
        img_name = row.img_name
        images.append({
            'id': image_id,
            # Plain ints keep the result serializable with json.dump even
            # when the frame hands back NumPy integer scalars.
            'width': int(row.width),
            'height': int(row.height),
            'file_name': f'{img_name}',
        })
        # splitext removes only the final extension, so stems that contain
        # dots (e.g. "cell.01.png") still map to the right label file.
        stem = os.path.splitext(img_name)[0]
        label = read_image(f'{label_folder}/{stem}_label.tiff')
        for ins_id in np.unique(label):
            if ins_id == 0:  # background
                continue
            bin_mask = (label == ins_id).astype('uint8')
            # The mask is handed to mask.encode as a Fortran-ordered array.
            rle = mask.encode(np.asfortranarray(bin_mask))
            area = mask.area(rle)
            bbox = mask.toBbox(rle)
            polygons = []
            for contour in measure.find_contours(bin_mask, 0.5):
                # Swap the two coordinate columns before flattening the
                # contour into a flat polygon coordinate list.
                flat = np.flip(contour, axis=1).ravel().tolist()
                if len(flat) >= 6:  # keep real polygons only (>= 3 points)
                    polygons.append(flat)
            annotations.append({
                "segmentation": polygons,
                "area": area.tolist(),
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": bbox.tolist(),
                "category_id": 1,
                "id": ann_id,
            })
            ann_id += 1
        image_id += 1
    return {'categories': cats, 'images': images, 'annotations': annotations}
def main(args):
    """Scan the label folder, split it into 5 folds and write COCO JSON files.

    Every entry of ``args.input_labels_path`` is read as
    ``<stem>.tiff``; its size and maximum label value are recorded together
    with an image name derived by dropping the last six characters of the
    stem (the ``_label`` suffix that ``coco_structure`` appends again) and
    adding ``.png``. Fold 0 of a seeded 5-fold split becomes the validation
    set, the remaining folds the training set.

    Args:
        args: Namespace with ``input_labels_path`` (directory of label
            images) and ``output_folder_path`` (directory that receives
            ``coco_annotations/*.json``).
    """
    img_names = []
    widths = []
    heights = []
    num_cells = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # row order, and therefore the seeded KFold split, reproducible.
    for label_name in tqdm(sorted(os.listdir(args.input_labels_path))):
        cell_id = label_name.split('.')[0]
        label = read_image(f'{args.input_labels_path}/{cell_id}.tiff')
        h, w = label.shape[:2]
        widths.append(w)
        heights.append(h)
        # Highest label value; equals the number of cells only if instance
        # ids are consecutive starting at 1 -- TODO confirm for this data.
        num_cells.append(np.max(label))
        img_names.append(cell_id[:-6] + '.png')

    train_val_meta = pd.DataFrame({'img_name': img_names,
                                   'width': widths,
                                   'height': heights,
                                   'cell_count': num_cells})

    kfold = KFold(n_splits=5, shuffle=True, random_state=67)
    for fold, (_, valid_indices) in enumerate(kfold.split(train_val_meta['img_name'])):
        train_val_meta.loc[valid_indices, 'fold'] = fold

    chosen_fold = 0
    train_df = train_val_meta[train_val_meta.fold != chosen_fold]
    val_df = train_val_meta[train_val_meta.fold == chosen_fold]
    train_annotations = coco_structure(train_df, label_folder=args.input_labels_path)
    val_annotations = coco_structure(val_df, label_folder=args.input_labels_path)

    # Create the target directory first; open(..., 'w') does not create
    # missing parent directories and would raise FileNotFoundError.
    out_dir = f'{args.output_folder_path}/coco_annotations'
    os.makedirs(out_dir, exist_ok=True)
    with open(f'{out_dir}/train_annotations_fold{chosen_fold}.json', 'w') as f:
        json.dump(train_annotations, f)
    with open(f'{out_dir}/val_annotations_fold{chosen_fold}.json', 'w') as f:
        json.dump(val_annotations, f)
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    main(parse_args())