-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcreate_dataset.py
74 lines (62 loc) · 2.54 KB
/
create_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/env python
import os
import sys
from glob import glob
from random import choice

from scripts.helpers import (
    write_file,
    image2dicom,
    get_name
)
# Directory the script is launched from; the Singularity image and the
# output folder are both resolved relative to it.
here = os.getcwd()

# Singularity image handed to image2dicom() for the conversion. The
# conversion tooling is provided by the image built from
# https://www.github.com/pydicom/singularity-dicom
dcm = "%s/dcm.img" % here

# Pre-flight check: without the image nothing can be converted, so report
# the problem on stderr and stop with a non-zero exit status.
if not os.path.exists(dcm):
    sys.stderr.write('You must generate the dcm.img with dcm2tk to run the conversion first.\n')
    sys.stderr.write('See https://www.github.com/pydicom/singularity-dicom for instructions.\n')
    sys.exit(1)
# Raw input datasets, one folder per dataset directly under _original/.
# They follow the wordfish standard layout:
# https://www.github.com/vsoch/wordfish-standard
image_folders = glob('_original/*')

# Root folder for the generated datasets; created on first run.
output_folder = "%s/_datasets" % here
output_missing = not os.path.exists(output_folder)
if output_missing:
    os.mkdir(output_folder)
# Maps each dataset (cookie) id to the list of image ids converted for it.
lookup = dict()

# Iterate over the raw dataset folders and generate a DICOM file for every
# JPEG image found inside them.
for image_folder in image_folders:
    series = glob('%s/images/*' % image_folder)
    cookie_id = os.path.basename(image_folder)
    print("Processing %s" % cookie_id)
    lookup[cookie_id] = []

    # Patient metadata is chosen once per dataset and reused for all of its
    # images: a name from get_name() (helper from scripts.helpers) and a
    # randomly picked sex.
    patient_name = get_name()
    patient_sex = choice(["M", "F"])

    # Converted files go to <output_folder>/<cookie_id>/images. makedirs
    # creates the intermediate dataset folder as well, so one guarded call
    # covers both levels.
    cookie_output = os.path.join(output_folder, cookie_id)
    image_output = os.path.join(cookie_output, 'images')
    if not os.path.exists(image_output):
        os.makedirs(image_output)

    for image_series in series:
        images = glob('%s/*.jpg' % image_series)
        for image in images:
            # splitext drops only the trailing extension; a plain
            # str.replace('.jpg', '') would also strip any ".jpg" that
            # happens to occur in the middle of the file name.
            image_id = os.path.splitext(os.path.basename(image))[0]
            lookup[cookie_id].append(image_id)
            dcm_file = os.path.join(image_output, "%s.dcm" % image_id)
            image2dicom(input_image=image,
                        output_dcm=dcm_file,
                        singularity=dcm,
                        patient_sex=patient_sex,
                        patient_name=patient_name,
                        patient_id=cookie_id)
# Write the two description files for every dataset that was processed.
for dataset_id, image_ids in lookup.items():
    dataset_dir = "%s/%s" % (output_folder, dataset_id)

    # images.txt: one "<image_id>.dcm" entry per converted image, wrapped
    # in "---" delimiters.
    image_entries = "".join(["\n - %s.dcm" % image_id for image_id in image_ids])
    images_text = '---\ntype: images\ndataset-id: "%s"\nimages:' % dataset_id
    images_text = images_text + image_entries + '\n---'
    write_file("%s/images.txt" % dataset_dir, images_text)

    # metadata.txt: entity record for the dataset that includes its images.
    metadata_text = '---\ntype: entity\ndataset-id: "%s"\nhidden: false\n\nincludes:\n - images\n---' % dataset_id
    write_file("%s/metadata.txt" % dataset_dir, metadata_text)