diff --git a/.travis.yml b/.travis.yml index 815c96e..85f84be 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,8 @@ python: env: global: - DCMTK_VERSION="dcmtk-3.6.1_20150924" - - DCMTK_DB_DIR="dcmtk_dicom_db" - - TESTDATA_DIR="testdata" + - DCMTK_DB_DIR="dcmtk_dicom_db-ci" + - TESTDATA_DIR="testdata-ci" - ORTHANC_VERSION="Orthanc-1.1.0" cache: @@ -33,7 +33,7 @@ install: script: - ./test/test.sh - - ./test/lint.sh reaper + - ./test/lint.sh after_success: - ./docker/build-trigger.sh Tag "${TRAVIS_TAG}" "${BUILD_TRIGGER_URL}" diff --git a/reaper/dcm.py b/reaper/dcm.py index 1091e96..047374a 100644 --- a/reaper/dcm.py +++ b/reaper/dcm.py @@ -1,10 +1,11 @@ """SciTran Reaper DICOM utility functions""" -import os -import shutil +import datetime import hashlib import logging -import datetime +import os +import shlex +import subprocess import dicom @@ -17,7 +18,25 @@ GEMS_TYPE_VXTL = ['DERIVED', 'SECONDARY', 'VXTL STATE'] -def pkg_series(_id, path, map_key, opt_key=None, anonymize=False, timezone=None): +class DicomError(Exception): + + """DicomError class""" + + pass + + +def __external_metadata(command, filepath): + try: + args = shlex.split(command) + [filepath] + log.debug('External metadata cmd: %s', ' '.join(args)) + return subprocess.check_output(args) + except subprocess.CalledProcessError as ex: + msg = 'Error running external command. Exit code %d' % ex.returncode + log.error(msg) + raise DicomError(msg) + + +def pkg_series(_id, path, map_key, opt_key=None, anonymize=False, timezone=None, additional_metadata=None): # pylint: disable=missing-docstring dcm_dict = {} log.info('inspecting %s', _id) @@ -38,27 +57,37 @@ def pkg_series(_id, path, map_key, opt_key=None, anonymize=False, timezone=None) filename = filename.replace('(none)', 'NA') file_time = max(int(dcm.acquisition_timestamp.strftime('%s')), 315561600) # zip can't handle < 1980 os.utime(filepath, (file_time, file_time)) # correct timestamps - os.rename(filepath, '%s.dcm' % os.path.join(arcdir_path, filename)) + arc_filepath = '%s.dcm' % os.path.join(arcdir_path, filename) + os.rename(filepath, arc_filepath) arc_path = util.create_archive(arcdir_path, dir_name) - metadata = util.object_metadata(dcm, timezone, os.path.basename(arc_path)) + for md_group_info in (additional_metadata or {}).itervalues(): + for md_field, md_value in md_group_info.iteritems(): + if md_value.startswith('^'): # DICOM header value + md_group_info[md_field] = dcm.raw_header.get(md_value[1:], None) + elif md_value.startswith('@'): # external command + md_group_info[md_field] = __external_metadata(md_value[1:], arc_filepath) + else: # verbatim value + md_group_info[md_field] = md_value[1:] + metadata = util.object_metadata(dcm, timezone, os.path.basename(arc_path), additional_metadata) util.set_archive_metadata(arc_path, metadata) - shutil.rmtree(arcdir_path) metadata_map[arc_path] = metadata return metadata_map class DicomFile(object): - """ - DicomFile class - """ + """DicomFile class""" # pylint: disable=too-few-public-methods def __init__(self, filepath, map_key, opt_key=None, parse=False, anonymize=False, timezone=None): if not parse and anonymize: - raise Exception('Cannot anonymize DICOM file without parsing') - dcm = dicom.read_file(filepath, stop_before_pixels=(not anonymize)) + raise DicomError('Cannot anonymize DICOM file without parsing') + try: + dcm = dicom.read_file(filepath, stop_before_pixels=(not anonymize)) + except: + raise DicomError() + self.raw_header = dcm self._id = dcm.get(map_key, '') self.opt = dcm.get(opt_key, '') if opt_key else None self.acq_no = str(dcm.get('AcquisitionNumber', '')) or None if dcm.get('Manufacturer').upper() != 'SIEMENS' else None diff --git a/reaper/dicom_reaper.py b/reaper/dicom_reaper.py index 54a18d2..131dbab 100644 --- a/reaper/dicom_reaper.py +++ b/reaper/dicom_reaper.py @@ -19,6 +19,8 @@ def __init__(self, options): self.scu = scu.SCU(options.get('host'), options.get('port'), options.get('return_port'), options.get('aet'), options.get('aec')) super(DicomReaper, self).__init__(self.scu.aec, options) self.anonymize = options.get('anonymize') + self.additional_metadata = {group: {field: value} for group, field, value in options.get('metadata')} + print self.additional_metadata self.query_tags = {self.map_key: ''} if self.opt_key is not None: @@ -55,6 +57,7 @@ def instrument_query(self): return i_state def reap(self, _id, item, tempdir): + # pylint: disable=too-many-return-statements if item['state']['images'] == 0: log.info('ignoring %s (zero images)', _id) return None, {} @@ -68,13 +71,20 @@ def reap(self, _id, item, tempdir): success, reap_cnt = self.scu.move(scu.SeriesQuery(SeriesInstanceUID=_id), reapdir) log.info('reaped %s (%d images) in %.1fs', _id, reap_cnt, (datetime.datetime.utcnow() - reap_start).total_seconds()) if success and reap_cnt > 0: - df = dcm.DicomFile(os.path.join(reapdir, os.listdir(reapdir)[0]), self.map_key, self.opt_key) + try: + df = dcm.DicomFile(os.path.join(reapdir, os.listdir(reapdir)[0]), self.map_key, self.opt_key) + except dcm.DicomError: + return False, {} if not self.is_desired_item(df.opt): log.info('ignoring %s (non-matching opt-%s)', _id, self.opt) return None, {} if success and reap_cnt == item['state']['images']: - metadata_map = dcm.pkg_series(_id, reapdir, self.map_key, self.opt_key, self.anonymize, self.timezone) - return True, metadata_map + try: + metadata_map = dcm.pkg_series(_id, reapdir, self.map_key, self.opt_key, self.anonymize, self.timezone, self.additional_metadata) + except dcm.DicomError: + return False, {} + else: + return True, metadata_map else: return False, {} @@ -88,6 +98,7 @@ def update_arg_parser(ap): ap.add_argument('aec', help='remote AE title') ap.add_argument('-A', '--no-anonymize', dest='anonymize', action='store_false', help='do not anonymize patient name and birthdate') + ap.add_argument('--metadata', nargs=3, default=[], action='append', help='Additional metadata to package') return ap diff --git a/reaper/util.py b/reaper/util.py index 9558a03..7b5e872 100644 --- a/reaper/util.py +++ b/reaper/util.py @@ -61,7 +61,7 @@ def hrsize(size): return '%.0f%sB' % (size, 'Y') -def object_metadata(obj, timezone, filename): +def object_metadata(obj, timezone, filename, additional_metadata=None): # pylint: disable=missing-docstring metadata = { 'session': {'timezone': timezone}, @@ -75,6 +75,10 @@ def object_metadata(obj, timezone, filename): metadata['file']['name'] = filename metadata['session']['subject'] = metadata.pop('subject', {}) metadata['acquisition']['files'] = [metadata.pop('file', {})] + for md_group, md_group_info in (additional_metadata or {}).iteritems(): + metadata.setdefault(md_group, {}) + metadata[md_group].setdefault('metadata', {}) + metadata[md_group]['metadata'].update(md_group_info) return metadata diff --git a/test/lint.sh b/test/lint.sh index 5fdded5..325f641 100755 --- a/test/lint.sh +++ b/test/lint.sh @@ -6,9 +6,9 @@ unset CDPATH cd "$( dirname "${BASH_SOURCE[0]}" )/.." echo "Running pylint ..." -pylint --reports=no "$@" +pylint --reports=no reaper echo echo "Running pep8 ..." -pep8 --max-line-length=150 --ignore=E402 "$@" +pep8 --max-line-length=150 --ignore=E402 reaper diff --git a/test/test.sh b/test/test.sh index 88eb1c8..d6c0727 100755 --- a/test/test.sh +++ b/test/test.sh @@ -29,7 +29,7 @@ RECEIVER_PID=$! # Fetch test data mkdir -p $TESTDATA_DIR if [ ! "$(ls -A $TESTDATA_DIR)" ]; then - curl -L https://github.com/scitran/testdata/archive/master.tar.gz | tar xz -C $TESTDATA_DIR --strip-components 1 + curl -L https://github.com/scitran/testdata/archive/reaper-ci.tar.gz | tar xz -C $TESTDATA_DIR --strip-components 1 fi