Skip to content

Commit 3db2a9a

Browse files
authored
Merge pull request #583 from 4dn-dcic/ajs-241008-add-ign-2-opf-status-mm-chk
Update opf_status_mismatch check
2 parents 46af00f + 56d310a commit 3db2a9a

File tree

4 files changed

+36
-12
lines changed

4 files changed

+36
-12
lines changed

CHANGELOG.rst

+9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ Change Log
88
----------
99

1010

11+
4.9.5
12+
=====
13+
14+
`PR 583: Update opf status mismatch check <https://github.com/4dn-dcic/foursight/pull/583>`_
15+
16+
* add a filter on the 'ignore_status_mismatch' tag so that tagged items (opfs, quality metrics or higlass_viewconfs) are ignored by the opf status mismatch check
17+
* small update to search for bed2beddb files to respect the 'skip_processing' tag if present
18+
19+
1120
4.9.4
1221
=====
1322

chalicelib_fourfront/checks/audit_checks.py

+22-8
Original file line numberDiff line numberDiff line change
@@ -734,41 +734,56 @@ def check_opf_status_mismatch(connection, **kwargs):
734734
'''
735735
check = CheckResult(connection, 'check_opf_status_mismatch')
736736

737+
# list of uuids to filter out as they have a tag to ignore them
738+
tagged2ignore = get_items_with_ignore_tags(connection.ff_keys)
739+
737740
opf_set = ('search/?type=ExperimentSet&other_processed_files.title%21=No+value&field=status'
738741
'&field=other_processed_files&field=experiments_in_set.other_processed_files')
739742
opf_exp = ('search/?type=ExperimentSet&other_processed_files.title=No+value'
740743
'&experiments_in_set.other_processed_files.title%21=No+value'
741744
'&field=experiments_in_set.other_processed_files&field=status')
742745
opf_set_results = ff_utils.search_metadata(opf_set, key=connection.ff_keys)
743746
opf_exp_results = ff_utils.search_metadata(opf_exp, key=connection.ff_keys)
744-
results = opf_set_results + opf_exp_results
745-
# extract file uuids
747+
results = opf_set_results + opf_exp_results # these are expset and expt items w/opfs
748+
# extract all opf file and higlass viewconf uuids
746749
files = []
747750
for result in results:
748751
if result.get('other_processed_files'):
749752
for case in result['other_processed_files']:
750-
files.extend([i['uuid'] for i in case['files']])
753+
files.extend([i['uuid'] for i in case['files']]) # if i.get('uuid') not in tagged2ignore])
751754
if case.get('higlass_view_config'):
755+
# if case['higlass_view_config'].get('uuid') not in tagged2ignore:
752756
files.append(case['higlass_view_config'].get('uuid'))
753757
if result.get('experiments_in_set'):
754758
for exp in result['experiments_in_set']:
755759
for case in exp['other_processed_files']:
756-
files.extend([i['uuid'] for i in case['files']])
757-
# get metadata for files, to collect status
760+
files.extend([i['uuid'] for i in case['files']]) # if i.get('uuid') not in tagged2ignore])
761+
762+
# get metadata for files, to collect status
758763
resp = get_es_metadata(list(set(files)),
759764
sources=['links.quality_metric', 'object.status', 'uuid'],
760765
key=connection.ff_keys)
766+
# key = opf uuid; value = status
761767
opf_status_dict = {item['uuid']: item['object']['status'] for item in resp if item['uuid'] in files}
768+
769+
# key = opf uuid; value = linked quality metric items
762770
opf_linked_dict = {
763771
item['uuid']: item.get('links', {}).get('quality_metric', []) for item in resp if item['uuid'] in files
764772
}
773+
774+
# quality metric uuids
765775
quality_metrics = [uuid for item in resp for uuid in item.get('links', {}).get('quality_metric', [])]
776+
777+
# get metadata for quality metrics (status)
766778
qm_resp = get_es_metadata(list(set(quality_metrics)),
767779
sources=['uuid', 'object.status'],
768780
key=connection.ff_keys)
781+
782+
# key = qual met uuid; value = status
769783
opf_other_dict = {item['uuid']: item['object']['status'] for item in qm_resp if item not in files}
784+
770785
check.full_output = {}
771-
for result in results:
786+
for result in results: # now go through each expset or experiment again and make sure all the statuses agree
772787
hg_dict = {item['title']: item.get('higlass_view_config', {}).get('uuid')
773788
for item in result.get('other_processed_files', [])}
774789
titles = [item['title'] for item in result.get('other_processed_files', [])]
@@ -782,8 +797,7 @@ def check_opf_status_mismatch(connection, **kwargs):
782797
file_list.extend([item for exp in result.get('experiments_in_set', [])
783798
for fileset in exp['other_processed_files']
784799
for item in fileset['files'] if fileset['title'] == title])
785-
statuses = set([opf_status_dict[f['uuid']] for f in file_list])
786-
# import pdb; pdb.set_trace()
800+
statuses = set([opf_status_dict[f['uuid']] for f in file_list if f.get('uuid') not in tagged2ignore])
787801
if not statuses:
788802
# to account for empty sections that may not yet contain files
789803
pass

chalicelib_fourfront/checks/wfr_checks.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -654,11 +654,11 @@ def bed2beddb_status(connection, **kwargs):
654654
check, skip = wfr_utils.check_indexing(check, connection)
655655
if skip:
656656
return check
657-
# Build the query (find bg files without bw files)
657+
# Build the query (find bed files without beddb files)
658658
query = ("/search/?type=File&file_format.file_format=bed"
659659
"&extra_files.file_format.display_title!=beddb"
660660
"&status!=uploading&status!=to be uploaded by workflow"
661-
"&status!=archived&status!=archived to project")
661+
"&status!=archived&status!=archived to project&tags!=skip_processing")
662662
query += "".join(["&file_type=" + i for i in accepted_types])
663663
# add date
664664
s_date = kwargs.get('start_date')
@@ -674,7 +674,8 @@ def bed2beddb_status(connection, **kwargs):
674674
"&extra_files.file_format.display_title=beddb"
675675
"&extra_files.status=uploading"
676676
"&extra_files.status=to be uploaded by workflow"
677-
"&status!=uploading&status!=to be uploaded by workflow")
677+
"&status!=uploading&status!=to be uploaded by workflow"
678+
"&tags!=skip_processing")
678679
# add date
679680
s_date = kwargs.get('start_date')
680681
if s_date:

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "foursight"
3-
version = "4.9.4"
3+
version = "4.9.5"
44
description = "Serverless Chalice Application for Monitoring"
55
authors = ["4DN-DCIC Team <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)