Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DC-3873] V8 OC - Snapshot EHR Union & Prep #1881

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions data_steward/validation/ehr_union.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,10 @@
ID_CONSTANT_FACTOR, JINJA_ENV, LOCATION, MAPPING_PREFIX,
MEASUREMENT_DOMAIN_CONCEPT_ID, OBSERVATION, PERSON,
PERSON_DOMAIN_CONCEPT_ID, SURVEY_CONDUCT, UNIONED_EHR,
VISIT_DETAIL, VISIT_OCCURRENCE, BIGQUERY_DATASET_ID)
VISIT_DETAIL, VISIT_OCCURRENCE, BIGQUERY_DATASET_ID,
CDR_SCOPES)
from constants.validation import ehr_union as eu_constants
from utils import pipeline_logging
from utils import pipeline_logging, auth
from gcloud.bq import BigQueryClient
from resources import (fields_for, get_table_id, has_primary_key,
validate_date_string, CDM_TABLES)
Expand Down Expand Up @@ -918,6 +919,7 @@ def create_load_aou_death(bq_client, project_id, input_dataset_id,
def main(input_dataset_id,
output_dataset_id,
project_id,
run_as,
hpo_ids_ex=None,
ehr_cutoff_date=None):
"""
Expand All @@ -926,11 +928,13 @@ def main(input_dataset_id,
:param input_dataset_id identifies a dataset containing multiple CDMs, one for each HPO submission
:param output_dataset_id identifies the dataset to store the new CDM in
:param project_id: project containing the datasets
:param run_as: impersonation email address
:param hpo_ids_ex: (optional) list that identifies HPOs not to process, by default process all
:param ehr_cutoff_date: (optional) cutoff date for ehr data(same as CDR cutoff date)
:returns: list of tables generated successfully
"""
bq_client = BigQueryClient(project_id)
credentials = auth.get_impersonation_credentials(run_as, CDR_SCOPES)
bq_client = BigQueryClient(project_id=project_id, credentials=credentials)

logging.info('EHR union started')
# NOTE hpo_ids here includes HPO sites without any submissions. Those may not
Expand Down Expand Up @@ -1033,12 +1037,18 @@ def main(input_dataset_id,
help=
"Date to set for observation table rows transferred from person table",
type=validate_date_string)
parser.add_argument('--run_as',
action='store',
dest='run_as_email',
help='Service account email address to impersonate',
required=True)

# HPOs to exclude. If nothing given, exclude nothing.
args = parser.parse_args()
if args.input_dataset_id:
main(args.input_dataset_id,
args.output_dataset_id,
args.project_id,
args.run_as_email,
hpo_ids_ex=args.hpo_id_ex,
ehr_cutoff_date=args.ehr_cutoff_date)
ehr_cutoff_date=args.ehr_cutoff_date)