diff --git a/nw/buildnw/postgres/README.md b/nw/buildnw/postgres/README.md new file mode 100644 index 00000000..9c7a9f13 --- /dev/null +++ b/nw/buildnw/postgres/README.md @@ -0,0 +1,151 @@ +# Load NWICU into a PostgreSQL database + +This directory contains scripts to create the schema and load the Northwestern ICU and Hospital data (NWICU) into PostgreSQL, following a structure similar to the MIMIC-IV build scripts. + +## Quickstart + +```sh +# clone repo +git clone https://github.com/MIT-LCP/mimic-code.git +cd mimic-code + +# download NWICU data +wget -r -N -c -np --user --ask-password https://physionet.org/files/nwicu-northwestern-icu/0.1.0/ +# clean directory (run this command outside of physionet.org directory) +mv physionet.org/files/nwicu-northwestern-icu nwicu && rmdir physionet.org/files && rm physionet.org/robots.txt && rmdir physionet.org + +# create database +createdb nw + +# build and load NWICU tables +psql -d nw -f nw/buildnw/postgres/create.sql +psql -d nw -v ON_ERROR_STOP=1 -v nw_data_dir=nwicu/0.1.0 -f nw/buildnw postgres/load_gz.sql +psql -d nw -f nw/buildnw/postgres/constraint.sql +psql -d nw -f nw/buildnw/postgres/index.sql +psql -d nw -f nw/buildnw/postgres/validate.sql +``` + +## Detailed guide + +First ensure that PostgreSQL is running on your computer. For installation instructions, see: [http://www.postgresql.org/download/](http://www.postgresql.org/download/) + +### Install PostgreSQL + +**On macOS (using Homebrew):** + +```sh +brew update +brew install postgresql +brew services start postgresql +``` + +To check which user is running the PostgreSQL service, use: + +```sh +brew services list +``` + +The 'User' column shows the macOS account running PostgreSQL. This is usually the username you should use for database connections unless you have created a different PostgreSQL user. + +**On Ubuntu/Debian:** + +```sh +sudo apt update +sudo apt install postgresql postgresql-contrib +sudo service postgresql start +``` + +**On Windows:** + +1. Download the installer from https://www.postgresql.org/download/windows/ +2. Run the installer and follow the prompts to complete the installation. +3. Start the PostgreSQL service from the Start Menu or Services app. + +For more details, see the [official PostgreSQL download page](https://www.postgresql.org/download/). + +### Download NWICU data + +We can download Northwestern ICU (NWICU) database +from [PhysioNet](https://physionet.org/content/nwicu-northwestern-icu/0.1.0/): + +```sh +wget -r -N -c -np --user --ask-password https://physionet.org/files/nwicu-northwestern-icu/0.1.0/ +mv physionet.org/files/nwicu-northwestern-icu nwicu && rmdir physionet.org/files && rm physionet.org/robots.txt && rmdir physionet.org +``` + +### Specify a database for installation + +Create the database if it does not already exist: + +```sh +createdb nw +``` + +Set PostgreSQL environment variables: + +We can use the provided script to set your environment variables for the current terminal session: + +```sh +source postgres_env.sh +``` + +Replace `your_user` and `your_password` with your actual PostgreSQL username and password in sh script. + +Instead of editing the script, you can pass your username and password as arguments: + +```sh +source postgres_env.sh myuser mypassword nw localhost 5432 +``` + +Once Postgres is installed, clone the [mimic-code](https://github.com/MIT-LCP/mimic-code) repository into a local directory. + +``` bash +git clone https://github.com/MIT-LCP/mimic-code.git +``` + +Create the schemas and tables with the following psql command. **This will delete any data present in the schemas.** If you need to reload the data (for example, if you run the load scripts multiple times), simply rerun create.sql. +This will drop all existing tables and recreate them, ensuring a clean slate before reloading your data. + +```sh +psql -d nw -f nw/buildnw/postgres/create.sql +``` + +Afterwards, we need to load the NWICU files into the database. To do so, we'll specify the location of the local CSV files (compressed). +Note that this assumes the folder structure is as follows: + +``` +nwicu_data_dir + nw_hosp + admissions.csv.gz + patients.csv.gz + ... + nw_icu + icustays.csv.gz + ... +``` + +For example, if you downloaded and moved the files as above, your `nwicu_data_dir` would be `nwicu/0.1.0` and contain subfolders like `nw_hosp` and `nw_icu` with their respective compressed CSV files. + +Once you have verified your data is stored in this structure, run: + +```sh +psql -d nw -v ON_ERROR_STOP=1 -v nw_data_dir=nwicu/0.1.0 -f nw/buildnw/postgres/load_gz.sql +``` + +After loading the data, we can enforce data integrity by adding primary keys, foreign keys, and other constraints. + +```sh +psql -d nw -f nw/buildnw/postgres/constraint.sql +``` + +We can also improve query performance by creating indexes, which allow the database to quickly find and retrieve data, especially in large tables. + +```sh +psql -d nw -f nw/buildnw/postgres/index.sql +``` + +To ensure the data was loaded correctly, we can run validation checks. + +```sh +psql -d nw -f nw/buildnw/postgres/validate.sql +``` diff --git a/nw/buildnw/postgres/constraint.sql b/nw/buildnw/postgres/constraint.sql new file mode 100644 index 00000000..cefc6631 --- /dev/null +++ b/nw/buildnw/postgres/constraint.sql @@ -0,0 +1,220 @@ +--------------------------- +--------------------------- +-- Creating Primary Keys -- +--------------------------- +--------------------------- + +---------- +-- hosp -- +---------- + +-- admissions + +ALTER TABLE nw_hosp.admissions DROP CONSTRAINT IF EXISTS admissions_pk CASCADE; +ALTER TABLE nw_hosp.admissions +ADD CONSTRAINT admissions_pk + PRIMARY KEY (hadm_id); + +ALTER TABLE nw_hosp.patients DROP CONSTRAINT IF EXISTS patients_pk CASCADE; +ALTER TABLE nw_hosp.patients +ADD CONSTRAINT patients_pk + PRIMARY KEY (subject_id); + +-- d_icd_diagnoses + +ALTER TABLE nw_hosp.d_icd_diagnoses DROP CONSTRAINT IF EXISTS d_icd_diagnoses_pk CASCADE; +ALTER TABLE nw_hosp.d_icd_diagnoses +ADD CONSTRAINT d_icd_diagnoses_pk + PRIMARY KEY (icd_code, icd_version); + +-- diagnoses_icd + +ALTER TABLE nw_hosp.diagnoses_icd DROP CONSTRAINT IF EXISTS diagnoses_icd_patients_fk CASCADE; +ALTER TABLE nw_hosp.diagnoses_icd +ADD CONSTRAINT diagnoses_icd_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_hosp.diagnoses_icd DROP CONSTRAINT IF EXISTS diagnoses_icd_admissions_fk; +ALTER TABLE nw_hosp.diagnoses_icd +ADD CONSTRAINT diagnoses_icd_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +-- d_labitems + +ALTER TABLE nw_hosp.d_labitems DROP CONSTRAINT IF EXISTS d_labitems_pk CASCADE; +ALTER TABLE nw_hosp.d_labitems +ADD CONSTRAINT d_labitems_pk + PRIMARY KEY (itemid); + +-- labevents + +ALTER TABLE nw_hosp.labevents DROP CONSTRAINT IF EXISTS labevents_pk CASCADE; +ALTER TABLE nw_hosp.labevents +ADD CONSTRAINT labevents_pk + PRIMARY KEY (labevent_id); + +-- prescriptions + +ALTER TABLE nw_hosp.prescriptions DROP CONSTRAINT IF EXISTS prescriptions_pk CASCADE; +ALTER TABLE nw_hosp.prescriptions +ADD CONSTRAINT prescriptions_pk + PRIMARY KEY (pharmacy_id, drug); + +-- emar + +ALTER TABLE nw_hosp.emar DROP CONSTRAINT IF EXISTS emar_pk CASCADE; +ALTER TABLE nw_hosp.emar +ADD CONSTRAINT emar_pk + PRIMARY KEY (emar_id); + +--------- +-- icu -- +--------- + +-- icustays + +ALTER TABLE nw_icu.icustays DROP CONSTRAINT IF EXISTS icustays_pk CASCADE; +ALTER TABLE nw_icu.icustays +ADD CONSTRAINT icustays_pk + PRIMARY KEY (stay_id); + +-- d_items + +ALTER TABLE nw_icu.d_items DROP CONSTRAINT IF EXISTS d_items_pk CASCADE; +ALTER TABLE nw_icu.d_items +ADD CONSTRAINT d_items_pk + PRIMARY KEY (itemid, label); + +--------------------------- +--------------------------- +-- Creating Foreign Keys -- +--------------------------- +--------------------------- + +---------- +-- hosp -- +---------- + +-- admissions + +ALTER TABLE nw_hosp.admissions DROP CONSTRAINT IF EXISTS admissions_patients_fk; +ALTER TABLE nw_hosp.admissions +ADD CONSTRAINT admissions_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +-- diagnoses_icd + +ALTER TABLE nw_hosp.diagnoses_icd DROP CONSTRAINT IF EXISTS diagnoses_icd_patients_fk; +ALTER TABLE nw_hosp.diagnoses_icd +ADD CONSTRAINT diagnoses_icd_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_hosp.diagnoses_icd DROP CONSTRAINT IF EXISTS diagnoses_icd_admissions_fk; +ALTER TABLE nw_hosp.diagnoses_icd +ADD CONSTRAINT diagnoses_icd_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +-- labevents + +ALTER TABLE nw_hosp.labevents DROP CONSTRAINT IF EXISTS labevents_patients_fk; +ALTER TABLE nw_hosp.labevents +ADD CONSTRAINT labevents_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_hosp.labevents DROP CONSTRAINT IF EXISTS labevents_d_labitems_fk; +ALTER TABLE nw_hosp.labevents +ADD CONSTRAINT labevents_d_labitems_fk + FOREIGN KEY (itemid) + REFERENCES nw_hosp.d_labitems (itemid); + +-- prescriptions + +ALTER TABLE nw_hosp.prescriptions DROP CONSTRAINT IF EXISTS prescriptions_patients_fk; +ALTER TABLE nw_hosp.prescriptions +ADD CONSTRAINT prescriptions_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_hosp.prescriptions DROP CONSTRAINT IF EXISTS prescriptions_admissions_fk; +ALTER TABLE nw_hosp.prescriptions +ADD CONSTRAINT prescriptions_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +-- emar + +ALTER TABLE nw_hosp.emar DROP CONSTRAINT IF EXISTS emar_patients_fk; +ALTER TABLE nw_hosp.emar +ADD CONSTRAINT emar_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_hosp.emar DROP CONSTRAINT IF EXISTS emar_admissions_fk; +ALTER TABLE nw_hosp.emar +ADD CONSTRAINT emar_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +--------- +-- icu -- +--------- + +-- icustays + +ALTER TABLE nw_icu.icustays DROP CONSTRAINT IF EXISTS icustays_patients_fk; +ALTER TABLE nw_icu.icustays +ADD CONSTRAINT icustays_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_icu.icustays DROP CONSTRAINT IF EXISTS icustays_admissions_fk; +ALTER TABLE nw_icu.icustays +ADD CONSTRAINT icustays_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +-- chartevents + +ALTER TABLE nw_icu.chartevents DROP CONSTRAINT IF EXISTS chartevents_patients_fk; +ALTER TABLE nw_icu.chartevents +ADD CONSTRAINT chartevents_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_icu.chartevents DROP CONSTRAINT IF EXISTS chartevents_admissions_fk; +ALTER TABLE nw_icu.chartevents +ADD CONSTRAINT chartevents_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +ALTER TABLE nw_icu.chartevents DROP CONSTRAINT IF EXISTS chartevents_icustays_fk; +ALTER TABLE nw_icu.chartevents +ADD CONSTRAINT chartevents_icustays_fk + FOREIGN KEY (stay_id) + REFERENCES nw_icu.icustays (stay_id); + + -- procedureevents + +ALTER TABLE nw_icu.procedureevents DROP CONSTRAINT IF EXISTS procedureevents_patients_fk; +ALTER TABLE nw_icu.procedureevents +ADD CONSTRAINT procedureevents_patients_fk + FOREIGN KEY (subject_id) + REFERENCES nw_hosp.patients (subject_id); + +ALTER TABLE nw_icu.procedureevents DROP CONSTRAINT IF EXISTS procedureevents_admissions_fk; +ALTER TABLE nw_icu.procedureevents +ADD CONSTRAINT procedureevents_admissions_fk + FOREIGN KEY (hadm_id) + REFERENCES nw_hosp.admissions (hadm_id); + +ALTER TABLE nw_icu.procedureevents DROP CONSTRAINT IF EXISTS procedureevents_icustays_fk; +ALTER TABLE nw_icu.procedureevents +ADD CONSTRAINT procedureevents_icustays_fk + FOREIGN KEY (stay_id) + REFERENCES nw.icustays (stay_id); diff --git a/nw/buildnw/postgres/create.sql b/nw/buildnw/postgres/create.sql new file mode 100644 index 00000000..afed6f74 --- /dev/null +++ b/nw/buildnw/postgres/create.sql @@ -0,0 +1,203 @@ +------------------------------------------- +-- Create the tables and NW schema -- +------------------------------------------- + +---------------------- +-- Creating schemas -- +---------------------- + +DROP SCHEMA IF EXISTS nw_hosp CASCADE; +CREATE SCHEMA nw_hosp; +DROP SCHEMA IF EXISTS nw_icu CASCADE; +CREATE SCHEMA nw_icu; + +--------------------- +-- Creating tables -- +--------------------- + +-- hosp schema + +DROP TABLE IF EXISTS nw_hosp.admissions; +CREATE TABLE nw_hosp.admissions ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + admittime TIMESTAMP NOT NULL, + dischtime TIMESTAMP, + deathtime TIMESTAMP, + admission_type VARCHAR(40), + admit_provider_id VARCHAR(10), + admission_location VARCHAR(60), + discharge_location VARCHAR(255), + insurance VARCHAR(255), + language VARCHAR(25), + marital_status VARCHAR(30), + race VARCHAR(80), + edregtime TIMESTAMP, + edouttime TIMESTAMP, + hospital_expire_flag SMALLINT +); + +DROP TABLE IF EXISTS nw_hosp.patients; +CREATE TABLE nw_hosp.patients ( + subject_id INTEGER NOT NULL, + gender CHAR(1) NOT NULL, + anchor_age SMALLINT, + anchor_year SMALLINT NOT NULL, + anchor_year_group VARCHAR(20) NOT NULL, + dod DATE +); + +DROP TABLE IF EXISTS nw_hosp.d_icd_diagnoses; +CREATE TABLE nw_hosp.d_icd_diagnoses ( + icd_code CHAR(7) NOT NULL, + icd_version SMALLINT NOT NULL, + long_title VARCHAR(255) +); + +DROP TABLE IF EXISTS nw_hosp.diagnoses_icd; +CREATE TABLE nw_hosp.diagnoses_icd ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + seq_num INTEGER NOT NULL, + icd_code CHAR(7), + icd_version SMALLINT +); + +DROP TABLE IF EXISTS nw_hosp.d_labitems; +CREATE TABLE nw_hosp.d_labitems ( + itemid INTEGER NOT NULL, + label VARCHAR(50), + fluid VARCHAR(50), + category VARCHAR(50) +); + +DROP TABLE IF EXISTS nw_hosp.labevents; +CREATE TABLE nw_hosp.labevents ( + labevent_id INTEGER NOT NULL, + subject_id INTEGER NOT NULL, + hadm_id INTEGER, + specimen_id INTEGER, + itemid INTEGER NOT NULL, + order_provider_id VARCHAR(10), + charttime TIMESTAMP(0), + storetime TIMESTAMP(0), + value VARCHAR(200), + valuenum DOUBLE PRECISION, + valueuom VARCHAR(20), + ref_range_lower DOUBLE PRECISION, + ref_range_upper DOUBLE PRECISION, + flag VARCHAR(10), + priority VARCHAR(7), + comments TEXT +); + +DROP TABLE IF EXISTS nw_hosp.prescriptions; +CREATE TABLE nw_hosp.prescriptions ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + pharmacy_id INTEGER NOT NULL, + poe_id VARCHAR(25), + poe_seq INTEGER, + order_provider_id VARCHAR(10), + starttime TIMESTAMP(3), + stoptime TIMESTAMP(3), + drug_type VARCHAR(100), + drug VARCHAR(255) NOT NULL, + formulary_drug_cd VARCHAR(50), + gsn VARCHAR(255), + ndc VARCHAR(25), + prod_strength VARCHAR(255), + form_rx VARCHAR(25), + dose_val_rx VARCHAR(100), + dose_unit_rx VARCHAR(50), + form_val_disp VARCHAR(255), + form_unit_disp VARCHAR(65), + doses_per_24_hrs REAL, + route VARCHAR(50) + +); + +DROP TABLE IF EXISTS nw_hosp.emar; +CREATE TABLE nw_hosp.emar +( + subject_id INTEGER NOT NULL, + hadm_id INTEGER, + emar_id VARCHAR(25) NOT NULL, + emar_seq INTEGER NOT NULL, + poe_id VARCHAR(25) NOT NULL, + pharmacy_id INTEGER, + enter_provider_id VARCHAR(10), + charttime TIMESTAMP NOT NULL, + medication TEXT, + event_txt VARCHAR(100), + scheduletime TIMESTAMP, + storetime TIMESTAMP NOT NULL +); + +-- icu schema + +DROP TABLE IF EXISTS nw_icu.icustays; +CREATE TABLE nw_icu.icustays ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + stay_id INTEGER NOT NULL, + first_careunit VARCHAR(255), + last_careunit VARCHAR(255), + intime TIMESTAMP, + outtime TIMESTAMP, + los FLOAT +); + +DROP TABLE IF EXISTS nw_icu.d_items; +CREATE TABLE nw_icu.d_items ( + itemid INTEGER NOT NULL, + label VARCHAR(200) NOT NULL, + abbreviation VARCHAR(50), + linksto VARCHAR(30) NOT NULL, + category VARCHAR(50), + unitname VARCHAR(50), + param_type VARCHAR(20) NOT NULL, + lownormalvalue FLOAT, + highnormalvalue FLOAT +); + +DROP TABLE IF EXISTS nw_icu.chartevents; +CREATE TABLE nw_icu.chartevents ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + stay_id INTEGER NOT NULL, + caregiver_id INTEGER, + charttime TIMESTAMP NOT NULL, + storetime TIMESTAMP, + itemid INTEGER NOT NULL, + value VARCHAR(200), + valuenum FLOAT, + valueuom VARCHAR(20), + warning SMALLINT +); + +DROP TABLE IF EXISTS nw_icu.procedureevents; +CREATE TABLE nw_icu.procedureevents ( + subject_id INTEGER NOT NULL, + hadm_id INTEGER NOT NULL, + stay_id INTEGER NOT NULL, + caregiver_id INTEGER, + starttime TIMESTAMP NOT NULL, + endtime TIMESTAMP, + storetime TIMESTAMP, + itemid INTEGER NOT NULL, + value FLOAT, + valueuom VARCHAR(20), + location VARCHAR(100), + locationcategory VARCHAR(50), + orderid INTEGER, + linkorderid INTEGER, + ordercategoryname VARCHAR(50), + ordercategorydescription VARCHAR(30), + patientweight FLOAT, + isopenbag SMALLINT, + continueinnextdept SMALLINT, + statusdescription VARCHAR(30), + originalamount FLOAT, + originalrate FLOAT +); diff --git a/nw/buildnw/postgres/index.sql b/nw/buildnw/postgres/index.sql new file mode 100644 index 00000000..d75c81df --- /dev/null +++ b/nw/buildnw/postgres/index.sql @@ -0,0 +1,153 @@ +-------------------------------------- +-------------------------------------- +-- Indexes for all NW modules -- +-------------------------------------- +-------------------------------------- + +---------- +-- hosp -- +---------- + +SET search_path TO nw_hosp; + +-- admissions + +DROP INDEX IF EXISTS admissions_idx01; +CREATE INDEX admissions_idx01 + ON admissions (admittime, dischtime, deathtime); + +-- patients + +DROP INDEX IF EXISTS patients_idx01; +CREATE INDEX patients_idx01 + ON patients (anchor_age); + +DROP INDEX IF EXISTS patients_idx02; +CREATE INDEX patients_idx02 + ON patients (anchor_year); + +-- d_icd_diagnoses + +DROP INDEX IF EXISTS d_icd_diagnoses_idx01; +CREATE INDEX d_icd_diagnoses_idx01 + ON d_icd_diagnoses (long_title); + +DROP INDEX IF EXISTS d_icd_diagnoses_idx02; +CREATE INDEX d_icd_diagnoses_idx02 + ON d_icd_diagnoses (icd_code); + +-- diagnoses_icd + +DROP INDEX IF EXISTS diagnoses_icd_idx01; +CREATE INDEX diagnoses_icd_idx01 + ON diagnoses_icd (icd_code); + +DROP INDEX IF EXISTS diagnoses_icd_idx02; +CREATE INDEX diagnoses_icd_idx02 + ON diagnoses_icd (icd_code, icd_version); + +-- d_labitems + +DROP INDEX IF EXISTS d_labitems_idx01; +CREATE INDEX d_labitems_idx01 + ON d_labitems (label, fluid, category); + +DROP INDEX IF EXISTS d_labitems_idx02; +CREATE INDEX d_labitems_idx02 + ON d_labitems (itemid); + +-- labevents + +DROP INDEX IF EXISTS labevents_idx01; +CREATE INDEX labevents_idx01 + ON labevents (charttime, storetime); + +DROP INDEX IF EXISTS labevents_idx02; +CREATE INDEX labevents_idx02 + ON labevents (specimen_id); + +DROP INDEX IF EXISTS labevents_idx03; +CREATE INDEX labevents_idx03 + ON labevents (itemid); + +-- prescriptions + +DROP INDEX IF EXISTS prescriptions_idx01; +CREATE INDEX prescriptions_idx01 + ON prescriptions (starttime, stoptime); + +DROP INDEX IF EXISTS prescriptions_idx02; +CREATE INDEX prescriptions_idx02 + ON prescriptions (ndc); + +-- emar + +DROP INDEX IF EXISTS emar_idx01; +CREATE INDEX emar_idx01 + ON emar (poe_id); + +DROP INDEX IF EXISTS emar_idx02; +CREATE INDEX emar_idx02 + ON emar (pharmacy_id); + +DROP INDEX IF EXISTS emar_idx03; +CREATE INDEX emar_idx03 + ON emar (charttime, scheduletime, storetime); + +DROP INDEX IF EXISTS emar_idx04; +CREATE INDEX emar_idx04 + ON emar (medication); + +--------- +-- icu -- +--------- + +SET search_path TO nw_icu; + +-- chartevents + +DROP INDEX IF EXISTS chartevents_idx01; +CREATE INDEX chartevents_idx01 + ON chartevents (charttime, itemid); + +DROP INDEX IF EXISTS chartevents_idx02; +CREATE INDEX chartevents_idx02 + ON chartevents (itemid); + +-- d_items + +DROP INDEX IF EXISTS d_items_idx01; +CREATE INDEX d_items_idx01 + ON d_items (label, abbreviation); + +DROP INDEX IF EXISTS d_items_idx02; +CREATE INDEX d_items_idx02 + ON d_items (category); + +DROP INDEX IF EXISTS d_items_idx03; +CREATE INDEX d_items_idx03 + ON d_items (itemid); + +-- icustays + +DROP INDEX IF EXISTS icustays_idx01; +CREATE INDEX icustays_idx01 + ON icustays (first_careunit, last_careunit); + +DROP INDEX IF EXISTS icustays_idx02; +CREATE INDEX icustays_idx02 + ON icustays (intime, outtime); + +-- procedureevents + +DROP INDEX IF EXISTS procedureevents_idx01; +CREATE INDEX procedureevents_idx01 + ON procedureevents (starttime, itemid); + +DROP INDEX IF EXISTS procedureevents_idx02; +CREATE INDEX procedureevents_idx02 + ON procedureevents (ordercategoryname); + +DROP INDEX IF EXISTS procedureevents_idx03; +CREATE INDEX procedureevents_idx03 + ON procedureevents (itemid); diff --git a/nw/buildnw/postgres/load_gz.sql b/nw/buildnw/postgres/load_gz.sql new file mode 100644 index 00000000..af30a4c6 --- /dev/null +++ b/nw/buildnw/postgres/load_gz.sql @@ -0,0 +1,30 @@ +-------------------------------------- +-- Load data into the NW schemas -- +-------------------------------------- + +-- To run from a terminal: +-- psql "dbname= user=" -v nw_data_dir= -f load_gz.sql +\cd :nw_data_dir + +-- making sure that all tables are empty and correct encoding is defined -utf8- +SET CLIENT_ENCODING TO 'utf8'; + +-- hosp schema +\cd nw_hosp + +\COPY nw_hosp.admissions FROM PROGRAM 'gzip -dc admissions.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.patients FROM PROGRAM 'gzip -dc patients.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.d_icd_diagnoses FROM PROGRAM 'gzip -dc d_icd_diagnoses.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.d_labitems FROM PROGRAM 'gzip -dc d_labitems.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.diagnoses_icd FROM PROGRAM 'gzip -dc diagnoses_icd.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.emar FROM PROGRAM 'gzip -dc emar.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.labevents FROM PROGRAM 'gzip -dc labevents.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_hosp.prescriptions FROM PROGRAM 'gzip -dc prescriptions.csv.gz' DELIMITER ',' CSV HEADER NULL ''; + +-- icu schema +\cd ../nw_icu + +\COPY nw_icu.chartevents FROM PROGRAM 'gzip -dc chartevents.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_icu.d_items FROM PROGRAM 'gzip -dc d_items.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_icu.icustays FROM PROGRAM 'gzip -dc icustays.csv.gz' DELIMITER ',' CSV HEADER NULL ''; +\COPY nw_icu.procedureevents FROM PROGRAM 'gzip -dc procedureevents.csv.gz' DELIMITER ',' CSV HEADER NULL ''; diff --git a/nw/buildnw/postgres/postgres_env.sh b/nw/buildnw/postgres/postgres_env.sh new file mode 100644 index 00000000..51e6fc70 --- /dev/null +++ b/nw/buildnw/postgres/postgres_env.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# postgres_env.sh: Set PostgreSQL environment variables for your session +# Usage: source postgres_env.sh [user] [password] [database] [host] [port] +# You can override the defaults by passing arguments as shown above. + +export PGUSER="${1:-your_user}" +export PGPASSWORD="${2:-your_password}" +export PGDATABASE="${3:-nw}" +export PGHOST="${4:-localhost}" +export PGPORT="${5:-5432}" + +echo "PostgreSQL environment variables set:" +echo " PGUSER=$PGUSER" +echo " PGPASSWORD=********" +echo " PGDATABASE=$PGDATABASE" +echo " PGHOST=$PGHOST" +echo " PGPORT=$PGPORT" diff --git a/nw/buildnw/postgres/validate.sql b/nw/buildnw/postgres/validate.sql new file mode 100644 index 00000000..41ecb537 --- /dev/null +++ b/nw/buildnw/postgres/validate.sql @@ -0,0 +1,45 @@ +-- Validate the NW built correctly by checking against known row counts +WITH expected AS ( + SELECT 'admissions' AS tbl, 61843 AS row_count UNION ALL + SELECT 'patients' AS tbl, 25923 AS row_count UNION ALL + SELECT 'd_icd_diagnoses' AS tbl, 73958 AS row_count UNION ALL + SELECT 'diagnoses_icd' AS tbl, 371807 AS row_count UNION ALL + SELECT 'd_labitems' AS tbl, 256 AS row_count UNION ALL + SELECT 'labevents' AS tbl, 16668451 AS row_count UNION ALL + SELECT 'prescriptions' AS tbl, 1852983 AS row_count UNION ALL + SELECT 'emar' AS tbl, 19196614 AS row_count UNION ALL + -- icu data + SELECT 'icustays' AS tbl, 28612 AS row_count UNION ALL + SELECT 'd_items' AS tbl, 344 AS row_count UNION ALL + SELECT 'chartevents' AS tbl, 9619759 AS row_count UNION ALL + SELECT 'procedureevents' AS tbl, 1017891 AS row_count +), +observed AS ( + SELECT 'admissions' AS tbl, count(*) AS row_count FROM nw_hosp.admissions UNION ALL + SELECT 'patients' AS tbl, count(*) AS row_count FROM nw_hosp.patients UNION ALL + SELECT 'd_icd_diagnoses' AS tbl, count(*) AS row_count FROM nw_hosp.d_icd_diagnoses UNION ALL + SELECT 'diagnoses_icd' AS tbl, count(*) AS row_count FROM nw_hosp.diagnoses_icd UNION ALL + SELECT 'd_labitems' AS tbl, count(*) AS row_count FROM nw_hosp.d_labitems UNION ALL + SELECT 'labevents' AS tbl, count(*) AS row_count FROM nw_hosp.labevents UNION ALL + SELECT 'prescriptions' AS tbl, count(*) AS row_count FROM nw_hosp.prescriptions UNION ALL + SELECT 'emar' AS tbl, count(*) AS row_count FROM nw_hosp.emar UNION ALL + -- icu data + SELECT 'icustays' AS tbl, count(*) AS row_count FROM nw_icu.icustays UNION ALL + SELECT 'd_items' AS tbl, count(*) AS row_count FROM nw_icu.d_items UNION ALL + SELECT 'chartevents' AS tbl, count(*) AS row_count FROM nw_icu.chartevents UNION ALL + SELECT 'procedureevents' AS tbl, count(*) AS row_count FROM nw_icu.procedureevents +) +SELECT + exp.tbl + , exp.row_count AS expected_count + , obs.row_count AS observed_count + , CASE + WHEN exp.row_count = obs.row_count + THEN 'PASSED' + ELSE 'FAILED' + END AS ROW_COUNT_CHECK +FROM expected exp +INNER JOIN observed obs + ON exp.tbl = obs.tbl +ORDER BY exp.tbl +;