diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 35f3aa88..b5133a56 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,8 +1,8 @@ # Description -[description] + -medic/pipeline#[number] + # Code review checklist diff --git a/.gitignore b/.gitignore index 3001e71e..39765e35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ - target/ dbt_modules/ dbt_packages/ @@ -8,6 +7,7 @@ logs/ tests/.user.yml dbt-env/* .user.yml -dbt-env/* .idea .DS_Store +env/ +venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 1291244e..dba8a7a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# [1.4.0](https://github.com/medic/cht-pipeline/compare/v1.3.1...v1.4.0) (2024-12-06) + + +### Features + +* add users meta base models ([#181](https://github.com/medic/cht-pipeline/issues/181)) ([c5b8285](https://github.com/medic/cht-pipeline/commit/c5b82855c24cd85b90af2b910d456b04650c4f49)) + ## [1.3.1](https://github.com/medic/cht-pipeline/compare/v1.3.0...v1.3.1) (2024-10-10) diff --git a/README.md b/README.md index e7f2e5c2..725c029e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -A set of SQL queries that transform raw CouchDB data into a more useful format. It uses `dbt` to define the models that are translated into PostgreSQL tables or views, which makes it easier to query the data in the analytics platform of choice. +A set of SQL queries that transform raw CouchDB data into a more useful format for analytics. It uses `dbt` to define the models that are translated into PostgreSQL tables or views, which makes it easier to query the data in the analytics platform of choice. -## Local Setup -Follow the instructions in [the Local CHT Sync Setup documentation](https://docs.communityhealthtoolkit.org/apps/guides/data/analytics/setup/) to set up CHT Sync locally. 
+## Setup +Follow the instructions in [the CHT Sync documentation](https://docs.communityhealthtoolkit.org/hosting/analytics/) to set up the data pipeline. ## Run dbt models unit tests locally @@ -11,7 +11,7 @@ Follow the instructions in [the Local CHT Sync Setup documentation](https://docs ### Run the tests 1. Navigate to `tests` folder. -2. Run the test script +2. Run the test script: ```sh # set environment variables, install dbt dependencies, seed data, run dbt, run test diff --git a/models/users/feedback.sql b/models/users/feedback.sql new file mode 100644 index 00000000..4408d826 --- /dev/null +++ b/models/users/feedback.sql @@ -0,0 +1,34 @@ +{% set COLUMNS = 'columns' %} +{{ + config( + materialized = 'incremental', + unique_key='uuid', + on_schema_change='append_new_columns', + indexes=[ + {COLUMNS: ['uuid'], 'type': 'hash'}, + {COLUMNS: ['saved_timestamp']}, + {COLUMNS: ['period_start']}, + {COLUMNS: ['user_name']}, + ] + ) +}} + +SELECT + document_metadata.uuid as uuid, + document_metadata.saved_timestamp, + doc#>>'{meta,source}' AS source, + doc#>>'{meta,url}' AS url, + doc#>>'{meta,user,name}' AS user_name, + doc#>>'{meta,time}' AS period_start, --> kept as text, no cast applied + COALESCE(doc#>>'{info,cause}',doc->>'info') AS cause, --> info.cause, else the whole info value as text + doc#>>'{info,message}' AS message +FROM {{ ref('document_metadata') }} document_metadata +INNER JOIN + {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + ON source_table._id = document_metadata.uuid +WHERE + document_metadata.doc_type = 'feedback' + AND document_metadata._deleted = false +{% if is_incremental() %} + AND document_metadata.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} +{% endif %} diff --git a/models/users/telemetry.sql b/models/users/telemetry.sql new file mode 100644 index 00000000..12566304 --- /dev/null +++ b/models/users/telemetry.sql @@ -0,0 +1,55 @@ +{% set COLUMNS = 'columns' %} +{{ + config( + materialized = 'incremental', + unique_key='uuid', + on_schema_change='append_new_columns', + indexes=[ 
+ {COLUMNS: ['uuid'], 'type': 'hash'}, + {COLUMNS: ['saved_timestamp']}, + {COLUMNS: ['period_start']}, + {COLUMNS: ['user_name']}, + {COLUMNS: ['app_version']}, + ] + ) +}} + +SELECT + document_metadata.uuid as uuid, + document_metadata.saved_timestamp, + CONCAT_WS( --> Date concatenation from JSON fields, eg. 2021-5-17 (NULL parts are skipped) + '-', + doc#>>'{metadata,year}', --> year + CASE --> month of the year + WHEN + string_to_array(substring(doc#>>'{metadata,versions,app}' FROM '(\d+\.\d+\.\d+)'),'.')::int[] < '{3,8,0}'::int[] --> app version below 3.8.0; dots escaped so only x.y.z is captured, anything else yields NULL and takes the ELSE branch + THEN + (doc#>>'{metadata,month}')::int+1 --> Legacy, months zero-indexed (0 - 11) + ELSE + (doc#>>'{metadata,month}')::int --> Month is between 1 - 12 + END, + CASE --> day of the month, else 1 + WHEN + (doc#>>'{metadata,day}') IS NOT NULL + THEN + doc#>>'{metadata,day}' + ELSE + '1' + END + )::timestamptz AS period_start, + doc#>>'{metadata,user}' AS user_name, + doc#>>'{metadata,versions,app}' AS app_version, + doc#>>'{metrics,boot_time,min}' AS boot_time_min, + doc#>>'{metrics,boot_time,max}' AS boot_time_max, + doc#>>'{metrics,boot_time,count}' AS boot_time_count, + doc#>>'{dbInfo,doc_count}' AS doc_count_on_local_db +FROM {{ ref('document_metadata') }} document_metadata +INNER JOIN + {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + ON source_table._id = document_metadata.uuid +WHERE + document_metadata.doc_type = 'telemetry' + AND document_metadata._deleted = false +{% if is_incremental() %} + AND document_metadata.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} +{% endif %} diff --git a/models/users/telemetry_devices.sql b/models/users/telemetry_devices.sql new file mode 100644 index 00000000..38f396d8 --- /dev/null +++ b/models/users/telemetry_devices.sql @@ -0,0 +1,35 @@ +{% set COLUMNS = 'columns' %} +{{ + config( + materialized = 'incremental', + unique_key='uuid', + on_schema_change='append_new_columns', + indexes=[ + {COLUMNS: ['uuid'], 'type': 'hash'}, + {COLUMNS: ['saved_timestamp']}, + {COLUMNS: 
['period_start']}, + {COLUMNS: ['android_version']}, + ] + ) +}} + +SELECT + telemetry.uuid, + telemetry.saved_timestamp, + telemetry.period_start, + doc #>> '{device,deviceInfo,hardware,manufacturer}' AS device_manufacturer, + doc #>> '{device,deviceInfo,hardware,model}' AS device_model, + doc #>> '{device,userAgent}' AS user_agent, + doc #>> '{device,deviceInfo,app,version}' AS cht_android_version, + doc #>> '{device,deviceInfo,software,androidVersion}' AS android_version, + doc #>> '{device,deviceInfo,storage,free}' AS storage_free, + doc #>> '{device,deviceInfo,storage,total}' AS storage_total, + doc #>> '{device,deviceInfo,network,upSpeed}' AS network_up_speed, + doc #>> '{device,deviceInfo,network,downSpeed}' AS network_down_speed +FROM {{ ref('telemetry') }} telemetry --> uuid and timestamps come from the telemetry model; device fields are read from the raw doc joined below +INNER JOIN + {{ source('couchdb', env_var('POSTGRES_TABLE')) }} source_table + ON source_table._id = telemetry.uuid +{% if is_incremental() %} + WHERE telemetry.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }} +{% endif %} diff --git a/models/users/tests/feedback.yml b/models/users/tests/feedback.yml new file mode 100644 index 00000000..0fb0f188 --- /dev/null +++ b/models/users/tests/feedback.yml @@ -0,0 +1,19 @@ +unit_tests: + - name: test_feedback_model_transformation_and_data_integrity + description: | + This unit test validates the transformation logic in the `feedback` model and ensures data integrity. + It uses fixture data for both `document_metadata` and `source_table` to test the complete logic. 
+ model: feedback + overrides: + macros: + is_incremental: false + given: + - input: ref('document_metadata') + format: csv + fixture: user_document_metadata_initial + - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") + format: csv + fixture: user_source_table_initial + expect: + format: csv + fixture: feedback_initial_expected diff --git a/models/users/tests/telemetry.yml b/models/users/tests/telemetry.yml new file mode 100644 index 00000000..cd8116c9 --- /dev/null +++ b/models/users/tests/telemetry.yml @@ -0,0 +1,19 @@ +unit_tests: + - name: test_telemetry_model_transformation_and_data_integrity + description: | + This unit test validates the transformation logic in the `telemetry` model and ensures data integrity. + It uses fixture data for both `document_metadata` and `source_table` to test the complete logic. + model: telemetry + overrides: + macros: + is_incremental: false + given: + - input: ref('document_metadata') + format: csv + fixture: user_document_metadata_initial + - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") + format: csv + fixture: user_source_table_initial + expect: + format: csv + fixture: telemetry_initial_expected diff --git a/models/users/tests/telemetry_devices.yml b/models/users/tests/telemetry_devices.yml new file mode 100644 index 00000000..3e3c12d1 --- /dev/null +++ b/models/users/tests/telemetry_devices.yml @@ -0,0 +1,19 @@ +unit_tests: + - name: test_telemetry_devices_model_transformation_and_data_integrity + description: | + This unit test validates the transformation logic in the `telemetry_devices` model and ensures data integrity. + It uses fixture data for both `telemetry` and `source_table` to test the complete logic. 
+ model: telemetry_devices + overrides: + macros: + is_incremental: false + given: + - input: ref('telemetry') + format: csv + fixture: telemetry_initial_expected + - input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}") + format: csv + fixture: user_source_table_initial + expect: + format: csv + fixture: telemetry_devices_initial_expected diff --git a/models/users/user.yml b/models/users/user.yml index f6d49e5d..eb133a65 100644 --- a/models/users/user.yml +++ b/models/users/user.yml @@ -32,3 +32,109 @@ models: data_type: string - name: roles data_type: string + - name: feedback + config: + contract: + enforced: true + columns: + - name: uuid + data_type: string + constraints: + - type: unique + - type: foreign_key + expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE" + data_tests: + - not_null + - relationships: + to: ref('document_metadata') + field: uuid + - name: saved_timestamp + data_type: timestamp + data_tests: + - not_null + - name: source + data_type: string + - name: url + data_type: string + - name: user_name + data_type: string + - name: period_start + data_type: string + - name: cause + data_type: string + - name: message + data_type: string + - name: telemetry + config: + contract: + enforced: true + columns: + - name: uuid + data_type: string + constraints: + - type: unique + - type: foreign_key + expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE" + data_tests: + - not_null + - relationships: + to: ref('document_metadata') + field: uuid + - name: saved_timestamp + data_type: timestamp + data_tests: + - not_null + - name: period_start + data_type: timestamp with time zone + - name: user_name + data_type: string + - name: app_version + data_type: string + - name: boot_time_min + data_type: string + - name: boot_time_max + data_type: string + - name: boot_time_count + data_type: string + - name: doc_count_on_local_db + data_type: string + - name: telemetry_devices + 
config: + contract: + enforced: true + columns: + - name: uuid + data_type: string + constraints: + - type: unique + - type: foreign_key + expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE" + data_tests: + - not_null + - relationships: + to: ref('document_metadata') + field: uuid + - name: saved_timestamp + data_type: timestamp + data_tests: + - not_null + - name: period_start + data_type: timestamp with time zone + - name: device_manufacturer + data_type: string + - name: device_model + data_type: string + - name: user_agent + data_type: string + - name: cht_android_version + data_type: string + - name: android_version + data_type: string + - name: storage_free + data_type: string + - name: storage_total + data_type: string + - name: network_up_speed + data_type: string + - name: network_down_speed + data_type: string diff --git a/tests/fixtures/user/feedback_initial_expected.csv b/tests/fixtures/user/feedback_initial_expected.csv new file mode 100644 index 00000000..2f5d78b2 --- /dev/null +++ b/tests/fixtures/user/feedback_initial_expected.csv @@ -0,0 +1,2 @@ +uuid,saved_timestamp,source,url,user_name,period_start,cause,message +6,2024-08-02 00:00:00,automatic,http://example.com,admin,2024-08-02 00:00:00,bug,this is a bug diff --git a/tests/fixtures/user/telemetry_devices_initial_expected.csv b/tests/fixtures/user/telemetry_devices_initial_expected.csv new file mode 100644 index 00000000..24aa1944 --- /dev/null +++ b/tests/fixtures/user/telemetry_devices_initial_expected.csv @@ -0,0 +1,2 @@ +uuid,saved_timestamp,period_start,device_manufacturer,device_model,user_agent,cht_android_version,android_version,storage_free,storage_total,network_up_speed,network_down_speed +8,2024-08-02 00:00:00,2024-09-11,Google,Pixel 3,Chrome Mobile,4.15.0,10,100,200,100,200 diff --git a/tests/fixtures/user/telemetry_initial_expected.csv b/tests/fixtures/user/telemetry_initial_expected.csv new file mode 100644 index 00000000..18d3429c --- 
/dev/null +++ b/tests/fixtures/user/telemetry_initial_expected.csv @@ -0,0 +1,2 @@ +uuid,saved_timestamp,period_start,user_name,app_version,boot_time_min,boot_time_max,boot_time_count,doc_count_on_local_db +8,2024-08-02 00:00:00,2024-09-11,admin,4.15.0,0.5,1.5,2,20 diff --git a/tests/fixtures/user/user_document_metadata_initial.csv b/tests/fixtures/user/user_document_metadata_initial.csv index 46d2dcc1..404dd5e1 100644 --- a/tests/fixtures/user/user_document_metadata_initial.csv +++ b/tests/fixtures/user/user_document_metadata_initial.csv @@ -4,4 +4,7 @@ uuid,_deleted,saved_timestamp,doc_type 3,false,2024-08-02 00:00:00,user-settings 4,false,2024-08-02 00:00:00,user-settings 5,true,2024-08-02 00:00:00,other-type - +6,false,2024-08-02 00:00:00,feedback +7,true,2024-08-02 00:00:00,feedback +8,false,2024-08-02 00:00:00,telemetry +9,true,2024-08-02 00:00:00,telemetry diff --git a/tests/fixtures/user/user_source_table_initial.csv b/tests/fixtures/user/user_source_table_initial.csv index 346efbe8..eadc1273 100644 --- a/tests/fixtures/user/user_source_table_initial.csv +++ b/tests/fixtures/user/user_source_table_initial.csv @@ -4,4 +4,7 @@ saved_timestamp,_id,_deleted,doc 2024-08-02 00:00:00,3,false,"{""type"": ""user-settings"", ""contact_id"": ""1003"", ""language"": ""es"", ""roles"": ""guest""}" 2024-08-02 00:00:00,4,false,"{""type"": ""user-settings"", ""contact_id"": ""1004"", ""language"": ""de"", ""roles"": ""user""}" 2024-08-02 00:00:00,5,true,"{""type"": ""other-type"", ""contact_id"": ""1005"", ""language"": ""it"", ""roles"": ""admin""}" - +2024-08-02 00:00:00,6,false,"{""type"": ""feedback"", ""meta"": {""source"": ""automatic"", ""url"": ""http://example.com"", ""time"": ""2024-08-02 00:00:00"", ""user"": {""name"": ""admin""}}, ""info"": {""cause"": ""bug"", ""message"": ""this is a bug""}}" +2024-08-02 00:00:00,7,true,"{""type"": ""feedback"", ""meta"": {""source"": ""automatic"", ""url"": ""http://example.com"", ""time"": ""2024-08-02 00:00:00"", 
""user"": {""name"": ""admin""}}, ""info"": {""cause"": ""bug"", ""message"": ""this is a bug""}}" +2024-08-02 00:00:00,8,false,"{""type"": ""telemetry"", ""metadata"": {""day"": ""2024-09-11"", ""user"": ""admin"", ""versions"": {""app"": ""4.15.0""}}, ""metrics"": {""boot_time"": {""min"": 0.5, ""max"": 1.5, ""count"": 2}}, ""dbInfo"": {""doc_count"": 20}, ""device"": {""userAgent"": ""Chrome Mobile"", ""deviceInfo"": {""hardware"": {""manufacturer"": ""Google"", ""model"": ""Pixel 3""}, ""app"": {""version"": ""4.15.0""}, ""software"": {""androidVersion"": ""10""}, ""os"": {""version"": ""10"", ""sdkInt"": 29}, ""storage"": {""free"": 100, ""total"": 200}, ""network"": {""upSpeed"": 100, ""downSpeed"": 200}}}}" +2024-08-02 00:00:00,9,true,"{""type"": ""telemetry"", ""metadata"": {""day"": ""2024-09-12"", ""user"": ""admin2"", ""versions"": {""app"": ""4.13.0""}}, ""metrics"": {""boot_time"": {""min"": 1, ""max"": 2.5, ""count"": 2}}, ""dbInfo"": {""doc_count"": 20}, ""device"": {""userAgent"": ""Chrome Mobile"", ""deviceInfo"": {""hardware"": {""manufacturer"": ""Google"", ""model"": ""Pixel 2""}, ""app"": {""version"": ""4.13.0""}, ""software"": {""androidVersion"": ""9""}, ""os"": {""version"": ""10"", ""sdkInt"": 29}, ""storage"": {""free"": 100, ""total"": 200}, ""network"": {""upSpeed"": 100, ""downSpeed"": 200}}}}"