Skip to content

Commit

Permalink
Merge branch 'main' into test-microbatch-model
Browse files Browse the repository at this point in the history
  • Loading branch information
andrablaj committed Jan 8, 2025
2 parents 931cc3d + 7bb002b commit 6d5d474
Show file tree
Hide file tree
Showing 16 changed files with 316 additions and 10 deletions.
4 changes: 2 additions & 2 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Description

[description]
<!-- DESCRIPTION -->

medic/pipeline#[number]
<!-- ISSUE NUMBER -->

# Code review checklist
<!-- Remove or comment out any items that do not apply to this PR; in the remaining boxes, replace the [ ] with [x]. -->
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

target/
dbt_modules/
dbt_packages/
Expand All @@ -8,6 +7,7 @@ logs/
tests/.user.yml
dbt-env/*
.user.yml
dbt-env/*
.idea
.DS_Store
env/
venv/
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# [1.4.0](https://github.com/medic/cht-pipeline/compare/v1.3.1...v1.4.0) (2024-12-06)


### Features

* add users meta base models ([#181](https://github.com/medic/cht-pipeline/issues/181)) ([c5b8285](https://github.com/medic/cht-pipeline/commit/c5b82855c24cd85b90af2b910d456b04650c4f49))

## [1.3.1](https://github.com/medic/cht-pipeline/compare/v1.3.0...v1.3.1) (2024-10-10)


Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
A set of SQL queries that transform raw CouchDB data into a more useful format. It uses `dbt` to define the models that are translated into PostgreSQL tables or views, which makes it easier to query the data in the analytics platform of choice.
A set of SQL queries that transform raw CouchDB data into a more useful format for analytics. It uses `dbt` to define the models that are translated into PostgreSQL tables or views, which makes it easier to query the data in the analytics platform of choice.

## Local Setup
Follow the instructions in [the Local CHT Sync Setup documentation](https://docs.communityhealthtoolkit.org/apps/guides/data/analytics/setup/) to set up CHT Sync locally.
## Setup
Follow the instructions in [the CHT Sync documentation](https://docs.communityhealthtoolkit.org/hosting/analytics/) to set up the data pipeline.

## Run dbt models unit tests locally

Expand All @@ -11,7 +11,7 @@ Follow the instructions in [the Local CHT Sync Setup documentation](https://docs
### Run the tests

1. Navigate to `tests` folder.
2. Run the test script
2. Run the test script:

```sh
# set environment variables, install dbt dependencies, seed data, run dbt, run test
Expand Down
34 changes: 34 additions & 0 deletions models/users/feedback.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{#
  feedback model.

  Emits one row per document that document_metadata marks as doc_type
  'feedback' and not deleted, joined to the raw CouchDB source table on
  _id = uuid. The descriptive columns are extracted as text from the
  document JSON. On incremental runs only rows whose saved_timestamp is at
  or after the value returned by max_existing_timestamp are selected.
#}
{% set COLUMNS = 'columns' %}
{{
  config(
    materialized = 'incremental',
    unique_key = 'uuid',
    on_schema_change = 'append_new_columns',
    indexes = [
      {COLUMNS: ['uuid'], 'type': 'hash'},
      {COLUMNS: ['saved_timestamp']},
      {COLUMNS: ['period_start']},
      {COLUMNS: ['user_name']},
    ]
  )
}}

-- Feedback documents paired with their raw JSON payload.
WITH feedback_docs AS (
  SELECT
    document_metadata.uuid,
    document_metadata.saved_timestamp,
    source_table.doc
  FROM {{ ref('document_metadata') }} AS document_metadata
  INNER JOIN {{ source('couchdb', env_var('POSTGRES_TABLE')) }} AS source_table
    ON document_metadata.uuid = source_table._id
  WHERE
    document_metadata._deleted = false
    AND document_metadata.doc_type = 'feedback'
    {% if is_incremental() %}
    AND document_metadata.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }}
    {% endif %}
)

SELECT
  feedback_docs.uuid AS uuid,
  feedback_docs.saved_timestamp,
  feedback_docs.doc #>> '{meta,source}' AS source,
  feedback_docs.doc #>> '{meta,url}' AS url,
  feedback_docs.doc #>> '{meta,user,name}' AS user_name,
  feedback_docs.doc #>> '{meta,time}' AS period_start,
  -- Use info.cause when present, otherwise fall back to the whole info value as text.
  COALESCE(
    feedback_docs.doc #>> '{info,cause}',
    feedback_docs.doc ->> 'info'
  ) AS cause,
  feedback_docs.doc #>> '{info,message}' AS message
FROM feedback_docs
55 changes: 55 additions & 0 deletions models/users/telemetry.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{#
  telemetry model.

  Emits one row per document that document_metadata marks as doc_type
  'telemetry' and not deleted, joined to the raw CouchDB source table on
  _id = uuid.

  period_start is built by joining metadata.year, metadata.month and
  metadata.day with '-' and casting the result to timestamptz. CONCAT_WS
  skips NULL parts, so a document that only carries a full date in
  metadata.day still produces a valid value.

  On incremental runs only rows whose saved_timestamp is at or after the
  value returned by max_existing_timestamp are selected.
#}
{% set COLUMNS = 'columns' %}
{{
  config(
    materialized = 'incremental',
    unique_key = 'uuid',
    on_schema_change = 'append_new_columns',
    indexes = [
      {COLUMNS: ['uuid'], 'type': 'hash'},
      {COLUMNS: ['saved_timestamp']},
      {COLUMNS: ['period_start']},
      {COLUMNS: ['user_name']},
      {COLUMNS: ['app_version']},
    ]
  )
}}

SELECT
  document_metadata.uuid AS uuid,
  document_metadata.saved_timestamp,
  CONCAT_WS( -- date assembled from JSON fields, e.g. 2021-5-17
    '-',
    source_table.doc #>> '{metadata,year}', -- year
    CASE -- month of the year
      WHEN
        -- The dots are escaped so that only a dotted major.minor.patch prefix is
        -- captured. With a bare "." any separator matched, and a capture such as
        -- 3-7-0 could not be cast to int[], failing the whole query.
        -- A version that does not match yields NULL and takes the ELSE branch.
        string_to_array(
          substring(source_table.doc #>> '{metadata,versions,app}' FROM '(\d+\.\d+\.\d+)'),
          '.'
        )::int[] < '{3,8,0}'::int[]
      THEN
        (source_table.doc #>> '{metadata,month}')::int + 1 -- legacy: months are zero-indexed (0 - 11)
      ELSE
        (source_table.doc #>> '{metadata,month}')::int -- month is between 1 - 12
    END,
    COALESCE(source_table.doc #>> '{metadata,day}', '1') -- day of the month, else 1
  )::timestamptz AS period_start,
  source_table.doc #>> '{metadata,user}' AS user_name,
  source_table.doc #>> '{metadata,versions,app}' AS app_version,
  source_table.doc #>> '{metrics,boot_time,min}' AS boot_time_min,
  source_table.doc #>> '{metrics,boot_time,max}' AS boot_time_max,
  source_table.doc #>> '{metrics,boot_time,count}' AS boot_time_count,
  source_table.doc #>> '{dbInfo,doc_count}' AS doc_count_on_local_db
FROM {{ ref('document_metadata') }} AS document_metadata
INNER JOIN {{ source('couchdb', env_var('POSTGRES_TABLE')) }} AS source_table
  ON source_table._id = document_metadata.uuid
WHERE
  document_metadata.doc_type = 'telemetry'
  AND document_metadata._deleted = false
  {% if is_incremental() %}
  AND document_metadata.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }}
  {% endif %}
35 changes: 35 additions & 0 deletions models/users/telemetry_devices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{#
  telemetry_devices model.

  Emits one row per row of the telemetry model, joined back to the raw
  CouchDB source table on _id = uuid so that the device section of the
  document JSON can be extracted as text columns. On incremental runs only
  telemetry rows whose saved_timestamp is at or after the value returned by
  max_existing_timestamp are selected.
#}
{% set COLUMNS = 'columns' %}
{{
  config(
    materialized = 'incremental',
    unique_key = 'uuid',
    on_schema_change = 'append_new_columns',
    indexes = [
      {COLUMNS: ['uuid'], 'type': 'hash'},
      {COLUMNS: ['saved_timestamp']},
      {COLUMNS: ['period_start']},
      {COLUMNS: ['android_version']},
    ]
  )
}}

-- Telemetry rows paired with their raw JSON payload.
WITH telemetry_docs AS (
  SELECT
    telemetry.uuid,
    telemetry.saved_timestamp,
    telemetry.period_start,
    source_table.doc
  FROM {{ ref('telemetry') }} AS telemetry
  INNER JOIN {{ source('couchdb', env_var('POSTGRES_TABLE')) }} AS source_table
    ON telemetry.uuid = source_table._id
  {% if is_incremental() %}
  WHERE telemetry.saved_timestamp >= {{ max_existing_timestamp('saved_timestamp') }}
  {% endif %}
)

SELECT
  telemetry_docs.uuid,
  telemetry_docs.saved_timestamp,
  telemetry_docs.period_start,
  telemetry_docs.doc #>> '{device,deviceInfo,hardware,manufacturer}' AS device_manufacturer,
  telemetry_docs.doc #>> '{device,deviceInfo,hardware,model}' AS device_model,
  telemetry_docs.doc #>> '{device,userAgent}' AS user_agent,
  telemetry_docs.doc #>> '{device,deviceInfo,app,version}' AS cht_android_version,
  telemetry_docs.doc #>> '{device,deviceInfo,software,androidVersion}' AS android_version,
  telemetry_docs.doc #>> '{device,deviceInfo,storage,free}' AS storage_free,
  telemetry_docs.doc #>> '{device,deviceInfo,storage,total}' AS storage_total,
  telemetry_docs.doc #>> '{device,deviceInfo,network,upSpeed}' AS network_up_speed,
  telemetry_docs.doc #>> '{device,deviceInfo,network,downSpeed}' AS network_down_speed
FROM telemetry_docs
19 changes: 19 additions & 0 deletions models/users/tests/feedback.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
unit_tests:
- name: test_feedback_model_transformation_and_data_integrity
description: |
This unit test validates the transformation logic in the `feedback` model and ensures data integrity.
It uses fixture data for both `document_metadata` and `source_table` to test the complete logic.
model: feedback
overrides:
macros:
is_incremental: false
given:
- input: ref('document_metadata')
format: csv
fixture: user_document_metadata_initial
- input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}")
format: csv
fixture: user_source_table_initial
expect:
format: csv
fixture: feedback_initial_expected
19 changes: 19 additions & 0 deletions models/users/tests/telemetry.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
unit_tests:
- name: test_telemetry_model_transformation_and_data_integrity
description: |
This unit test validates the transformation logic in the `telemetry` model and ensures data integrity.
It uses fixture data for both `document_metadata` and `source_table` to test the complete logic.
model: telemetry
overrides:
macros:
is_incremental: false
given:
- input: ref('document_metadata')
format: csv
fixture: user_document_metadata_initial
- input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}")
format: csv
fixture: user_source_table_initial
expect:
format: csv
fixture: telemetry_initial_expected
19 changes: 19 additions & 0 deletions models/users/tests/telemetry_devices.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
unit_tests:
- name: test_telemetry_devices_model_transformation_and_data_integrity
description: |
This unit test validates the transformation logic in the `telemetry_devices` model and ensures data integrity.
It uses fixture data for both `telemetry` and `source_table` to test the complete logic.
model: telemetry_devices
overrides:
macros:
is_incremental: false
given:
- input: ref('telemetry')
format: csv
fixture: telemetry_initial_expected
- input: source('couchdb', "{{ env_var('POSTGRES_TABLE') }}")
format: csv
fixture: user_source_table_initial
expect:
format: csv
fixture: telemetry_devices_initial_expected
106 changes: 106 additions & 0 deletions models/users/user.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,109 @@ models:
data_type: string
- name: roles
data_type: string
- name: feedback
config:
contract:
enforced: true
columns:
- name: uuid
data_type: string
constraints:
- type: unique
- type: foreign_key
expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE"
data_tests:
- not_null
- relationships:
to: ref('document_metadata')
field: uuid
- name: saved_timestamp
data_type: timestamp
data_tests:
- not_null
- name: source
data_type: string
- name: url
data_type: string
- name: user_name
data_type: string
- name: period_start
data_type: string
- name: cause
data_type: string
- name: message
data_type: string
- name: telemetry
config:
contract:
enforced: true
columns:
- name: uuid
data_type: string
constraints:
- type: unique
- type: foreign_key
expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE"
data_tests:
- not_null
- relationships:
to: ref('document_metadata')
field: uuid
- name: saved_timestamp
data_type: timestamp
data_tests:
- not_null
- name: period_start
data_type: timestamp with time zone
- name: user_name
data_type: string
- name: app_version
data_type: string
- name: boot_time_min
data_type: string
- name: boot_time_max
data_type: string
- name: boot_time_count
data_type: string
- name: doc_count_on_local_db
data_type: string
- name: telemetry_devices
config:
contract:
enforced: true
columns:
- name: uuid
data_type: string
constraints:
- type: unique
- type: foreign_key
expression: "{{ env_var('POSTGRES_SCHEMA') }}.document_metadata (uuid) ON DELETE CASCADE"
data_tests:
- not_null
- relationships:
to: ref('document_metadata')
field: uuid
- name: saved_timestamp
data_type: timestamp
data_tests:
- not_null
- name: period_start
data_type: timestamp with time zone
- name: device_manufacturer
data_type: string
- name: device_model
data_type: string
- name: user_agent
data_type: string
- name: cht_android_version
data_type: string
- name: android_version
data_type: string
- name: storage_free
data_type: string
- name: storage_total
data_type: string
- name: network_up_speed
data_type: string
- name: network_down_speed
data_type: string
2 changes: 2 additions & 0 deletions tests/fixtures/user/feedback_initial_expected.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
uuid,saved_timestamp,source,url,user_name,period_start,cause,message
6,2024-08-02 00:00:00,automatic,http://example.com,admin,2024-08-02 00:00:00,bug,this is a bug
2 changes: 2 additions & 0 deletions tests/fixtures/user/telemetry_devices_initial_expected.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
uuid,saved_timestamp,period_start,device_manufacturer,device_model,user_agent,cht_android_version,android_version,storage_free,storage_total,network_up_speed,network_down_speed
8,2024-08-02 00:00:00,2024-09-11,Google,Pixel 3,Chrome Mobile,4.15.0,10,100,200,100,200
2 changes: 2 additions & 0 deletions tests/fixtures/user/telemetry_initial_expected.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
uuid,saved_timestamp,period_start,user_name,app_version,boot_time_min,boot_time_max,boot_time_count,doc_count_on_local_db
8,2024-08-02 00:00:00,2024-09-11,admin,4.15.0,0.5,1.5,2,20
5 changes: 4 additions & 1 deletion tests/fixtures/user/user_document_metadata_initial.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ uuid,_deleted,saved_timestamp,doc_type
3,false,2024-08-02 00:00:00,user-settings
4,false,2024-08-02 00:00:00,user-settings
5,true,2024-08-02 00:00:00,other-type

6,false,2024-08-02 00:00:00,feedback
7,true,2024-08-02 00:00:00,feedback
8,false,2024-08-02 00:00:00,telemetry
9,true,2024-08-02 00:00:00,telemetry
5 changes: 4 additions & 1 deletion tests/fixtures/user/user_source_table_initial.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ saved_timestamp,_id,_deleted,doc
2024-08-02 00:00:00,3,false,"{""type"": ""user-settings"", ""contact_id"": ""1003"", ""language"": ""es"", ""roles"": ""guest""}"
2024-08-02 00:00:00,4,false,"{""type"": ""user-settings"", ""contact_id"": ""1004"", ""language"": ""de"", ""roles"": ""user""}"
2024-08-02 00:00:00,5,true,"{""type"": ""other-type"", ""contact_id"": ""1005"", ""language"": ""it"", ""roles"": ""admin""}"

2024-08-02 00:00:00,6,false,"{""type"": ""feedback"", ""meta"": {""source"": ""automatic"", ""url"": ""http://example.com"", ""time"": ""2024-08-02 00:00:00"", ""user"": {""name"": ""admin""}}, ""info"": {""cause"": ""bug"", ""message"": ""this is a bug""}}"
2024-08-02 00:00:00,7,true,"{""type"": ""feedback"", ""meta"": {""source"": ""automatic"", ""url"": ""http://example.com"", ""time"": ""2024-08-02 00:00:00"", ""user"": {""name"": ""admin""}}, ""info"": {""cause"": ""bug"", ""message"": ""this is a bug""}}"
2024-08-02 00:00:00,8,false,"{""type"": ""telemetry"", ""metadata"": {""day"": ""2024-09-11"", ""user"": ""admin"", ""versions"": {""app"": ""4.15.0""}}, ""metrics"": {""boot_time"": {""min"": 0.5, ""max"": 1.5, ""count"": 2}}, ""dbInfo"": {""doc_count"": 20}, ""device"": {""userAgent"": ""Chrome Mobile"", ""deviceInfo"": {""hardware"": {""manufacturer"": ""Google"", ""model"": ""Pixel 3""}, ""app"": {""version"": ""4.15.0""}, ""software"": {""androidVersion"": ""10""}, ""os"": {""version"": ""10"", ""sdkInt"": 29}, ""storage"": {""free"": 100, ""total"": 200}, ""network"": {""upSpeed"": 100, ""downSpeed"": 200}}}}"
2024-08-02 00:00:00,9,true,"{""type"": ""telemetry"", ""metadata"": {""day"": ""2024-09-12"", ""user"": ""admin2"", ""versions"": {""app"": ""4.13.0""}}, ""metrics"": {""boot_time"": {""min"": 1, ""max"": 2.5, ""count"": 2}}, ""dbInfo"": {""doc_count"": 20}, ""device"": {""userAgent"": ""Chrome Mobile"", ""deviceInfo"": {""hardware"": {""manufacturer"": ""Google"", ""model"": ""Pixel 2""}, ""app"": {""version"": ""4.13.0""}, ""software"": {""androidVersion"": ""9""}, ""os"": {""version"": ""10"", ""sdkInt"": 29}, ""storage"": {""free"": 100, ""total"": 200}, ""network"": {""upSpeed"": 100, ""downSpeed"": 200}}}}"

0 comments on commit 6d5d474

Please sign in to comment.