Skip to content

Commit 377d772

Browse files
committed
Temp
WIP revise models and point to duckdb file Update Update
1 parent 605cb52 commit 377d772

14 files changed

Lines changed: 260 additions & 27 deletions

channel_messages_stats.csv

Lines changed: 101 additions & 0 deletions
Large diffs are not rendered by default.

scripts_python/generate_channel_messages_reactions.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
import argparse
10+
import json
1011
import random
1112
from pathlib import Path
1213

@@ -178,13 +179,9 @@ def process_csv(input_path: Path, output_path: Path) -> None:
178179
"""
179180
Read input CSV, generate reactions for each row, and write output CSV.
180181
"""
181-
# Read input CSV
182-
df = pd.read_csv(input_path)
183-
184-
# Generate reactions for each row
185-
df['reactions'] = [str(generate_reactions()) for _ in range(len(df))]
186182

187-
# Write output CSV
183+
df = pd.read_csv(input_path)
184+
df['reactions'] = [json.dumps(generate_reactions()) for _ in range(len(df))]
188185
df.to_csv(output_path, index=False)
189186

190187
print(f"Processed {len(df)} rows")
Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,27 @@
11
{% macro to_date(timestamp, localize=True, timezone=var('local_timezone')) %}
2+
{{ return(adapter.dispatch('to_date', 'slack_analytics')(timestamp, localize, timezone)) }}
3+
{% endmacro %}
4+
5+
{% macro default__to_date(timestamp, localize, timezone) %}
26
{% if localize %}
37
TO_DATE(FROM_UTC_TIMESTAMP({{ timestamp }}, '{{ timezone }}'))
48
{% else %}
59
TO_DATE({{ timestamp }})
610
{% endif %}
7-
{% endmacro %}
11+
{% endmacro %}
12+
13+
{% macro duckdb__to_date(timestamp, localize, timezone) %}
14+
{% if localize %}
15+
CAST(timezone('{{ timezone }}', {{ timestamp }}::TIMESTAMPTZ) AS DATE)
16+
{% else %}
17+
CAST({{ timestamp }} AS DATE)
18+
{% endif %}
19+
{% endmacro %}
20+
21+
{% macro snowflake__to_date(timestamp, localize, timezone) %}
22+
{% if localize %}
23+
TO_DATE(CONVERT_TIMEZONE('UTC', '{{ timezone }}', {{ timestamp }}))
24+
{% else %}
25+
TO_DATE({{ timestamp }})
26+
{% endif %}
27+
{% endmacro %}

shared/projects/dbt/slack_analytics/macros/unnest_array.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@
1111
{% endmacro %}
1212

1313
{% macro duckdb__unnest_array(array_column, alias) %}
14-
cross join unnest({{ array_column }}) as t({{ alias }})
14+
, unnest(from_json({{ array_column }}, '["JSON"]')) as t({{ alias }})
1515
{% endmacro %}

shared/projects/dbt/slack_analytics/models/dimensions/_models.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,6 @@ models:
3434
- name: message_datetime
3535
data_type: timestamp
3636
description: The date and time the message was sent.
37-
- name: extracted_at
37+
- name: extracted_datetime
3838
data_type: timestamp
3939
description: The date and time the message was extracted from the Slack API.

shared/projects/dbt/slack_analytics/models/dimensions/dim_slack_messages.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
materialized='incremental',
44
unique_key='message_id',
55
partition_by=['message_date'],
6-
incremental_strategy='merge'
6+
incremental_strategy='delete+insert'
77
)
88
}}
99

@@ -20,11 +20,11 @@ with staging as (
2020
reactions,
2121
{{ to_date('message_datetime', localize=True, timezone=var('local_timezone')) }} as message_date,
2222
message_datetime,
23-
extracted_at
23+
extracted_datetime
2424
from {{ ref('stg_channel_messages') }}
2525
{% if is_incremental() %}
26-
where extracted_at > (
27-
select max(extracted_at) from {{ this }}
26+
where extracted_datetime > (
27+
select max(extracted_datetime) from {{ this }}
2828
)
2929
{% endif %}
3030
)
@@ -42,11 +42,11 @@ with staging as (
4242
reactions,
4343
message_date,
4444
message_datetime,
45-
extracted_at
45+
extracted_datetime
4646

4747
from staging
4848
where 1=1
49-
qualify row_number() over (partition by message_id order by extracted_at desc) = 1
49+
qualify row_number() over (partition by message_id order by extracted_datetime desc) = 1
5050
)
5151

5252
select * from dimension

shared/projects/dbt/slack_analytics/models/facts/_models.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@ models:
3131
- name: reaction_user
3232
data_type: string
3333
description: The user_id of the user who used the specified reaction to the message.
34-
- name: extracted_at
34+
- name: extracted_datetime
3535
data_type: timestamp
3636
description: The date and time the message was extracted from the Slack API.

shared/projects/dbt/slack_analytics/models/facts/fct_slack_message_reactions.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33
materialized='incremental',
44
unique_key='message_reaction_id',
55
partition_by=['message_date'],
6-
incremental_strategy='merge'
6+
incremental_strategy='delete+insert'
77
)
88
}}
99

1010
-- get new data from staging
1111
with dim_messages as (
1212
select * from {{ ref('dim_slack_messages') }}
1313
{% if is_incremental() %}
14-
where extracted_at > (
15-
select max(extracted_at) from {{ this }}
14+
where extracted_datetime > (
15+
select max(extracted_datetime) from {{ this }}
1616
)
1717
{% endif %}
1818
)
@@ -25,7 +25,7 @@ with dim_messages as (
2525
reaction.name as reaction_name,
2626
message_date,
2727
message_datetime,
28-
extracted_at
28+
extracted_datetime
2929
from dim_messages
3030
{{ unnest_array('reactions', 'reaction') }}
3131
)
@@ -38,7 +38,7 @@ with dim_messages as (
3838
reaction_user,
3939
message_date,
4040
message_datetime,
41-
extracted_at
41+
extracted_datetime
4242
from semi_expanded_reactions
4343
{{ unnest_array('reaction_users', 'reaction_user') }}
4444
)
@@ -57,7 +57,7 @@ with dim_messages as (
5757
else reaction_name
5858
end as reaction_name_normalised,
5959
reaction_user,
60-
extracted_at
60+
extracted_datetime
6161
from expanded_reactions s
6262
)
6363

shared/projects/dbt/slack_analytics/models/staging/_models.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,6 @@ models:
3535
- name: message_datetime
3636
data_type: timestamp
3737
description: The date and time the message was sent.
38-
- name: extracted_at
38+
- name: extracted_datetime
3939
data_type: timestamp
4040
description: The date and time the message was extracted from the Slack API. Part of combined primary key.

shared/projects/dbt/slack_analytics/models/staging/_sources.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ version: 2
22

33
sources:
44
- name: slack_analytics
5+
database: ""
56
schema: main
67
tables:
78
- name: channel_messages
8-
identifier: src_channel_messages
9+
identifier: channel_messages

0 commit comments

Comments
 (0)