diff --git a/warehouse/models/intermediate/payments/_int_payments.yml b/warehouse/models/intermediate/payments/_int_payments.yml index d3b27a327b..b0e5e8be02 100644 --- a/warehouse/models/intermediate/payments/_int_payments.yml +++ b/warehouse/models/intermediate/payments/_int_payments.yml @@ -120,22 +120,27 @@ models: description: '{{ doc("lp_contains_adjusted_micropayment") }}' - name: int_payments__refunds_deduped description: | - This model is a union of refunds found both in `stg_littlepay__refunds` and `stg_littlepay__micropayments`. - It includes deduplication of refunds that appear multiple times with multiple statuses, to only take the most recent status, + This model is a combination of refunds found in Littlepay refunds and Littlepay micropayments tables. + Refunds usually appear in the refunds table but sometimes appear in the micropayments table in addition or instead (as "credit" micropayments.) + This model includes deduplication of refunds that appear multiple times with multiple statuses, to only take the most recent status, and also deduplicates based on `coalesced_id` which is `retrieval_reference_number` or `aggregation_id` if `retrieval_reference_number` is `NULL`. - Columns have the same meanings as in the upstream staging model. + Columns have the same meanings as in the upstream staging models. The most important test for this model is on the upstream staging model: to ensure that no `retrieval_reference_number` or `aggregation_id` values are fully dropped betweeen that model and this one (i.e., to ensure that all rows dropped do in fact have a duplicate and no refunds are lost.) + data_tests: + - dbt_utils.expression_is_true: + expression: "refund_amount = micropayments_refund_amount" + config: + where: "source_table='both_refunds_and_micropayments'" columns: - name: refund_id - description: The unique identifier for each requested refund. + description: The unique identifier for each requested refund. (Only present if refund was in refunds table.) - name: aggregation_id description: | The aggregation that contains the refund. - A single aggregation can have multiple refunds. - name: micropayment_id description: | @@ -145,7 +150,7 @@ models: - name: customer_id description: Identifies the customer that the micropayment belongs to. - name: refund_amount - description: The amount of the transaction remaining after the refund has been applied (transaction_amount - proposed_amount.) + description: The amount of the transaction to be refunded to the customer. - name: transaction_date description: Settlement transaction date (if micropayment was settled before refund) - name: settlement_id @@ -161,7 +166,7 @@ models: - name: transaction_amount description: The amount of the original fare. - name: proposed_amount - description: The amount to be refunded to the customer. + description: The amount of the transaction after the refund is applied (transaction amount - refund amount.) - name: status description: | The status of the refund. @@ -197,41 +202,63 @@ models: description: This field is not in use. - name: settlement_response_text description: This field is not in use. - - &lp_line_number - name: _line_number + - name: refunds_line_number description: | - Line number of this row in the source file. + Line number of this row in the source refunds file. Some line numbers may be missing because we drop extra copies of rows that are full duplicates of another row. - - name: currency_code + - name: refunds_currency_code + description: ISO 4217 numeric currency code for the amount that was requested to be settled. + - name: refunds_instance + - name: refunds_extract_filename + - name: refunds_ts + - name: refunds_littlepay_export_ts + description: | + Timestamp of the source refunds file from Littlepay. Timestamp is extracted from filenames, which generally have the + structure {timestamp}_{data_type}.{file extension}. + - name: refunds_littlepay_export_date + description: | + Date of the source refunds file from Littlepay. Date is extracted from filenames, which generally have the + structure {timestamp}_{data_type}.{file extension}. + - name: refunds_content_hash + description: | + Hash of all data columns from refunds table to uniquely identify row's content, mostly for debugging purposes. + Should ideally be handled by uniqueness of _payments_key but surfaced for troubleshooting. + - name: refunds_key + description: | + Synthetic key composed of Littlepay file date and line number to uniquely identify a row within source refunds data. + - name: refunds_payments_key + description: | + Synthentic key composed of the elements that define a natural key within the source refunds data (primary key according to Littlepay schema.) + - name: micropayments_line_number + description: | + Line number of this row in the source micropayments file. + Some line numbers may be missing because we drop extra copies of rows that are full duplicates of another row. + - name: micropayments_currency_code description: ISO 4217 numeric currency code for the amount that was requested to be settled. - - name: instance - - name: extract_filename - - name: ts - - &lp_export_ts - name: littlepay_export_ts + - name: micropayments_instance + - name: micropayments_extract_filename + - name: micropayments_ts + - name: micropayments_littlepay_export_ts description: | - Timestamp of the source file from Littlepay. Timestamp is extracted from filenames, which generally have the + Timestamp of the source micropayments file from Littlepay. Timestamp is extracted from filenames, which generally have the structure {timestamp}_{data_type}.{file extension}. - - &lp_export_date - name: littlepay_export_date + - name: micropayments_littlepay_export_date description: | - Date of the source file from Littlepay. Date is extracted from filenames, which generally have the + Date of the source micropayments file from Littlepay. Date is extracted from filenames, which generally have the structure {timestamp}_{data_type}.{file extension}. - - &_content_hash - name: _content_hash + - name: micropayments_content_hash description: | - Hash of all data columns to uniquely identify row's content, mostly for debugging purposes. + Hash of all data columns from micropayments table to uniquely identify row's content, mostly for debugging purposes. Should ideally be handled by uniqueness of _payments_key but surfaced for troubleshooting. - - &payments_input_row_key - name: _key + - name: micropayments_key description: | - Synthetic key composed of Littlepay file date and line number to uniquely identify a row within source data. - - name: _payments_key + Synthetic key composed of Littlepay file date and line number to uniquely identify a row within source micropayments data. + - name: micropayments_payments_key description: | - Synthentic key composed of the elements that define a natural key within the source data (primary key according to Littlepay schema.) + Synthentic key composed of the elements that define a natural key within the source micropayments data (primary key according to Littlepay schema.) - name: source_table description: | - The table the refund was initially present in (either `stg_littlepay__refunds` or `stg_littlepay__micropayments`) + The table the refund was initially present in (refunds, micropayments, or both) - name: int_payments__refunds_to_aggregations description: | diff --git a/warehouse/models/intermediate/payments/int_payments__micropayments_adjustments_refunds_joined.sql b/warehouse/models/intermediate/payments/int_payments__micropayments_adjustments_refunds_joined.sql index 606d884e8e..c73597ec23 100644 --- a/warehouse/models/intermediate/payments/int_payments__micropayments_adjustments_refunds_joined.sql +++ b/warehouse/models/intermediate/payments/int_payments__micropayments_adjustments_refunds_joined.sql @@ -62,9 +62,9 @@ int_payments__micropayments_adjustments_refunds_joined AS ( -- in those cases, if there's only one micropayment in the aggregation, we can impute that the aggregation refund applies just to the one micropayment CASE WHEN micropayments_per_aggregation.aggregation_micropayment_ct = 1 - AND individual_refunds.proposed_amount IS NULL + AND individual_refunds.refund_amount IS NULL THEN aggregation_refunds.total_refund_activity_amount_dollars - ELSE individual_refunds.proposed_amount + ELSE individual_refunds.refund_amount END AS micropayment_refund_amount, aggregation_refunds.total_refund_activity_amount_dollars AS aggregation_refund_amount FROM debit_micropayments diff --git a/warehouse/models/intermediate/payments/int_payments__refunds_deduped.sql b/warehouse/models/intermediate/payments/int_payments__refunds_deduped.sql index 72fc00f044..5636c44960 100644 --- a/warehouse/models/intermediate/payments/int_payments__refunds_deduped.sql +++ b/warehouse/models/intermediate/payments/int_payments__refunds_deduped.sql @@ -13,39 +13,18 @@ refunds AS ( SELECT * FROM {{ ref('int_littlepay__unioned_refunds') }} ), -format_micropayments_table_refunds AS ( +-- in the refunds table, the micropayment ID is the ID of the micropayment *being refunded* +-- in the micropayments refunds, the refund has a separate micropayment ID +-- so we have to look up the micropayment ID of the micropayment being refunded rather than using the micropayment ID of the refund +micropayment_refund_id_lookup AS ( SELECT - micropayments_refunds.aggregation_id, - -- in the refunds table, the micropayment ID is the ID of the micropayment *being refunded* - -- in the micropayments refunds, the refund has a separate micropayment ID - -- so we look up the micropayment ID of the micropayment being refunded rather than using the micropayment ID of the refund debit.micropayment_id, micropayments_refunds.participant_id, micropayments_refunds.customer_id, - ABS(micropayments_refunds.charge_amount) AS proposed_amount, EXTRACT(DATE FROM micropayments_refunds.transaction_time) AS transaction_date, micropayments_refunds.aggregation_id AS coalesced_id, - - -- add columns that we want to preserve from refunds table after union as null strings - SAFE_CAST(NULL AS NUMERIC) AS refund_amount, - SAFE_CAST(NULL AS STRING) AS refund_id, - SAFE_CAST(NULL AS STRING) AS settlement_id, - SAFE_CAST(NULL AS STRING) AS retrieval_reference_number, - SAFE_CAST(NULL AS NUMERIC) AS transaction_amount, - SAFE_CAST(NULL AS STRING) AS status, - SAFE_CAST(NULL AS STRING) AS initiator, - SAFE_CAST(NULL AS STRING) AS reason, - SAFE_CAST(NULL AS STRING) AS approval_status, - SAFE_CAST(NULL AS STRING) AS issuer, - SAFE_CAST(NULL AS STRING) AS issuer_comment, - SAFE_CAST(NULL AS TIMESTAMP) AS created_time, - SAFE_CAST(NULL AS TIMESTAMP) AS approved_time, - SAFE_CAST(NULL AS STRING) AS settlement_status, - SAFE_CAST(NULL AS DATE) AS settlement_status_time, - SAFE_CAST(NULL AS STRING) AS settlement_reason_code, - SAFE_CAST(NULL AS STRING) AS settlement_response_text, - + ABS(micropayments_refunds.charge_amount) AS refund_amount, micropayments_refunds._line_number, micropayments_refunds.currency_code, micropayments_refunds.instance, @@ -57,8 +36,8 @@ format_micropayments_table_refunds AS ( micropayments_refunds._key, micropayments_refunds._payments_key, 'micropayments' AS source_table - FROM micropayments_refunds + -- have to do a two step join to find the original "debit" micropayment that is being refunded LEFT JOIN micropayment_device_transactions AS credit ON micropayments_refunds.micropayment_id = credit.micropayment_id LEFT JOIN micropayment_device_transactions AS debit @@ -71,19 +50,18 @@ format_micropayments_table_refunds AS ( WHERE micropayments_refunds._key NOT IN ('043ecc000223a299ce17f6a342b1d240', '3536fb2035bbcf4dcb1f3abf001b5185') ), - +-- prepare refunds table to be combined with micropayments +-- start with trying to populate aggregation_id where missing distinct_aggregations_by_refund_id AS ( - SELECT DISTINCT retrieval_reference_number, aggregation_id FROM {{ ref('int_littlepay__unioned_refunds') }} WHERE aggregation_id IS NOT NULL - ), -format_refunds_table_refunds AS ( +-- also dedupe when same refund appears multiple times +refunds_populate_aggregation_id_dedupe_over_status AS ( SELECT - COALESCE(t1.aggregation_id, t2.aggregation_id) as aggregation_id, micropayment_id, participant_id, @@ -119,7 +97,6 @@ format_refunds_table_refunds AS ( _key, _payments_key, 'refunds' AS source_table - FROM refunds AS t1 LEFT JOIN distinct_aggregations_by_refund_id AS t2 USING (retrieval_reference_number) @@ -128,39 +105,100 @@ format_refunds_table_refunds AS ( QUALIFY DENSE_RANK() OVER (PARTITION BY refund_id ORDER BY littlepay_export_ts DESC) = 1 ), -refunds_union AS ( - SELECT * - FROM format_micropayments_table_refunds - - UNION ALL - - SELECT * - FROM format_refunds_table_refunds -), - -- in addition to the qualify statement above, we have an issue where some refunds are initially refused and then later approved -- but they get a new refund ID generated --- address that thus -next_status AS ( +refunds_next_status AS ( SELECT participant_id, refund_id, aggregation_id, LEAD(approval_status) OVER(PARTITION BY participant_id, micropayment_id ORDER BY created_time ASC) AS next_approval_status, approval_status - FROM refunds_union + FROM refunds_populate_aggregation_id_dedupe_over_status ), -to_drop AS ( +refunds_refused_then_approved AS ( SELECT DISTINCT participant_id, refund_id - FROM next_status + FROM refunds_next_status WHERE approval_status = "REFUSED" AND next_approval_status = "APPROVED" ), -int_payments__refunds AS ( +-- remove the refused then approved refunds as well as other remaining duplicates +refunds_remove_refused_then_approved_and_remaining_dups AS ( + SELECT * + FROM refunds_populate_aggregation_id_dedupe_over_status + LEFT JOIN refunds_refused_then_approved USING (participant_id, refund_id) + WHERE refunds_refused_then_approved.refund_id IS NULL + -- this dedupes on coalesced_id (which is comprised of retrieval_reference_number or aggregation_id if it is null), micropayment_id, and refund_amount + -- because we observe some duplicate refunds + -- use line number in sorting to make this deterministic + QUALIFY ROW_NUMBER() OVER (PARTITION BY coalesced_id, COALESCE(micropayment_id, '0'), proposed_amount ORDER BY littlepay_export_ts DESC, _line_number DESC) = 1 +), +combine_micropayments_and_refunds AS ( SELECT + COALESCE(refunds.aggregation_id, micropayments.aggregation_id) AS aggregation_id, + COALESCE(refunds.micropayment_id, micropayments.micropayment_id) AS micropayment_id, + COALESCE(refunds.participant_id, micropayments.participant_id) AS participant_id, + COALESCE(refunds.customer_id, micropayments.customer_id) AS customer_id, + refunds.proposed_amount AS refund_proposed_amount, + COALESCE(refunds.transaction_date, micropayments.transaction_date) AS transaction_date, + COALESCE(refunds.coalesced_id, micropayments.coalesced_id) AS coalesced_id, + COALESCE(refunds.refund_amount, micropayments.refund_amount) AS refund_amount, + -- keep micropayments refund amount as a separate column to test that if both were present the values were the same + micropayments.refund_amount AS micropayments_refund_amount, + refunds.refund_id, + refunds.proposed_amount, + refunds.settlement_id AS settlement_id, + refunds.retrieval_reference_number, + refunds.transaction_amount, + refunds.status, + refunds.initiator, + refunds.reason, + refunds.approval_status, + refunds.issuer, + refunds.issuer_comment, + refunds.created_time, + refunds.approved_time, + refunds.settlement_status, + refunds.settlement_status_time, + refunds.settlement_reason_code, + refunds.settlement_response_text, + + -- metadata + refunds._line_number AS refunds_line_number, + refunds.currency_code AS refunds_currency_code, + refunds.instance AS refunds_instance, + refunds.extract_filename AS refunds_extract_filename, + refunds.ts AS refunds_ts, + refunds.littlepay_export_ts AS refunds_littlepay_export_ts, + refunds.littlepay_export_date AS refunds_littlepay_export_date, + refunds._content_hash AS refunds_content_hash, + refunds._key AS refunds_key, + refunds._payments_key AS refunds_payments_key, + + micropayments._line_number AS micropayments_line_number, + micropayments.currency_code AS micropayments_currency_code, + micropayments.instance AS micropayments_instance, + micropayments.extract_filename AS micropayments_extract_filename, + micropayments.ts AS micropayments_ts, + micropayments.littlepay_export_ts AS micropayments_littlepay_export_ts, + micropayments.littlepay_export_date AS micropayments_littlepay_export_date, + micropayments._content_hash AS micropayments_content_hash, + micropayments._key AS micropayments_key, + micropayments._payments_key AS micropayments_payments_key, + + CASE + WHEN refunds.source_table IS NOT NULL AND micropayments.source_table IS NOT NULL THEN "both_refunds_and_micropayments" + ELSE COALESCE(refunds.source_table, micropayments.source_table) + END AS source_table + FROM refunds_remove_refused_then_approved_and_remaining_dups AS refunds + FULL OUTER JOIN micropayment_refund_id_lookup AS micropayments + ON refunds.micropayment_id = micropayments.micropayment_id +), +int_payments__refunds_deduped AS ( + SELECT aggregation_id, micropayment_id, participant_id, @@ -169,6 +207,7 @@ int_payments__refunds AS ( transaction_date, coalesced_id, refund_amount, + micropayments_refund_amount, refund_id, settlement_id, retrieval_reference_number, @@ -185,26 +224,30 @@ int_payments__refunds AS ( settlement_status_time, settlement_reason_code, settlement_response_text, - _line_number, - currency_code, - instance, - extract_filename, - ts, - littlepay_export_ts, - littlepay_export_date, - _content_hash, - _key, - _payments_key, - source_table - - FROM refunds_union - LEFT JOIN to_drop USING (participant_id, refund_id) - WHERE to_drop.refund_id IS NULL - -- this dedupes on coalesced_id (which is comprised of retrieval_reference_number or aggregation_id if it is null) and refund_amount - -- because we observe some duplicate refunds by retrieval_reference_number/aggregation_id and refund_amount - -- add line number to sorting to make this deterministic - QUALIFY ROW_NUMBER() OVER (PARTITION BY coalesced_id, refund_amount ORDER BY littlepay_export_ts DESC, _line_number DESC) = 1 + refunds_line_number, + refunds_currency_code, + refunds_instance, + refunds_extract_filename, + refunds_ts, + refunds_littlepay_export_ts, + refunds_littlepay_export_date, + refunds_content_hash, + refunds_key, + refunds_payments_key, + + micropayments_line_number, + micropayments_currency_code, + micropayments_instance, + micropayments_extract_filename, + micropayments_ts, + micropayments_littlepay_export_ts, + micropayments_littlepay_export_date, + micropayments_content_hash, + micropayments_key, + micropayments_payments_key, + source_table + FROM combine_micropayments_and_refunds ) -SELECT * FROM int_payments__refunds +SELECT * FROM int_payments__refunds_deduped diff --git a/warehouse/models/intermediate/payments/int_payments__refunds_to_aggregations.sql b/warehouse/models/intermediate/payments/int_payments__refunds_to_aggregations.sql index c3bbe3e21e..8a225a9c15 100644 --- a/warehouse/models/intermediate/payments/int_payments__refunds_to_aggregations.sql +++ b/warehouse/models/intermediate/payments/int_payments__refunds_to_aggregations.sql @@ -11,7 +11,7 @@ summarize_by_type AS ( aggregation_id, retrieval_reference_number, approval_status, - SUM(proposed_amount) AS proposed_refund_amount, + SUM(refund_amount) AS proposed_refund_amount, FROM refunds GROUP BY 1, 2, 3, 4 ), diff --git a/warehouse/models/intermediate/payments/littlepay_feed_unions/int_littlepay__unioned_refunds.sql b/warehouse/models/intermediate/payments/littlepay_feed_unions/int_littlepay__unioned_refunds.sql index f7d6573738..8d99ec69b2 100644 --- a/warehouse/models/intermediate/payments/littlepay_feed_unions/int_littlepay__unioned_refunds.sql +++ b/warehouse/models/intermediate/payments/littlepay_feed_unions/int_littlepay__unioned_refunds.sql @@ -1,13 +1,19 @@ {{ config(materialized = "table") }} WITH refunds_v1 AS ( - SELECT * + SELECT + * EXCEPT (proposed_amount, refund_amount), + -- in v3, the interpretation of these columns was swapped + -- in v1, proposed_amount is the amount to be refunded + -- and in v1, refund_amount is transaction_amount - proposed_amount + proposed_amount as refund_amount, + refund_amount as proposed_amount FROM {{ ref('stg_littlepay__refunds') }} -- For agencies that had v1 feeds, keep everything before cutover date WHERE littlepay_export_date <= '2025-05-16' ), -refunds_v3 AS ( +refunds_v3 AS ( --noqa: ST03 SELECT * FROM {{ ref('stg_littlepay__refunds_v3') }} WHERE @@ -24,7 +30,7 @@ refunds_v3 AS ( int_littlepay__unioned_refunds AS ( SELECT * FROM refunds_v1 - UNION ALL + UNION ALL BY NAME SELECT * FROM refunds_v3 )