-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Destinations v2: Do not dedup raw table #31520
Changes from 5 commits
5caaf84
c257f9e
1bebfd8
a9697b8
0ae0e67
73c54e7
5db5b36
55e5506
f37a463
ae2d132
34fc49a
55a4470
3b60147
fa9f214
b0ca13a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -659,13 +659,11 @@ public void incrementalDedupNoCursor() throws Exception { | |
final List<JsonNode> actualRawRecords = dumpRawTableRecords(streamId); | ||
final List<JsonNode> actualFinalRecords = dumpFinalTableRecords(streamId, ""); | ||
verifyRecordCounts( | ||
1, | ||
2, | ||
actualRawRecords, | ||
1, | ||
actualFinalRecords); | ||
assertAll( | ||
() -> assertEquals("bar", actualRawRecords.get(0).get("_airbyte_data").get("string").asText()), | ||
() -> assertEquals("bar", actualFinalRecords.get(0).get(generator.buildColumnId("string").name()).asText())); | ||
assertEquals("bar", actualFinalRecords.get(0).get(generator.buildColumnId("string").name()).asText()); | ||
} | ||
|
||
@Test | ||
|
@@ -796,10 +794,9 @@ public void cdcComplexUpdate() throws Exception { | |
destinationHandler.execute(sql); | ||
|
||
verifyRecordCounts( | ||
// We keep the newest raw record per PK | ||
7, | ||
11, | ||
dumpRawTableRecords(streamId), | ||
5, | ||
6, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a behavior change; see https://airbytehq-team.slack.com/archives/C03C4AVJWG4/p1697486280303179 |
||
dumpFinalTableRecords(streamId, "")); | ||
} | ||
|
||
|
@@ -828,7 +825,7 @@ public void testCdcOrdering_updateAfterDelete() throws Exception { | |
destinationHandler.execute(sql); | ||
|
||
verifyRecordCounts( | ||
1, | ||
2, | ||
dumpRawTableRecords(streamId), | ||
0, | ||
dumpFinalTableRecords(streamId, "")); | ||
|
@@ -865,7 +862,7 @@ public void testCdcOrdering_insertAfterDelete() throws Exception { | |
destinationHandler.execute(sql); | ||
|
||
verifyRecordCounts( | ||
1, | ||
2, | ||
dumpRawTableRecords(streamId), | ||
1, | ||
dumpFinalTableRecords(streamId, "")); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,8 @@ | |
|
||
package io.airbyte.integrations.base.destination.typing_deduping; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertAll; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.node.ObjectNode; | ||
import com.google.common.collect.ImmutableMap; | ||
|
@@ -270,7 +272,7 @@ public void fullRefreshAppend() throws Exception { | |
|
||
runSync(catalog, messages2); | ||
|
||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_raw.jsonl"); | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_append_raw.jsonl"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's no longer any difference between append and dedup raw records, so merge their expectedrecords files. |
||
final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_final.jsonl"); | ||
verifySyncResult(expectedRawRecords2, expectedFinalRecords2); | ||
} | ||
|
@@ -309,7 +311,7 @@ public void incrementalAppend() throws Exception { | |
|
||
runSync(catalog, messages2); | ||
|
||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_raw.jsonl"); | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_append_raw.jsonl"); | ||
final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_final.jsonl"); | ||
verifySyncResult(expectedRawRecords2, expectedFinalRecords2); | ||
} | ||
|
@@ -346,7 +348,7 @@ public void incrementalDedup() throws Exception { | |
|
||
runSync(catalog, messages2); | ||
|
||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_raw.jsonl"); | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_append_raw.jsonl"); | ||
final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_final.jsonl"); | ||
verifySyncResult(expectedRawRecords2, expectedFinalRecords2); | ||
} | ||
|
@@ -381,7 +383,7 @@ public void incrementalDedupDefaultNamespace() throws Exception { | |
|
||
runSync(catalog, messages2); | ||
|
||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_raw.jsonl"); | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_append_raw.jsonl"); | ||
final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_final.jsonl"); | ||
verifySyncResult(expectedRawRecords2, expectedFinalRecords2, null, streamName); | ||
} | ||
|
@@ -437,7 +439,7 @@ public void testIncrementalSyncDropOneColumn() throws Exception { | |
runSync(catalog, messages2); | ||
|
||
// The raw data is unaffected by the schema, but the final table should not have a `name` column. | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_raw.jsonl"); | ||
final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_append_raw.jsonl"); | ||
final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_final.jsonl").stream() | ||
.peek(record -> ((ObjectNode) record).remove(getSqlGenerator().buildColumnId("name").name())) | ||
.toList(); | ||
|
@@ -518,12 +520,12 @@ public void incrementalDedupIdenticalName() throws Exception { | |
runSync(catalog, messages2); | ||
|
||
verifySyncResult( | ||
readRecords("dat/sync2_expectedrecords_incremental_dedup_raw.jsonl"), | ||
readRecords("dat/sync2_expectedrecords_append_raw.jsonl"), | ||
readRecords("dat/sync2_expectedrecords_incremental_dedup_final.jsonl"), | ||
namespace1, | ||
streamName); | ||
verifySyncResult( | ||
readRecords("dat/sync2_expectedrecords_incremental_dedup_raw2.jsonl"), | ||
readRecords("dat/sync2_expectedrecords_append_raw2.jsonl"), | ||
readRecords("dat/sync2_expectedrecords_incremental_dedup_final2.jsonl"), | ||
namespace2, | ||
streamName); | ||
|
@@ -585,16 +587,15 @@ public void identicalNameSimultaneousSync() throws Exception { | |
// And this will dump sync2's entire stdout to our stdout | ||
endSync(sync2); | ||
|
||
verifySyncResult( | ||
readRecords("dat/sync1_expectedrecords_dedup_raw.jsonl"), | ||
readRecords("dat/sync1_expectedrecords_dedup_final.jsonl"), | ||
namespace1, | ||
streamName); | ||
verifySyncResult( | ||
readRecords("dat/sync1_expectedrecords_dedup_raw2.jsonl"), | ||
readRecords("dat/sync1_expectedrecords_dedup_final2.jsonl"), | ||
namespace2, | ||
streamName); | ||
// For simplicity, don't verify the raw table. Assume that if the final table is correct, then | ||
// the raw data is correct. This is generally a safe assumption. | ||
assertAll( | ||
() -> DIFFER.diffFinalTableRecords( | ||
readRecords("dat/sync1_expectedrecords_dedup_final.jsonl"), | ||
dumpFinalTableRecords(namespace1, streamName)), | ||
() -> DIFFER.diffFinalTableRecords( | ||
readRecords("dat/sync1_expectedrecords_dedup_final2.jsonl"), | ||
dumpFinalTableRecords(namespace2, streamName))); | ||
} | ||
|
||
@Test | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no longer any reason to assert the raw records, so only assert the final records.