diff --git a/.gitignore b/.gitignore index bc6841f7b081..499fd8687af5 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,7 @@ scan-journal.log # connectors' cache *.sqlite + +.devenv +.direnv +**/.metals diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/build.gradle b/airbyte-integrations/connectors/source-mongodb-v2-plain/build.gradle index 370798a19906..8cc75f98f30f 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/build.gradle +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/build.gradle @@ -1,17 +1,14 @@ plugins { - id 'application' id 'airbyte-java-connector' - alias(libs.plugins.kotlin.jvm) + id 'org.jetbrains.kotlin.jvm' version '1.9.22' } airbyteJavaConnector { - cdkVersionRequired = '0.7.9' - features = ['db-sources'] + cdkVersionRequired = '0.20.6' + features = ['db-sources', 'datastore-mongo'] useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() - application { mainClass = 'io.airbyte.integrations.source.mongodb.MongoDbSource' applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] @@ -35,39 +32,32 @@ sourceSets { } } -dependencies { - implementation libs.mongo.driver.sync - - testImplementation libs.testcontainers.mongodb +java { + compileJava { + options.compilerArgs += "-Xlint:-try,-rawtypes" + } +} - integrationTestJavaImplementation libs.apache.commons.lang - integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-mongodb-v2-plain') +dependencies { + implementation 'io.debezium:debezium-embedded:2.4.0.Final' + implementation 'io.debezium:debezium-connector-mongodb:2.4.0.Final' - dataGeneratorImplementation project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons') + testImplementation 'org.testcontainers:mongodb:1.19.0' dataGeneratorImplementation project(':airbyte-integrations:connectors:source-mongodb-v2-plain') - dataGeneratorImplementation libs.mongo.driver.sync - dataGeneratorImplementation libs.kotlin.logging - 
dataGeneratorImplementation libs.kotlinx.cli - dataGeneratorImplementation (libs.java.faker) { - exclude module: 'snakeyaml' - } - dataGeneratorImplementation libs.jackson.databind - dataGeneratorImplementation libs.bundles.slf4j - dataGeneratorImplementation libs.slf4j.simple - dataGeneratorImplementation libs.kotlinx.cli.jvm - dataGeneratorImplementation 'org.yaml:snakeyaml:2.2' - - debeziumTestImplementation libs.debezium.api - debeziumTestImplementation libs.debezium.embedded - debeziumTestImplementation libs.debezium.sqlserver - debeziumTestImplementation libs.debezium.mysql - debeziumTestImplementation libs.debezium.postgres - debeziumTestImplementation libs.debezium.mongodb - debeziumTestImplementation libs.bundles.slf4j - debeziumTestImplementation libs.slf4j.simple - debeziumTestImplementation libs.kotlinx.cli.jvm - debeziumTestImplementation libs.spotbugs.annotations + dataGeneratorImplementation platform('com.fasterxml.jackson:jackson-bom:2.15.2') + dataGeneratorImplementation 'com.fasterxml.jackson.core:jackson-databind' + dataGeneratorImplementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310' + + dataGeneratorImplementation ('com.github.javafaker:javafaker:1.0.2') { exclude module: 'snakeyaml' } + dataGeneratorImplementation 'io.github.oshai:kotlin-logging-jvm:5.1.0' + dataGeneratorImplementation 'org.jetbrains.kotlinx:kotlinx-cli-jvm:0.3.5' + dataGeneratorImplementation 'org.mongodb:mongodb-driver-sync:4.10.2' + + debeziumTestImplementation 'io.debezium:debezium-embedded:2.4.0.Final' + debeziumTestImplementation 'io.debezium:debezium-connector-mongodb:2.4.0.Final' + debeziumTestImplementation 'org.jetbrains.kotlinx:kotlinx-cli-jvm:0.3.5' + debeziumTestImplementation 'com.github.spotbugs:spotbugs-annotations:4.7.3' } /* @@ -75,7 +65,7 @@ dependencies { * * To execute this task, use the following command: * - * ./gradlew :airbyte-integrations:connectors:source-mongodb-v2:generateTestData -PconnectionString= -PdatabaseName= -PcollectionName= 
-Pusername= + * ./gradlew :airbyte-integrations:connectors:source-mongodb-v2-plain:generateTestData -PconnectionString= -PdatabaseName= -PcollectionName= -Pusername= * * Optionally, you can provide -PnumberOfDocuments to change the number of generated documents from the default (10,000). */ @@ -109,7 +99,7 @@ tasks.register('generateTestData', JavaExec) { * * To execute this task, use the following command: * - * ./gradlew :airbyte-integrations:connectors:source-mongodb-v2:debeziumTest -PconnectionString= -PdatabaseName= -PcollectionName= -Pusername= + * ./gradlew :airbyte-integrations:connectors:source-mongodb-v2-plain:debeziumTest -PconnectionString= -PdatabaseName= -PcollectionName= -Pusername= */ tasks.register('debeziumTest', JavaExec) { def arguments = [] diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/integration_tests/expected_spec.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/integration_tests/expected_spec.json index 54e3d7aa189c..e77e9d632780 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/integration_tests/expected_spec.json +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/integration_tests/expected_spec.json @@ -164,7 +164,7 @@ "description": "The maximum number of documents to sample when attempting to discover the unique fields for a collection.", "default": 10000, "order": 10, - "minimum": 1000, + "minimum": 10, "maximum": 100000, "group": "advanced" } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/metadata.yaml b/airbyte-integrations/connectors/source-mongodb-v2-plain/metadata.yaml index fadb3f74d381..1f28a0652134 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/metadata.yaml +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/metadata.yaml @@ -1,11 +1,11 @@ data: ab_internal: ql: 200 - sl: 100 + sl: 200 connectorSubtype: database connectorType: source definitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e - dockerImageTag: 1.2.1 
+ dockerImageTag: 1.2.10 dockerRepository: airbyte/source-mongodb-v2-plain documentationUrl: https://docs.airbyte.com/integrations/sources/mongodb-v2 githubIssueLabel: source-mongodb-v2 diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandler.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandler.java index 5d7d9ad72587..f9863945d8fd 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandler.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandler.java @@ -27,6 +27,10 @@ import java.util.List; import java.util.Optional; import org.bson.BsonDocument; +import org.bson.BsonInt32; +import org.bson.BsonInt64; +import org.bson.BsonObjectId; +import org.bson.BsonString; import org.bson.Document; import org.bson.conversions.Bson; import org.bson.types.ObjectId; @@ -86,8 +90,13 @@ public List> getIterators( // "where _id > [last saved state] order by _id ASC". 
// If no state exists, it will create a query akin to "where 1=1 order by _id ASC" final Bson filter = existingState - // TODO add type support here when we add support for _id fields that are not ObjectId types - .map(state -> Filters.gt(MongoConstants.ID_FIELD, new ObjectId(state.id()))) + .map(state -> Filters.gt(MongoConstants.ID_FIELD, + switch (state.idType()) { + case STRING -> new BsonString(state.id()); + case OBJECT_ID -> new BsonObjectId(new ObjectId(state.id())); + case INT -> new BsonInt32(Integer.parseInt(state.id())); + case LONG -> new BsonInt64(Long.parseLong(state.id())); + })) // if nothing was found, return a new BsonDocument .orElseGet(BsonDocument::new); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtils.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtils.java index b5a6ae746ed1..c42a3d72026f 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtils.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtils.java @@ -13,8 +13,7 @@ import com.mongodb.ReadPreference; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClients; -import com.mongodb.connection.SslSettings; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumPropertiesManager; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -30,14 +29,12 @@ public class MongoConnectionUtils { * @return The configured {@link MongoClient}. 
*/ public static MongoClient createMongoClient(final MongoDbSourceConfig config) { - final ConnectionString mongoConnectionString = new ConnectionString(buildConnectionString(config)); final MongoDriverInformation mongoDriverInformation = MongoDriverInformation.builder() .driverName(DRIVER_NAME) .build(); - final MongoClientSettings.Builder mongoClientSettingsBuilder = MongoClientSettings.builder() .applyConnectionString(mongoConnectionString) .applyToSslSettings(s -> s.enabled(false)) @@ -54,7 +51,7 @@ public static MongoClient createMongoClient(final MongoDbSourceConfig config) { } private static String buildConnectionString(final MongoDbSourceConfig config) { - String sslConnectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config.rawConfig(), true); + String sslConnectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config.getDatabaseConfig(), true); String replaced = sslConnectionString.replace("&tls=true",""); System.err.println("Replaced " + replaced); return replaced; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConstants.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConstants.java index efbcc319b75e..430fa9f9c409 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConstants.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoConstants.java @@ -5,8 +5,8 @@ package io.airbyte.integrations.source.mongodb; import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants.Configuration; +import 
io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration; import java.time.Duration; public class MongoConstants { @@ -34,6 +34,8 @@ public class MongoConstants { public static final String USERNAME_CONFIGURATION_KEY = MongoDbDebeziumConstants.Configuration.USERNAME_CONFIGURATION_KEY; public static final String SCHEMA_ENFORCED_CONFIGURATION_KEY = MongoDbDebeziumConstants.Configuration.SCHEMA_ENFORCED_CONFIGURATION_KEY; public static final String SCHEMALESS_MODE_DATA_FIELD = Configuration.SCHEMALESS_MODE_DATA_FIELD; + public static final String INITIAL_RECORD_WAITING_TIME_SEC = "initial_waiting_seconds"; + public static final Integer DEFAULT_INITIAL_RECORD_WAITING_TIME_SEC = 300; private MongoConstants() {} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfig.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfig.java index 3591286490b0..a03647bb9386 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfig.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfig.java @@ -11,7 +11,9 @@ import static io.airbyte.integrations.source.mongodb.MongoConstants.DATABASE_CONFIG_CONFIGURATION_KEY; import static io.airbyte.integrations.source.mongodb.MongoConstants.DEFAULT_AUTH_SOURCE; import static io.airbyte.integrations.source.mongodb.MongoConstants.DEFAULT_DISCOVER_SAMPLE_SIZE; +import static io.airbyte.integrations.source.mongodb.MongoConstants.DEFAULT_INITIAL_RECORD_WAITING_TIME_SEC; import static io.airbyte.integrations.source.mongodb.MongoConstants.DISCOVER_SAMPLE_SIZE_CONFIGURATION_KEY; +import static 
io.airbyte.integrations.source.mongodb.MongoConstants.INITIAL_RECORD_WAITING_TIME_SEC; import static io.airbyte.integrations.source.mongodb.MongoConstants.PASSWORD_CONFIGURATION_KEY; import static io.airbyte.integrations.source.mongodb.MongoConstants.SCHEMA_ENFORCED_CONFIGURATION_KEY; import static io.airbyte.integrations.source.mongodb.MongoConstants.USERNAME_CONFIGURATION_KEY; @@ -27,33 +29,32 @@ */ public record MongoDbSourceConfig(JsonNode rawConfig) { - /** - * Constructs a new {@link MongoDbSourceConfig} from the provided raw configuration. - * - * @param rawConfig The underlying JSON configuration provided by the connector framework. - * @throws IllegalArgumentException if the raw configuration does not contain the - * {@link MongoConstants#DATABASE_CONFIG_CONFIGURATION_KEY} key. - */ - public MongoDbSourceConfig(final JsonNode rawConfig) { - if (rawConfig.has(DATABASE_CONFIG_CONFIGURATION_KEY)) { - this.rawConfig = rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY); - } else { + public MongoDbSourceConfig { + if (rawConfig == null) { + throw new IllegalArgumentException("MongoDbSourceConfig cannot accept a null config."); + } + if (!rawConfig.hasNonNull(DATABASE_CONFIG_CONFIGURATION_KEY)) { throw new IllegalArgumentException("Database configuration is missing required '" + DATABASE_CONFIG_CONFIGURATION_KEY + "' property."); } } + public JsonNode getDatabaseConfig() { + return rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY); + } + public String getAuthSource() { - return rawConfig.has(AUTH_SOURCE_CONFIGURATION_KEY) ? rawConfig.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(DEFAULT_AUTH_SOURCE) + return getDatabaseConfig().has(AUTH_SOURCE_CONFIGURATION_KEY) ? getDatabaseConfig().get(AUTH_SOURCE_CONFIGURATION_KEY).asText(DEFAULT_AUTH_SOURCE) : DEFAULT_AUTH_SOURCE; } public Integer getCheckpointInterval() { - return rawConfig.has(CHECKPOINT_INTERVAL_CONFIGURATION_KEY) ? 
rawConfig.get(CHECKPOINT_INTERVAL_CONFIGURATION_KEY).asInt(CHECKPOINT_INTERVAL) + return getDatabaseConfig().has(CHECKPOINT_INTERVAL_CONFIGURATION_KEY) + ? getDatabaseConfig().get(CHECKPOINT_INTERVAL_CONFIGURATION_KEY).asInt(CHECKPOINT_INTERVAL) : CHECKPOINT_INTERVAL; } public String getDatabaseName() { - return rawConfig.has(DATABASE_CONFIGURATION_KEY) ? rawConfig.get(DATABASE_CONFIGURATION_KEY).asText() : null; + return getDatabaseConfig().has(DATABASE_CONFIGURATION_KEY) ? getDatabaseConfig().get(DATABASE_CONFIGURATION_KEY).asText() : null; } public OptionalInt getQueueSize() { @@ -63,15 +64,15 @@ public OptionalInt getQueueSize() { } public String getPassword() { - return rawConfig.has(PASSWORD_CONFIGURATION_KEY) ? rawConfig.get(PASSWORD_CONFIGURATION_KEY).asText() : null; + return getDatabaseConfig().has(PASSWORD_CONFIGURATION_KEY) ? getDatabaseConfig().get(PASSWORD_CONFIGURATION_KEY).asText() : null; } public String getUsername() { - return rawConfig.has(USERNAME_CONFIGURATION_KEY) ? rawConfig.get(USERNAME_CONFIGURATION_KEY).asText() : null; + return getDatabaseConfig().has(USERNAME_CONFIGURATION_KEY) ? getDatabaseConfig().get(USERNAME_CONFIGURATION_KEY).asText() : null; } public boolean hasAuthCredentials() { - return rawConfig.has(USERNAME_CONFIGURATION_KEY) && rawConfig.has(PASSWORD_CONFIGURATION_KEY); + return getDatabaseConfig().has(USERNAME_CONFIGURATION_KEY) && getDatabaseConfig().has(PASSWORD_CONFIGURATION_KEY); } public Integer getSampleSize() { @@ -83,8 +84,16 @@ public Integer getSampleSize() { } public boolean getEnforceSchema() { - return rawConfig.has(SCHEMA_ENFORCED_CONFIGURATION_KEY) ? rawConfig.get(SCHEMA_ENFORCED_CONFIGURATION_KEY).asBoolean(true) + return getDatabaseConfig().has(SCHEMA_ENFORCED_CONFIGURATION_KEY) ? 
getDatabaseConfig().get(SCHEMA_ENFORCED_CONFIGURATION_KEY).asBoolean(true) : true; } + public Integer getInitialWaitingTimeSeconds() { + if (rawConfig.has(INITIAL_RECORD_WAITING_TIME_SEC)) { + return rawConfig.get(INITIAL_RECORD_WAITING_TIME_SEC).asInt(DEFAULT_INITIAL_RECORD_WAITING_TIME_SEC); + } else { + return DEFAULT_INITIAL_RECORD_WAITING_TIME_SEC; + } + } + } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbStateIterator.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbStateIterator.java index e956e12ddbcd..f659c2c06af6 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbStateIterator.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbStateIterator.java @@ -9,8 +9,8 @@ import com.mongodb.MongoException; import com.mongodb.client.MongoCursor; import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbCdcEventUtils; import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils; import io.airbyte.integrations.source.mongodb.state.IdType; import io.airbyte.integrations.source.mongodb.state.InitialSnapshotStatus; import io.airbyte.integrations.source.mongodb.state.MongoDbStateManager; @@ -69,10 +69,15 @@ public class MongoDbStateIterator implements Iterator { private boolean finalStateNext = false; /** - * Tracks if the underlying iterator threw an exception. This helps to determine the final state - * status emitted from the final next call. + * Tracks if the underlying iterator threw an exception, indicating that the snapshot for this + * stream failed. 
This helps to determine the final state status emitted from the final next call. + */ - private boolean iterThrewException = false; + private boolean initialSnapshotFailed = false; + + /** + * Tracks the exception thrown if the initial snapshot has failed. + */ + private Exception initialSnapshotException; /** * Constructor. @@ -111,14 +116,24 @@ public MongoDbStateIterator(final MongoCursor iter, @Override public boolean hasNext() { LOGGER.debug("Checking hasNext() for stream {}...", getStream()); + if (initialSnapshotFailed) { + // If the initial snapshot is incomplete for this stream, throw an exception failing the sync. This + // will ensure the platform retry logic + // kicks in and keeps retrying the sync until the initial snapshot is complete. + throw new RuntimeException(initialSnapshotException); + } try { if (iter.hasNext()) { return true; } } catch (final MongoException e) { - // If hasNext throws an exception, log it and then treat it as if hasNext returned false. - iterThrewException = true; + // If hasNext throws an exception, log it and set the flag to indicate that the initial snapshot + // failed. This indicates to the main iterator + // to emit state associated with what has been processed so far. + initialSnapshotFailed = true; + initialSnapshotException = e; LOGGER.info("hasNext threw an exception for stream {}: {}", getStream(), e.getMessage(), e); + return true; } // no more records in cursor + no record messages have been emitted => collection is empty @@ -145,9 +160,9 @@ public AirbyteMessage next() { // Should a state message be emitted based on then last time a state message was emitted? 
final var emitStateDueToDuration = count > 0 && Duration.between(lastCheckpoint, Instant.now()).compareTo(checkpointDuration) > 0; - if (finalStateNext) { + if (finalStateNext || initialSnapshotFailed) { LOGGER.debug("Emitting final state status for stream {}:{}...", stream.getStream().getNamespace(), stream.getStream().getName()); - final var finalStateStatus = iterThrewException ? InitialSnapshotStatus.IN_PROGRESS : InitialSnapshotStatus.COMPLETE; + final var finalStateStatus = initialSnapshotFailed ? InitialSnapshotStatus.IN_PROGRESS : InitialSnapshotStatus.COMPLETE; final var idType = IdType.findByJavaType(lastId.getClass().getSimpleName()) .orElseThrow(() -> new ConfigErrorException("Unsupported _id type " + lastId.getClass().getSimpleName())); final var state = new MongoDbStreamState(lastId.toString(), finalStateStatus, idType); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoUtil.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoUtil.java index 08ecbb83e04f..6becadb3225d 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoUtil.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/MongoUtil.java @@ -4,8 +4,8 @@ package io.airbyte.integrations.source.mongodb; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_DELETED_AT; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_UPDATED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; import static io.airbyte.integrations.source.mongodb.MongoCatalogHelper.AIRBYTE_STREAM_PROPERTIES; import static 
io.airbyte.integrations.source.mongodb.MongoCatalogHelper.DEFAULT_CURSOR_FIELD; import static io.airbyte.integrations.source.mongodb.MongoCatalogHelper.DEFAULT_PRIMARY_KEY; @@ -147,7 +147,7 @@ public static List getAirbyteStreams(final MongoClient mongoClien * @param config The source connector's configuration. * @return The size of the Debezium event queue. */ - public static OptionalInt getDebeziumEventQueueSize(final MongoDbSourceConfig config) { + public static int getDebeziumEventQueueSize(final MongoDbSourceConfig config) { final OptionalInt sizeFromConfig = config.getQueueSize(); if (sizeFromConfig.isPresent()) { @@ -155,15 +155,15 @@ public static OptionalInt getDebeziumEventQueueSize(final MongoDbSourceConfig co if (size < MIN_QUEUE_SIZE) { LOGGER.warn("Queue size is overridden to {} , which is the min allowed for safety.", MIN_QUEUE_SIZE); - return OptionalInt.of(MIN_QUEUE_SIZE); + return MIN_QUEUE_SIZE; } else if (size > MAX_QUEUE_SIZE) { LOGGER.warn("Queue size is overridden to {} , which is the max allowed for safety.", MAX_QUEUE_SIZE); - return OptionalInt.of(MAX_QUEUE_SIZE); + return MAX_QUEUE_SIZE; } - return OptionalInt.of(size); + return size; } - return OptionalInt.of(MAX_QUEUE_SIZE); + return MAX_QUEUE_SIZE; } /** diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjector.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjector.java index 3a6d9a39b489..ed0731d55561 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjector.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjector.java @@ -4,14 +4,13 @@ package 
io.airbyte.integrations.source.mongodb.cdc; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_DELETED_AT; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_UPDATED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants; import io.airbyte.commons.json.Jsons; import java.time.Instant; import java.util.concurrent.atomic.AtomicLong; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtils.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtils.java new file mode 100644 index 000000000000..1e9e296a51e5 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtils.java @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.SCHEMALESS_MODE_DATA_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.SCHEMA_ENFORCED_CONFIGURATION_KEY; +import static java.util.Arrays.asList; +import static org.bson.BsonType.ARRAY; +import static org.bson.BsonType.DOCUMENT; +import static org.bson.codecs.configuration.CodecRegistries.fromProviders; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.mongodb.DBRefCodecProvider; +import io.airbyte.cdk.db.DataTypeUtils; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; +import org.bson.BsonBinary; +import org.bson.BsonDocument; +import org.bson.BsonDocumentReader; +import org.bson.BsonReader; +import org.bson.BsonRegularExpression; +import org.bson.BsonType; +import org.bson.Document; +import org.bson.UuidRepresentation; +import org.bson.codecs.BsonCodecProvider; +import org.bson.codecs.BsonValueCodecProvider; +import org.bson.codecs.DocumentCodecProvider; +import org.bson.codecs.IterableCodecProvider; +import org.bson.codecs.JsonObjectCodecProvider; +import org.bson.codecs.MapCodecProvider; +import org.bson.codecs.UuidCodecProvider; +import org.bson.codecs.ValueCodecProvider; +import org.bson.codecs.configuration.CodecRegistry; +import org.bson.codecs.jsr310.Jsr310CodecProvider; +import org.bson.types.Decimal128; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Collection of utility methods that are used to transform CDC events. 
+ */ +public class MongoDbCdcEventUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(MongoDbCdcEventUtils.class); + + public static final String AIRBYTE_SUFFIX = "_aibyte_transform"; + public static final String DOCUMENT_OBJECT_ID_FIELD = "_id"; + public static final String ID_FIELD = "id"; + public static final String OBJECT_ID_FIELD = "$oid"; + public static final String OBJECT_ID_FIELD_PATTERN = "\\" + OBJECT_ID_FIELD; + + /** + * Generates a JSON document with only the {@link #DOCUMENT_OBJECT_ID_FIELD} property. The value is + * extracted from the provided Debezium event key. The result is the following JSON document: + *

+ *

+ * + * { "_id" : "<the object ID as a String>" } + * + * + * @param debeziumEventKey The Debezium change event key as a JSON document. + * @return The modified JSON document with the ID value extracted from the Debezium change event + * key. + */ + public static String generateObjectIdDocument(final JsonNode debeziumEventKey) { + final String idField = debeziumEventKey.get(ID_FIELD).asText(); + if (StringUtils.contains(idField, OBJECT_ID_FIELD)) { + return idField.replaceAll(OBJECT_ID_FIELD_PATTERN, DOCUMENT_OBJECT_ID_FIELD); + } else { + return Jsons.serialize(Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, idField.replaceAll("^\"|\"$", "")))); + } + } + + /** + * Normalizes the document's object ID value stored in the change event to match the raw data + * produced by the initial snapshot. + *

+ *

+ * We need to unpack the object ID from the event data in order for it to match up with the data + * produced by the initial snapshot. The event contains the object ID in a nested object: + *

+ *

+ * + * {\"_id\": {\"$oid\": \"64f24244f95155351c4185b1\"}, ...} + * + *

+ *

+ * In order to match the data produced by the initial snapshot, this must be translated into: + *

+ *

+ * + * {\"_id\": \"64f24244f95155351c4185b1\", ...} + * + * + * @param data The {@link ObjectNode} that contains the record data extracted from the change event. + * @return The updated record data with the document object ID normalized. + */ + public static ObjectNode normalizeObjectId(final ObjectNode data) { + if (data.has(DOCUMENT_OBJECT_ID_FIELD) && data.get(DOCUMENT_OBJECT_ID_FIELD).has(OBJECT_ID_FIELD)) { + final String objectId = data.get(DOCUMENT_OBJECT_ID_FIELD).get(OBJECT_ID_FIELD).asText(); + data.put(DOCUMENT_OBJECT_ID_FIELD, objectId); + } + return data; + } + + public static ObjectNode normalizeObjectIdNoSchema(final ObjectNode data) { + normalizeObjectId(data); + // normalize _id in "data" if key exists + final Optional maybeDataField = Optional.ofNullable(data.get(SCHEMALESS_MODE_DATA_FIELD)); + maybeDataField.ifPresent(d -> normalizeObjectId((ObjectNode) d)); + return data; + } + + /** + * Transforms the Debezium event data to ensure that all data types are consistent with those in + * documents generated by initial snapshots. + * + * @param json The Debezium event data as JSON. + * @return The transformed Debezium event data as JSON. 
+ */ + public static ObjectNode transformDataTypes(final String json, final Set configuredFields) { + final ObjectNode objectNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + final Document document = Document.parse(json); + formatDocument(document, objectNode, configuredFields); + return normalizeObjectId(objectNode); + } + + public static ObjectNode transformDataTypesNoSchema(final String json) { + final ObjectNode objectNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + final Document document = Document.parse(json); + formatDocumentNoSchema(document, objectNode); + return normalizeObjectIdNoSchema(objectNode); + } + + public static JsonNode toJsonNode(final Document document, final Set columnNames) { + final ObjectNode objectNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + formatDocument(document, objectNode, columnNames); + return normalizeObjectId(objectNode); + } + + public static JsonNode toJsonNodeNoSchema(final Document document) { + final ObjectNode objectNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + formatDocumentNoSchema(document, objectNode); + return normalizeObjectIdNoSchema(objectNode); + } + + private static void formatDocument(final Document document, final ObjectNode objectNode, final Set columnNames) { + final BsonDocument bsonDocument = toBsonDocument(document); + try (final BsonReader reader = new BsonDocumentReader(bsonDocument)) { + readDocument(reader, objectNode, columnNames, false); + } catch (final Exception e) { + LOGGER.error("Exception while parsing BsonDocument: {}", e.getMessage()); + throw new RuntimeException(e); + } + } + + private static void formatDocumentNoSchema(final Document document, final ObjectNode objectNode) { + objectNode.set(SCHEMALESS_MODE_DATA_FIELD, Jsons.jsonNode(Collections.emptyMap())); + final BsonDocument bsonDocument = toBsonDocument(document); + try (final BsonReader reader = new BsonDocumentReader(bsonDocument)) { + readDocument(reader, (ObjectNode) 
objectNode.get(SCHEMALESS_MODE_DATA_FIELD), Collections.emptySet(), true); + final Optional maybeId = Optional.ofNullable(objectNode.get(SCHEMALESS_MODE_DATA_FIELD).get(DOCUMENT_OBJECT_ID_FIELD)); + maybeId.ifPresent(id -> objectNode.set(DOCUMENT_OBJECT_ID_FIELD, id)); + } catch (final Exception e) { + LOGGER.error("Exception while parsing BsonDocument: {}", e.getMessage()); + throw new RuntimeException(e); + } + } + + private static ObjectNode readDocument(final BsonReader reader, + final ObjectNode jsonNodes, + final Set includedFields, + final boolean allowAllFields) { + reader.readStartDocument(); + while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) { + final var fieldName = reader.readName(); + final var fieldType = reader.getCurrentBsonType(); + + if (shouldIncludeField(fieldName, includedFields, allowAllFields)) { + if (DOCUMENT.equals(fieldType)) { + /* + * Recursion is used to parse inner documents. An empty field set is passed together with the + * allow-all flag so that every nested field is processed. + */ + jsonNodes.set(fieldName, readDocument(reader, (ObjectNode) Jsons.jsonNode(Collections.emptyMap()), Set.of(), true)); + } else if (ARRAY.equals(fieldType)) { + jsonNodes.set(fieldName, readArray(reader, includedFields, fieldName)); + } else { + readField(reader, jsonNodes, fieldName, fieldType); + } + transformToStringIfMarked(jsonNodes, includedFields, fieldName); + } else { + reader.skipValue(); + } + } + reader.readEndDocument(); + + return jsonNodes; + } + + private static JsonNode readArray(final BsonReader reader, final Set columnNames, final String fieldName) { + reader.readStartArray(); + final var elements = Lists.newArrayList(); + + while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) { + final var currentBsonType = reader.getCurrentBsonType(); + if (DOCUMENT.equals(currentBsonType)) { + // recursion is used to read inner doc + elements.add(readDocument(reader, (ObjectNode) Jsons.jsonNode(Collections.emptyMap()), columnNames, true)); + } else if
(ARRAY.equals(currentBsonType)) { + // recursion is used to read inner array + elements.add(readArray(reader, columnNames, fieldName)); + } else { + final var element = readField(reader, (ObjectNode) Jsons.jsonNode(Collections.emptyMap()), fieldName, currentBsonType); + elements.add(element.get(fieldName)); + } + } + reader.readEndArray(); + return Jsons.jsonNode(MoreIterators.toList(elements.iterator())); + } + + private static ObjectNode readField(final BsonReader reader, + final ObjectNode o, + final String fieldName, + final BsonType fieldType) { + switch (fieldType) { + case BOOLEAN -> o.put(fieldName, reader.readBoolean()); + case INT32 -> o.put(fieldName, reader.readInt32()); + case INT64 -> o.put(fieldName, reader.readInt64()); + case DOUBLE -> o.put(fieldName, reader.readDouble()); + case DECIMAL128 -> o.put(fieldName, toDouble(reader.readDecimal128())); + case TIMESTAMP -> o.put(fieldName, DataTypeUtils.toISO8601StringWithMilliseconds(reader.readTimestamp().getValue())); + case DATE_TIME -> o.put(fieldName, DataTypeUtils.toISO8601StringWithMilliseconds(reader.readDateTime())); + case BINARY -> o.put(fieldName, toByteArray(reader.readBinaryData())); + case SYMBOL -> o.put(fieldName, reader.readSymbol()); + case STRING -> o.put(fieldName, reader.readString()); + case OBJECT_ID -> o.put(fieldName, toString(reader.readObjectId())); + case JAVASCRIPT -> o.put(fieldName, reader.readJavaScript()); + case JAVASCRIPT_WITH_SCOPE -> readJavaScriptWithScope(o, reader, fieldName); + case REGULAR_EXPRESSION -> o.put(fieldName, readRegularExpression(reader.readRegularExpression())); + default -> reader.skipValue(); + } + + return o; + } + + private static BsonDocument toBsonDocument(final Document document) { + try { + final CodecRegistry customCodecRegistry = + fromProviders(asList( + new UuidCodecProvider(UuidRepresentation.STANDARD), + new ValueCodecProvider(), + new BsonValueCodecProvider(), + new DocumentCodecProvider(), + new IterableCodecProvider(), + new 
MapCodecProvider(), + new Jsr310CodecProvider(), + new JsonObjectCodecProvider(), + new BsonCodecProvider(), + new DBRefCodecProvider())); + + // Override the default codec registry + return document.toBsonDocument(BsonDocument.class, customCodecRegistry); + } catch (final Exception e) { + LOGGER.error("Exception while converting Document to BsonDocument: {}", e.getMessage()); + throw new RuntimeException(e); + } + } + + private static String toString(final Object value) { + return value == null ? null : value.toString(); + } + + private static Double toDouble(final Decimal128 value) { + return value == null ? null : value.doubleValue(); + } + + private static byte[] toByteArray(final BsonBinary value) { + return value == null ? null : value.getData(); + } + + private static void readJavaScriptWithScope(final ObjectNode o, final BsonReader reader, final String fieldName) { + final var code = reader.readJavaScriptWithScope(); + final var scope = readDocument(reader, (ObjectNode) Jsons.jsonNode(Collections.emptyMap()), Set.of("scope"), false); + o.set(fieldName, Jsons.jsonNode(ImmutableMap.of("code", code, "scope", scope))); + } + + private static String readRegularExpression(final BsonRegularExpression regularExpression) { + if (regularExpression != null) { + final String options = regularExpression.getOptions(); + final String pattern = regularExpression.getPattern(); + return (StringUtils.isNotBlank(options)) ? "(" + options + ")" + pattern : pattern; + } else { + return null; + } + } + + public static void transformToStringIfMarked(final ObjectNode jsonNodes, final Set columnNames, final String fieldName) { + if (columnNames.contains(fieldName + AIRBYTE_SUFFIX)) { + final JsonNode data = jsonNodes.get(fieldName); + if (data != null) { + jsonNodes.remove(fieldName); + jsonNodes.put(fieldName + AIRBYTE_SUFFIX, data.isTextual() ? 
data.asText() : data.toString()); + } else { + LOGGER.debug("WARNING Field list out of sync, Document doesn't contain field: {}", fieldName); + } + } + } + + /** + * Tests whether the current field is included in the configured set of discovered fields. In order + * to support the fields of nested documents whose parent field passed the initial filter, the + * {@code allowAll} flag may be set as a way to allow every field of the nested document + * to be processed. + * + * @param fieldName The name of the current field. + * @param includedFields The discovered fields. + * @param allowAll Flag that overrides the field inclusion comparison. + * @return {@code true} if the current field should be included for processing or {@code false} + * otherwise. + */ + private static boolean shouldIncludeField(final String fieldName, final Set includedFields, final boolean allowAll) { + return allowAll || includedFields.contains(fieldName); + } + + /** + * Parses source-mongodbv2 configuration json for the value of schema_enforced.
+ * + * @param config config json + * @return true unless a schema_enforced configured to false + */ + public static boolean isEnforceSchema(final JsonNode config) { + return config == null || !config.has(SCHEMA_ENFORCED_CONFIGURATION_KEY) + || (config.has(SCHEMA_ENFORCED_CONFIGURATION_KEY) && config.get( + SCHEMA_ENFORCED_CONFIGURATION_KEY).asBoolean(true)); + + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializer.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializer.java index ce979bc13353..8b73e6ab3f40 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializer.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializer.java @@ -4,16 +4,14 @@ package io.airbyte.integrations.source.mongodb.cdc; +import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcCursorInvalidMessage; + import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoDatabase; +import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility; import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler; -import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager; -import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbCdcTargetPosition; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumStateUtil; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbResumeTokenHelper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; import 
io.airbyte.commons.util.AutoCloseableIterators; @@ -28,7 +26,6 @@ import java.time.Instant; import java.util.List; import java.util.Optional; -import java.util.OptionalInt; import java.util.Properties; import java.util.function.Supplier; import org.bson.BsonDocument; @@ -85,16 +82,19 @@ public List> createCdcIterators( final Instant emittedAt, final MongoDbSourceConfig config) { - final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(config.rawConfig()); - final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(config.rawConfig()); - final OptionalInt queueSize = MongoUtil.getDebeziumEventQueueSize(config); + final Duration firstRecordWaitTime = Duration.ofSeconds(config.getInitialWaitingTimeSeconds()); + // #35059: debezium heartbeats are not sent on the expected interval. As a workaround, + // the subsequent record wait time reuses the configurable first record wait time. + final Duration subsequentRecordWaitTime = firstRecordWaitTime; + LOGGER.info("Subsequent cdc record wait time: {} seconds", subsequentRecordWaitTime); + final int queueSize = MongoUtil.getDebeziumEventQueueSize(config); final String databaseName = config.getDatabaseName(); final boolean isEnforceSchema = config.getEnforceSchema(); final Properties defaultDebeziumProperties = MongoDbCdcProperties.getDebeziumProperties(); logOplogInfo(mongoClient); - final BsonDocument resumeToken = MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient); + final BsonDocument initialResumeToken = MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, databaseName, catalog); final JsonNode initialDebeziumState = - mongoDbDebeziumStateUtil.constructInitialDebeziumState(resumeToken, mongoClient, databaseName); + mongoDbDebeziumStateUtil.constructInitialDebeziumState(initialResumeToken, mongoClient, databaseName); final MongoDbCdcState cdcState = (stateManager.getCdcState() == null || stateManager.getCdcState().state() == null) ?
new MongoDbCdcState(initialDebeziumState, isEnforceSchema) : new MongoDbCdcState(Jsons.clone(stateManager.getCdcState().state()), stateManager.getCdcState().schema_enforced()); @@ -102,7 +102,7 @@ public List> createCdcIterators( Jsons.clone(defaultDebeziumProperties), catalog, cdcState.state(), - config.rawConfig(), + config.getDatabaseConfig(), mongoClient); // We should always be able to extract offset out of state if it's not null @@ -115,6 +115,7 @@ public List> createCdcIterators( optSavedOffset.filter(savedOffset -> mongoDbDebeziumStateUtil.isValidResumeToken(savedOffset, mongoClient)).isPresent(); if (!savedOffsetIsValid) { + AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcCursorInvalidMessage()); LOGGER.info("Saved offset is not valid. Airbyte will trigger a full refresh."); // If the offset in the state is invalid, reset the state to the initial STATE stateManager.resetState(new MongoDbCdcState(initialDebeziumState, config.getEnforceSchema())); @@ -136,19 +137,15 @@ public List> createCdcIterators( initialSnapshotHandler.getIterators(initialSnapshotStreams, stateManager, mongoClient.getDatabase(databaseName), cdcMetadataInjector, emittedAt, config.getCheckpointInterval(), isEnforceSchema); - final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>(config.rawConfig(), - new MongoDbCdcTargetPosition(resumeToken), false, firstRecordWaitTime, subsequentRecordWaitTime, queueSize); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>(config.getDatabaseConfig(), + new MongoDbCdcTargetPosition(initialResumeToken), false, firstRecordWaitTime, subsequentRecordWaitTime, queueSize, false); final MongoDbCdcStateHandler mongoDbCdcStateHandler = new MongoDbCdcStateHandler(stateManager); final MongoDbCdcSavedInfoFetcher cdcSavedInfoFetcher = new MongoDbCdcSavedInfoFetcher(stateToBeUsed); + final var propertiesManager = new MongoDbDebeziumPropertiesManager(defaultDebeziumProperties, config.getDatabaseConfig(), catalog); + final var 
eventConverter = new MongoDbDebeziumEventConverter(cdcMetadataInjector, catalog, emittedAt, config.getDatabaseConfig()); - final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators(catalog, - cdcSavedInfoFetcher, - mongoDbCdcStateHandler, - cdcMetadataInjector, - defaultDebeziumProperties, - DebeziumPropertiesManager.DebeziumConnectorType.MONGODB, - emittedAt, - false); + final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators( + propertiesManager, eventConverter, cdcSavedInfoFetcher, mongoDbCdcStateHandler); // We can close the client after the initial snapshot is complete, incremental // iterator does not make use of the client. @@ -164,8 +161,8 @@ private void logOplogInfo(final MongoClient mongoClient) { final Document command = new Document("collStats", "oplog.rs"); final Document result = localDatabase.runCommand(command); if (result != null) { - LOGGER.info("Max oplog size is {} bytes", result.getInteger("maxSize")); - LOGGER.info("Free space in oplog is {} bytes", result.getInteger("freeStorageSize")); + LOGGER.info("Max oplog size is {} bytes", result.getLong("maxSize")); + LOGGER.info("Free space in oplog is {} bytes", result.getLong("freeStorageSize")); } } catch (final Exception e) { LOGGER.warn("Unable to query for op log stats, exception: {}" + e.getMessage()); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcProperties.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcProperties.java index e567d190da32..0e559a634c2a 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcProperties.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcProperties.java @@ -20,6 +20,8 @@ public 
class MongoDbCdcProperties { static final String HEARTBEAT_INTERVAL_KEY = "heartbeat.interval.ms"; static final String SNAPSHOT_MODE_KEY = "snapshot.mode"; static final String SNAPSHOT_MODE_VALUE = "never"; + static final String CAPTURE_SCOPE_KEY = "capture.scope"; + static final String CAPTURE_SCOPE_VALUE = "database"; static final String TOMBSTONE_ON_DELETE_KEY = "tombstones.on.delete"; static final String TOMBSTONE_ON_DELETE_VALUE = Boolean.FALSE.toString(); @@ -33,6 +35,7 @@ public static Properties getDebeziumProperties() { props.setProperty(CONNECTOR_CLASS_KEY, CONNECTOR_CLASS_VALUE); props.setProperty(SNAPSHOT_MODE_KEY, SNAPSHOT_MODE_VALUE); + props.setProperty(CAPTURE_SCOPE_KEY, CAPTURE_SCOPE_VALUE); props.setProperty(CAPTURE_MODE_KEY, CAPTURE_MODE_VALUE); props.setProperty(HEARTBEAT_INTERVAL_KEY, HEARTBEAT_FREQUENCY_MS); props.setProperty(TOMBSTONE_ON_DELETE_KEY, TOMBSTONE_ON_DELETE_VALUE); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPosition.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPosition.java new file mode 100644 index 000000000000..c2d20f9bc4e9 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPosition.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.cdk.integrations.debezium.CdcTargetPosition; +import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata; +import io.airbyte.cdk.integrations.debezium.internals.SnapshotMetadata; +import io.airbyte.commons.json.Jsons; +import io.debezium.connector.mongodb.ResumeTokens; +import java.util.Map; +import java.util.Objects; +import org.bson.BsonDocument; +import org.bson.BsonTimestamp; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of the {@link CdcTargetPosition} interface that provides methods for determining + * when a sync has reached the target position of the CDC log for MongoDB. In this case, the target + * position is a resume token value from the MongoDB oplog. This implementation compares the + * timestamp present in the Debezium change event against the timestamp of the resume token recorded + * at the start of a sync. When the event timestamp reaches or exceeds the resume token timestamp, the sync + * should stop to prevent it from running forever.
+ */ +public class MongoDbCdcTargetPosition implements CdcTargetPosition { + + private static final Logger LOGGER = LoggerFactory.getLogger(MongoDbCdcTargetPosition.class); + + private final BsonTimestamp resumeTokenTimestamp; + + public MongoDbCdcTargetPosition(final BsonDocument resumeToken) { + this.resumeTokenTimestamp = ResumeTokens.getTimestamp(resumeToken); + } + + @VisibleForTesting + BsonTimestamp getResumeTokenTimestamp() { + return resumeTokenTimestamp; + } + + @Override + public boolean isHeartbeatSupported() { + return true; + } + + @Override + public boolean reachedTargetPosition(final ChangeEventWithMetadata changeEventWithMetadata) { + if (changeEventWithMetadata.isSnapshotEvent()) { + return false; + } else if (SnapshotMetadata.LAST == changeEventWithMetadata.snapshotMetadata()) { + LOGGER.info("Signalling close because Snapshot is complete"); + return true; + } else { + final BsonTimestamp eventResumeTokenTimestamp = + MongoDbResumeTokenHelper.extractTimestampFromEvent(changeEventWithMetadata.eventValueAsJson()); + final boolean isEventResumeTokenAfter = resumeTokenTimestamp.compareTo(eventResumeTokenTimestamp) <= 0; + if (isEventResumeTokenAfter) { + LOGGER.info("Signalling close because record's event timestamp {} is after target event timestamp {}.", + eventResumeTokenTimestamp, resumeTokenTimestamp); + } + return isEventResumeTokenAfter; + } + } + + @Override + public boolean reachedTargetPosition(final BsonTimestamp positionFromHeartbeat) { + return positionFromHeartbeat != null && positionFromHeartbeat.compareTo(resumeTokenTimestamp) >= 0; + } + + @Override + public BsonTimestamp extractPositionFromHeartbeatOffset(final Map sourceOffset) { + return ResumeTokens.getTimestamp( + ResumeTokens.fromData( + sourceOffset.get(MongoDbDebeziumConstants.ChangeEvent.SOURCE_RESUME_TOKEN).toString())); + } + + @Override + public boolean isEventAheadOffset(final Map offset, final ChangeEventWithMetadata event) { + if (offset.size() != 1) { + return false; 
+ } + + return MongoDbResumeTokenHelper.extractTimestampFromEvent(event.eventValueAsJson()).getValue() >= ResumeTokens + .getTimestamp(ResumeTokens.fromData(getResumeToken(offset))).getValue(); + } + + @Override + public boolean isSameOffset(final Map offsetA, final Map offsetB) { + if (offsetA == null || offsetA.size() != 1) { + return false; + } + if (offsetB == null || offsetB.size() != 1) { + return false; + } + + return getResumeToken(offsetA).equals(getResumeToken(offsetB)); + } + + private static String getResumeToken(final Map offset) { + final JsonNode offsetJson = Jsons.deserialize((String) offset.values().toArray()[0]); + return offsetJson.get(MongoDbDebeziumConstants.OffsetState.VALUE_RESUME_TOKEN).asText(); + } + + @Override + public boolean equals(final Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + final MongoDbCdcTargetPosition that = (MongoDbCdcTargetPosition) o; + return Objects.equals(resumeTokenTimestamp, that.resumeTokenTimestamp); + } + + @Override + public int hashCode() { + return Objects.hash(resumeTokenTimestamp); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoader.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoader.java new file mode 100644 index 000000000000..ec537d60b8e4 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoader.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import io.airbyte.commons.json.Jsons; +import io.debezium.connector.mongodb.MongoDbConnectorConfig; +import io.debezium.connector.mongodb.MongoDbOffsetContext; +import io.debezium.connector.mongodb.MongoDbOffsetContext.Loader; +import io.debezium.connector.mongodb.ReplicaSets; +import java.util.Collections; +import java.util.Map; + +/** + * Custom Debezium offset loader for MongoDB. + *

+ *

+ * N.B. In order to extract the offset from the {@link MongoDbCustomLoader}, you must first get the + * {@link io.debezium.connector.mongodb.ReplicaSetOffsetContext} from the + * {@link MongoDbOffsetContext} for the replica set for which the offset is requested. From that + * context, you can then request the actual Debezium offset. + */ +public class MongoDbCustomLoader extends Loader { + + private Map, Map> offsets; + + public MongoDbCustomLoader(final MongoDbConnectorConfig connectorConfig, final ReplicaSets replicaSets) { + super(connectorConfig, replicaSets); + } + + @Override + public MongoDbOffsetContext loadOffsets(final Map, Map> offsets) { + this.offsets = Jsons.clone(offsets); + return super.loadOffsets(offsets); + } + + public Map, Map> getRawOffset() { + return Collections.unmodifiableMap(offsets); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumConstants.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumConstants.java new file mode 100644 index 000000000000..170c6ae78552 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumConstants.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import io.debezium.connector.mongodb.SourceInfo; + +/** + * A collection of constants for use with the Debezium MongoDB Connector. + */ +public class MongoDbDebeziumConstants { + + /** + * Constants for Debezium Source Event data. 
+ */ + public static class ChangeEvent { + + public static final String SOURCE = "source"; + public static final String SOURCE_COLLECTION = SourceInfo.COLLECTION; + public static final String SOURCE_DB = "db"; + public static final String SOURCE_ORDER = SourceInfo.ORDER; + public static final String SOURCE_RESUME_TOKEN = "resume_token"; + public static final String SOURCE_SECONDS = SourceInfo.TIMESTAMP; + public static final String SOURCE_TIMESTAMP_MS = "ts_ms"; + + } + + /** + * Constants for the configuration of the MongoDB connector. These constants represent the + * configuration values that are to be mapped to the Debezium configuration. + */ + public static class Configuration { + + public static final String AUTH_SOURCE_CONFIGURATION_KEY = "auth_source"; + public static final String CONNECTION_STRING_CONFIGURATION_KEY = "connection_string"; + public static final String CREDENTIALS_PLACEHOLDER = ":@"; + public static final String DATABASE_CONFIGURATION_KEY = "database"; + public static final String DATABASE_CONFIG_CONFIGURATION_KEY = "database_config"; + public static final String PASSWORD_CONFIGURATION_KEY = "password"; + public static final String USERNAME_CONFIGURATION_KEY = "username"; + public static final String SCHEMA_ENFORCED_CONFIGURATION_KEY = "schema_enforced"; + public static final String SCHEMALESS_MODE_DATA_FIELD = "data"; + + } + + /** + * Constants for Debezium Offset State storage. 
+ */ + public static class OffsetState { + + public static final String KEY_REPLICA_SET = SourceInfo.REPLICA_SET_NAME; + public static final String KEY_SERVER_ID = SourceInfo.SERVER_ID_KEY; + public static final String VALUE_INCREMENT = SourceInfo.ORDER; + public static final String VALUE_RESUME_TOKEN = "resume_token"; + public static final String VALUE_SECONDS = SourceInfo.TIMESTAMP; + public static final String VALUE_TRANSACTION_ID = "transaction_id"; + + } + + private MongoDbDebeziumConstants() {} + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverter.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverter.java new file mode 100644 index 000000000000..30056743ced1 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverter.java @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector; +import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.time.Instant; +import java.util.Set; +import java.util.stream.Collectors; + +public class MongoDbDebeziumEventConverter implements DebeziumEventConverter { + + private final CdcMetadataInjector cdcMetadataInjector; + private final ConfiguredAirbyteCatalog configuredAirbyteCatalog; + private final Instant emittedAt; + private final JsonNode config; + + public MongoDbDebeziumEventConverter( + CdcMetadataInjector cdcMetadataInjector, + ConfiguredAirbyteCatalog configuredAirbyteCatalog, + Instant emittedAt, + JsonNode config) { + this.cdcMetadataInjector = cdcMetadataInjector; + this.configuredAirbyteCatalog = configuredAirbyteCatalog; + this.emittedAt = emittedAt; + this.config = config; + } + + @Override + public AirbyteMessage toAirbyteMessage(ChangeEventWithMetadata event) { + final JsonNode debeziumEventKey = event.eventKeyAsJson(); + final JsonNode debeziumEvent = event.eventValueAsJson(); + final JsonNode before = debeziumEvent.get(DebeziumEventConverter.BEFORE_EVENT); + final JsonNode after = debeziumEvent.get(DebeziumEventConverter.AFTER_EVENT); + final JsonNode source = debeziumEvent.get(DebeziumEventConverter.SOURCE_EVENT); + final String operation = debeziumEvent.get(DebeziumEventConverter.OPERATION_FIELD).asText(); + final boolean isEnforceSchema = MongoDbCdcEventUtils.isEnforceSchema(config); + + final Set configuredFields = isEnforceSchema ? 
getConfiguredMongoDbCollectionFields(source, configuredAirbyteCatalog, cdcMetadataInjector) + : null; + + /* + * Delete events need to be handled separately from other CRUD events, as depending on the version + * of the MongoDB server, the contents of the Debezium event data will be different. See + * #formatMongoDbDeleteDebeziumData() for more details. + */ + final JsonNode data = switch (operation) { + case "c", "i", "u" -> formatMongoDbDebeziumData( + before, after, source, debeziumEventKey, cdcMetadataInjector, configuredFields, isEnforceSchema); + case "d" -> formatMongoDbDeleteDebeziumData(before, debeziumEventKey, source, cdcMetadataInjector, configuredFields, isEnforceSchema); + default -> throw new IllegalArgumentException("Unsupported MongoDB change event operation '" + operation + "'."); + }; + + return DebeziumEventConverter.buildAirbyteMessage(source, cdcMetadataInjector, emittedAt, data); + } + + private static JsonNode formatMongoDbDebeziumData(final JsonNode before, + final JsonNode after, + final JsonNode source, + final JsonNode debeziumEventKey, + final CdcMetadataInjector cdcMetadataInjector, + final Set configuredFields, + final boolean isEnforceSchema) { + + if ((before == null || before.isNull()) && (after == null || after.isNull())) { + // In case a mongodb document was updated and then deleted, the update change event will not have + // any information ({after: null}) + // We are going to treat it as a delete. + return formatMongoDbDeleteDebeziumData(before, debeziumEventKey, source, cdcMetadataInjector, configuredFields, isEnforceSchema); + } else { + final String eventJson = (after.isNull() ? before : after).asText(); + return DebeziumEventConverter.addCdcMetadata( + isEnforceSchema + ?
MongoDbCdcEventUtils.transformDataTypes(eventJson, configuredFields) + : MongoDbCdcEventUtils.transformDataTypesNoSchema(eventJson), + source, cdcMetadataInjector, false); + } + } + + private static JsonNode formatMongoDbDeleteDebeziumData(final JsonNode before, + final JsonNode debeziumEventKey, + final JsonNode source, + final CdcMetadataInjector cdcMetadataInjector, + final Set configuredFields, + final boolean isEnforceSchema) { + final String eventJson; + + /* + * The change events produced by MongoDB differ based on the server version. For version BEFORE 6.x, + * the event does not contain the before document. Therefore, the only data that can be extracted is + * the object ID of the deleted document, which is stored in the event key. Otherwise, if the server + * is version 6.+ AND the pre-image support has been enabled on the collection, we can use the + * "before" document from the event to represent the deleted document. + * + * See + * https://www.mongodb.com/docs/manual/reference/change-events/delete/#document-pre--and-post-images + * for more details. + * + * NOTE(review): "before" is dereferenced below without a Java-null check, yet + * formatMongoDbDebeziumData() forwards it here after testing "before == null". A change event + * that lacks the "before" field entirely would therefore throw a NullPointerException here. + * TODO confirm whether the field is always present in the event and add a guard if it is not. + */ + if (!before.isNull()) { + eventJson = before.asText(); + } else { + eventJson = MongoDbCdcEventUtils.generateObjectIdDocument(debeziumEventKey); + } + + return DebeziumEventConverter.addCdcMetadata( + isEnforceSchema + ?
MongoDbCdcEventUtils.transformDataTypes(eventJson, configuredFields) + : MongoDbCdcEventUtils.transformDataTypesNoSchema(eventJson), + source, cdcMetadataInjector, true); + } + + private static Set getConfiguredMongoDbCollectionFields(final JsonNode source, + final ConfiguredAirbyteCatalog configuredAirbyteCatalog, + final CdcMetadataInjector cdcMetadataInjector) { + final String streamNamespace = cdcMetadataInjector.namespace(source); + final String streamName = cdcMetadataInjector.name(source); + return configuredAirbyteCatalog.getStreams().stream() + .filter(s -> streamName.equals(s.getStream().getName()) && streamNamespace.equals(s.getStream().getNamespace())) + .map(CatalogHelpers::getTopLevelFieldNames) + .flatMap(Set::stream) + .collect(Collectors.toSet()); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManager.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManager.java new file mode 100644 index 000000000000..dc81748ad878 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManager.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.source.mongodb.cdc;
+
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.AUTH_SOURCE_CONFIGURATION_KEY;
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CONNECTION_STRING_CONFIGURATION_KEY;
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CREDENTIALS_PLACEHOLDER;
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.DATABASE_CONFIGURATION_KEY;
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.PASSWORD_CONFIGURATION_KEY;
+import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.USERNAME_CONFIGURATION_KEY;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
+import java.util.List;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+/**
+ * Custom {@link DebeziumPropertiesManager} specific for the configuration of the Debezium MongoDB
+ * connector.
+ * <p>
+ * This implementation provides the specific connection properties required for the Debezium MongoDB
+ * connector. These properties differ from the general relational database connection properties
+ * used by the other Debezium connectors.
+ */
+public class MongoDbDebeziumPropertiesManager extends DebeziumPropertiesManager {
+
+  static final String COLLECTION_INCLUDE_LIST_KEY = "collection.include.list";
+  static final String DATABASE_INCLUDE_LIST_KEY = "database.include.list";
+  static final String CAPTURE_TARGET_KEY = "capture.target";
+  static final String DOUBLE_QUOTES_PATTERN = "\"";
+  static final String MONGODB_AUTHSOURCE_KEY = "mongodb.authsource";
+  static final String MONGODB_CONNECTION_MODE_KEY = "mongodb.connection.mode";
+  static final String MONGODB_CONNECTION_MODE_VALUE = "replica_set";
+  static final String MONGODB_CONNECTION_STRING_KEY = "mongodb.connection.string";
+  static final String MONGODB_PASSWORD_KEY = "mongodb.password";
+  static final String MONGODB_SSL_ENABLED_KEY = "mongodb.ssl.enabled";
+  static final String MONGODB_SSL_ENABLED_VALUE = Boolean.FALSE.toString();
+  static final String MONGODB_USER_KEY = "mongodb.user";
+
+  public MongoDbDebeziumPropertiesManager(final Properties properties,
+                                          final JsonNode config,
+                                          final ConfiguredAirbyteCatalog catalog) {
+    super(properties, config, catalog);
+  }
+
+  /**
+   * Builds the MongoDB connection properties for Debezium: the connection string (primary reads),
+   * the fixed {@code replica_set} connection mode, the SSL flag (always {@code false}) and, when
+   * present in the configuration, the user, password and auth source.
+   *
+   * @param config The connector configuration.
+   * @return The connection-related Debezium properties.
+   */
+  @Override
+  protected Properties getConnectionConfiguration(final JsonNode config) {
+    final Properties properties = new Properties();
+
+    properties.setProperty(MONGODB_CONNECTION_STRING_KEY, buildConnectionString(config, false));
+    properties.setProperty(MONGODB_CONNECTION_MODE_KEY, MONGODB_CONNECTION_MODE_VALUE);
+
+    if (config.has(USERNAME_CONFIGURATION_KEY)) {
+      properties.setProperty(MONGODB_USER_KEY, config.get(USERNAME_CONFIGURATION_KEY).asText());
+    }
+    if (config.has(PASSWORD_CONFIGURATION_KEY)) {
+      properties.setProperty(MONGODB_PASSWORD_KEY, config.get(PASSWORD_CONFIGURATION_KEY).asText());
+    }
+    if (config.has(AUTH_SOURCE_CONFIGURATION_KEY)) {
+      properties.setProperty(MONGODB_AUTHSOURCE_KEY, config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText());
+    }
+    properties.setProperty(MONGODB_SSL_ENABLED_KEY, MONGODB_SSL_ENABLED_VALUE);
+    return properties;
+  }
+
+  /**
+   * Derives the Debezium connector name from the configured database name.
+   *
+   * @param config The connector configuration; must contain the database key.
+   * @return The normalized database name (see {@link #normalizeName(String)}).
+   */
+  @Override
+  protected String getName(final JsonNode config) {
+    return normalizeName(config.get(DATABASE_CONFIGURATION_KEY).asText());
+  }
+
+  /**
+   * Builds the properties that restrict capture to the configured database and collections.
+   *
+   * @param catalog The configured catalog whose streams form the collection include list.
+   * @param config The connector configuration; must contain the database key.
+   * @return The include-list Debezium properties.
+   */
+  @Override
+  protected Properties getIncludeConfiguration(final ConfiguredAirbyteCatalog catalog, final JsonNode config) {
+    final Properties properties = new Properties();
+
+    // Database/collection selection
+    properties.setProperty(COLLECTION_INCLUDE_LIST_KEY, createCollectionIncludeString(catalog.getStreams()));
+    properties.setProperty(DATABASE_INCLUDE_LIST_KEY, config.get(DATABASE_CONFIGURATION_KEY).asText());
+    properties.setProperty(CAPTURE_TARGET_KEY, config.get(DATABASE_CONFIGURATION_KEY).asText());
+
+    return properties;
+  }
+
+  /**
+   * Joins the streams into a comma-separated list of {@code namespace\.name} entries. The dot is
+   * backslash-escaped in the emitted value.
+   *
+   * @param streams The configured streams.
+   * @return The collection include list value; empty when there are no streams.
+   */
+  protected String createCollectionIncludeString(final List<ConfiguredAirbyteStream> streams) {
+    return streams.stream()
+        .map(s -> s.getStream().getNamespace() + "\\." + s.getStream().getName())
+        .collect(Collectors.joining(","));
+  }
+
+  /**
+   * Ensure that the name property is formatted correctly for use by Debezium.
+   *
+   * @param name The name to be associated with the Debezium connector.
+   * @return The name with every underscore replaced by a hyphen, or {@code null} if {@code name} is
+   *         {@code null}.
+   */
+  public static String normalizeName(final String name) {
+    // Literal character replacement; no regular expression is needed for a single character.
+    return name != null ? name.replace('_', '-') : null;
+  }
+
+  /**
+   * Builds the MongoDB connection string from the provided configuration. This method handles
+   * removing any values accidentally copied and pasted from the MongoDB Atlas UI.
+   *
+   * @param config The connector configuration.
+   * @param useSecondary Whether to use the secondary for reads.
+   * @return The connection string.
+   */
+  public static String buildConnectionString(final JsonNode config, final boolean useSecondary) {
+    final String connectionString = config.get(CONNECTION_STRING_CONFIGURATION_KEY)
+        .asText()
+        .trim()
+        .replaceAll(DOUBLE_QUOTES_PATTERN, "")
+        .replaceAll(CREDENTIALS_PLACEHOLDER, "");
+
+    /*
+     * A connection string may already carry options (e.g. "...?replicaSet=rs0"). Appending a second
+     * '?' would fold our options into the value of the last user-supplied option, so continue an
+     * existing option list with '&' instead.
+     */
+    final String separator;
+    if (!connectionString.contains("?")) {
+      separator = "?";
+    } else if (connectionString.endsWith("?") || connectionString.endsWith("&")) {
+      separator = "";
+    } else {
+      separator = "&";
+    }
+
+    final StringBuilder builder = new StringBuilder(connectionString);
+    builder.append(separator);
+    builder.append("retryWrites=false&provider=airbyte");
+    if (useSecondary) {
+      builder.append("&readPreference=secondary");
+    }
+    return builder.toString();
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtil.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtil.java
new file mode 100644
index 000000000000..e835de192f1e
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtil.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mongodb.cdc;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.mongodb.MongoChangeStreamException;
+import com.mongodb.MongoCommandException;
+import com.mongodb.client.ChangeStreamIterable;
+import com.mongodb.client.MongoClient;
+import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore;
+import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager;
+import io.airbyte.cdk.integrations.debezium.internals.DebeziumStateUtil;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.debezium.config.Configuration;
+import io.debezium.connector.mongodb.MongoDbConnectorConfig;
+import io.debezium.connector.mongodb.MongoDbOffsetContext;
+import io.debezium.connector.mongodb.MongoDbTaskContext;
+import io.debezium.connector.mongodb.MongoUtil;
+import io.debezium.connector.mongodb.ReplicaSetDiscovery;
+import io.debezium.connector.mongodb.ReplicaSets;
+import io.debezium.connector.mongodb.ResumeTokens;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+import org.apache.kafka.connect.storage.FileOffsetBackingStore;
+import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
+import org.bson.BsonDocument;
+import org.bson.BsonString;
+import org.bson.BsonTimestamp;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Collection of utility methods related to the Debezium offset state.
+ */
+public class MongoDbDebeziumStateUtil implements DebeziumStateUtil {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(MongoDbDebeziumStateUtil.class);
+
+  /**
+   * Constructs the initial Debezium offset state that will be used by the incremental CDC snapshot
+   * after an initial snapshot sync.
+   *
+   * @param resumeToken The resume token that the initial offset state is built from.
+   * @param mongoClient The {@link MongoClient} used to query the MongoDB server.
+   * @param serverId The ID of the target server.
+   * @return The initial Debezium offset state storage document as a {@link JsonNode}.
+   * @throws IllegalStateException if unable to determine the replica set.
+   */
+  public JsonNode constructInitialDebeziumState(final BsonDocument resumeToken, final MongoClient mongoClient, final String serverId) {
+    final String replicaSet = getReplicaSetName(mongoClient);
+    LOGGER.info("Initial resume token '{}' constructed, corresponding to timestamp (seconds after epoch) {}",
+        ResumeTokens.getData(resumeToken).asString().getValue(), ResumeTokens.getTimestamp(resumeToken).getTime());
+    final JsonNode state = formatState(serverId, replicaSet, ((BsonString) ResumeTokens.getData(resumeToken)).getValue());
+    LOGGER.info("Initial Debezium state constructed: {}", state);
+    return state;
+  }
+
+  /**
+   * Formats the Debezium initial state into a format suitable for storage in the offset data file.
+   *
+   * @param serverId The ID of the target MongoDB database.
+   * @param replicaSet The name of the target MongoDB replica set.
+   * @param resumeTokenData The MongoDB resume token that represents the offset state.
+   * @return The offset state as a {@link JsonNode}: a single-entry object whose key and value are
+   *         the serialized offset key and offset value.
+   */
+  public static JsonNode formatState(final String serverId, final String replicaSet, final String resumeTokenData) {
+    final BsonTimestamp timestamp = ResumeTokens.getTimestamp(ResumeTokens.fromData(resumeTokenData));
+
+    final List<Object> key = generateOffsetKey(serverId, replicaSet);
+
+    // LinkedHashMap keeps the insertion order in the serialized value and accepts the null entry.
+    final Map<String, Object> value = new LinkedHashMap<>();
+    value.put(MongoDbDebeziumConstants.OffsetState.VALUE_SECONDS, timestamp.getTime());
+    value.put(MongoDbDebeziumConstants.OffsetState.VALUE_INCREMENT, timestamp.getInc());
+    value.put(MongoDbDebeziumConstants.OffsetState.VALUE_TRANSACTION_ID, null);
+    value.put(MongoDbDebeziumConstants.OffsetState.VALUE_RESUME_TOKEN, resumeTokenData);
+
+    return Jsons.jsonNode(Map.of(Jsons.serialize(key), Jsons.serialize(value)));
+  }
+
+  /**
+   * Retrieves the replica set name for the current connection.
+   *
+   * @param mongoClient The {@link MongoClient} used to retrieve the replica set name.
+   * @return The replica set name.
+   * @throws IllegalStateException if unable to determine the replica set.
+   */
+  public static String getReplicaSetName(final MongoClient mongoClient) {
+    final Optional<String> replicaSetName = MongoUtil.replicaSetName(mongoClient.getClusterDescription());
+    return replicaSetName.orElseThrow(() -> new IllegalStateException("Unable to determine replica set."));
+  }
+
+  /**
+   * Test whether the retrieved saved offset resume token value is valid. A valid resume token is one
+   * that can be used to resume a change event stream in MongoDB.
+   *
+   * @param savedOffset The resume token from the saved offset. A {@code null} or empty document is
+   *        reported as valid without contacting the server.
+   * @param mongoClient The {@link MongoClient} used to validate the saved offset.
+   * @return {@code true} if the saved offset value is valid. Otherwise, {@code false} is returned to
+   *         indicate that an initial snapshot should be performed.
+   */
+  public boolean isValidResumeToken(final BsonDocument savedOffset, final MongoClient mongoClient) {
+    if (Objects.isNull(savedOffset) || savedOffset.isEmpty()) {
+      return true;
+    }
+
+    final ChangeStreamIterable<BsonDocument> stream = mongoClient.watch(BsonDocument.class);
+    stream.resumeAfter(savedOffset);
+    // Opening the cursor is the actual check; the cursor itself is not used and is closed right away.
+    try (final var ignored = stream.cursor()) {
+      LOGGER.info("Valid resume token '{}' present, corresponding to timestamp (seconds after epoch) : {}.  Incremental sync will be performed for "
+          + "up-to-date streams.",
+          ResumeTokens.getData(savedOffset).asString().getValue(), ResumeTokens.getTimestamp(savedOffset).getTime());
+      return true;
+    } catch (final MongoCommandException | MongoChangeStreamException e) {
+      LOGGER.info("Invalid resume token '{}' present, corresponding to timestamp (seconds after epoch) : {}.  Initial snapshot will be performed for "
+          + "all streams.",
+          ResumeTokens.getData(savedOffset).asString().getValue(), ResumeTokens.getTimestamp(savedOffset).getTime());
+      return false;
+    }
+  }
+
+  /**
+   * Saves and retrieves the Debezium offset data. This method writes the provided CDC state to the
+   * offset file and then uses Debezium's code to retrieve the state from the offset file in order to
+   * verify that Debezium will be able to read the offset data itself when invoked.
+   *
+   * @param baseProperties The base Debezium properties.
+   * @param catalog The configured Airbyte catalog.
+   * @param cdcState The current CDC state that contains the offset data.
+   * @param config The source configuration.
+   * @param mongoClient The {@link MongoClient} used to discover the replica sets.
+   * @return The resume token read back from the CDC state/offset data, or an empty
+   *         {@link Optional} if the offset data holds none.
+   */
+  public Optional<BsonDocument> savedOffset(final Properties baseProperties,
+                                            final ConfiguredAirbyteCatalog catalog,
+                                            final JsonNode cdcState,
+                                            final JsonNode config,
+                                            final MongoClient mongoClient) {
+    LOGGER.debug("Initializing file offset backing store with state '{}'...", cdcState);
+    final var offsetManager = AirbyteFileOffsetBackingStore.initializeState(cdcState, Optional.empty());
+    final DebeziumPropertiesManager debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(baseProperties, config, catalog);
+    final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager);
+    return parseSavedOffset(debeziumProperties, mongoClient);
+  }
+
+  /**
+   * Loads the offset data from the saved Debezium offset file.
+   *
+   * @param properties Properties should contain the relevant properties like path to the Debezium
+   *        state file, etc. It's assumed that the state file is already initialised with the saved
+   *        state
+   * @param mongoClient The {@link MongoClient} used to discover the replica sets.
+   * @return Returns the resume token that Airbyte has acknowledged in the source database server.
+   */
+  private Optional<BsonDocument> parseSavedOffset(final Properties properties, final MongoClient mongoClient) {
+    FileOffsetBackingStore fileOffsetBackingStore = null;
+    OffsetStorageReaderImpl offsetStorageReader = null;
+
+    try {
+      fileOffsetBackingStore = getFileOffsetBackingStore(properties);
+      offsetStorageReader = getOffsetStorageReader(fileOffsetBackingStore, properties);
+
+      final Configuration config = Configuration.from(properties);
+      final MongoDbTaskContext taskContext = new MongoDbTaskContext(config);
+      final MongoDbConnectorConfig mongoDbConnectorConfig = new MongoDbConnectorConfig(config);
+      final ReplicaSets replicaSets = new ReplicaSetDiscovery(taskContext).getReplicaSets(mongoClient);
+
+      LOGGER.debug("Parsing saved offset state for replica set '{}' and server ID '{}'...", replicaSets.all().get(0), properties.getProperty("name"));
+
+      final MongoDbOffsetContext.Loader loader = new MongoDbCustomLoader(mongoDbConnectorConfig, replicaSets);
+      final Collection<Map<String, String>> partitions = loader.getPartitions();
+      final Map<Map<String, String>, Map<String, Object>> offsets = offsetStorageReader.offsets(partitions);
+
+      if (offsets != null && offsets.values().stream().anyMatch(Objects::nonNull)) {
+        final MongoDbOffsetContext offsetContext = loader.loadOffsets(offsets);
+        // Only the first discovered replica set is consulted.
+        final Map<String, ?> offset = offsetContext.getReplicaSetOffsetContext(replicaSets.all().get(0)).getOffset();
+        final Object resumeTokenData = offset.get(MongoDbDebeziumConstants.OffsetState.VALUE_RESUME_TOKEN);
+        if (resumeTokenData != null) {
+          final BsonDocument resumeToken = ResumeTokens.fromData(resumeTokenData.toString());
+          return Optional.of(resumeToken);
+        } else {
+          LOGGER.warn("Offset data does not contain a resume token: {}", offset);
+          return Optional.empty();
+        }
+      } else {
+        LOGGER.warn("Loaded offset data is null or empty: {}", offsets);
+        return Optional.empty();
+      }
+    } finally {
+      LOGGER.info("Closing offsetStorageReader and fileOffsetBackingStore");
+      if (offsetStorageReader != null) {
+        offsetStorageReader.close();
+      }
+
+      if (fileOffsetBackingStore != null) {
+        fileOffsetBackingStore.stop();
+      }
+    }
+  }
+
+  /**
+   * Builds the offset key: the normalized server ID followed by the source info map.
+   *
+   * @param serverId The server ID; normalized with {@link MongoDbDebeziumPropertiesManager#normalizeName(String)}.
+   * @param replicaSet The replica set name.
+   * @return The two-element key list.
+   */
+  private static List<Object> generateOffsetKey(final String serverId, final String replicaSet) {
+    /*
+     * N.B. The order of the keys in the sourceInfoMap and key list matters! DO NOT CHANGE the order
+     * unless you have verified that Debezium has changed its order of the key it builds when retrieving
+     * data from the offset file. See the "partition(String replicaSetName)" method of the
+     * io.debezium.connector.mongodb.SourceInfo class for the ordering of keys in the list/map.
+     */
+    final Map<String, String> sourceInfoMap = new LinkedHashMap<>();
+    final String normalizedServerId = MongoDbDebeziumPropertiesManager.normalizeName(serverId);
+    sourceInfoMap.put(MongoDbDebeziumConstants.OffsetState.KEY_REPLICA_SET, replicaSet);
+    sourceInfoMap.put(MongoDbDebeziumConstants.OffsetState.KEY_SERVER_ID, normalizedServerId);
+
+    final List<Object> key = new LinkedList<>();
+    key.add(normalizedServerId);
+    key.add(sourceInfoMap);
+    return key;
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelper.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelper.java
new file mode 100644
index 000000000000..0493efa11cda
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelper.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mongodb.cdc;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.mongodb.client.ChangeStreamIterable;
+import com.mongodb.client.MongoChangeStreamCursor;
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.model.Aggregates;
+import com.mongodb.client.model.Filters;
+import com.mongodb.client.model.changestream.ChangeStreamDocument;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import org.bson.BsonDocument;
+import org.bson.BsonTimestamp;
+import org.bson.conversions.Bson;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Collection of utility helper methods for dealing with MongoDB resume tokens.
+ */
+public class MongoDbResumeTokenHelper {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(MongoDbResumeTokenHelper.class);
+
+  /**
+   * Retrieves the most recent resume token from MongoDB server.
+   *
+   * @param mongoClient The {@link MongoClient} used to query the MongoDB server.
+   * @param databaseName The name of the database whose change stream is opened.
+   * @param catalog The configured catalog; its stream names are used to filter the change stream by
+   *        collection name ({@code ns.coll}).
+   * @return The most recent resume token value.
+   */
+  public static BsonDocument getMostRecentResumeToken(final MongoClient mongoClient,
+                                                      final String databaseName,
+                                                      final ConfiguredAirbyteCatalog catalog) {
+    final List<String> collectionsList = catalog.getStreams().stream()
+        .map(s -> s.getStream().getName())
+        .toList();
+    LOGGER.info("Resume token for db {} with collection filter {}", databaseName, Arrays.toString(collectionsList.toArray()));
+    final List<Bson> pipeline = Collections.singletonList(Aggregates.match(
+        Filters.in("ns.coll", collectionsList)));
+    final ChangeStreamIterable<BsonDocument> eventStream = mongoClient.getDatabase(databaseName).watch(pipeline, BsonDocument.class);
+    try (final MongoChangeStreamCursor<ChangeStreamDocument<BsonDocument>> eventStreamCursor = eventStream.cursor()) {
+      /*
+       * Must call tryNext before attempting to get the resume token from the cursor directly. Otherwise,
+       * the call to getResumeToken() will return null!
+       */
+      eventStreamCursor.tryNext();
+      return eventStreamCursor.getResumeToken();
+    }
+  }
+
+  /**
+   * Extracts the timestamp from a Debezium MongoDB change event.
+   *
+   * @param event The Debezium MongoDB change event as JSON.
+   * @return The extracted timestamp
+   * @throws IllegalStateException if the timestamp could not be extracted from the change event.
+   */
+  public static BsonTimestamp extractTimestampFromEvent(final JsonNode event) {
+    return extractTimestampFromSource(event.get(MongoDbDebeziumConstants.ChangeEvent.SOURCE));
+  }
+
+  /**
+   * Extracts the timestamp from a Debezium MongoDB change event source object.
+   *
+   * @param source The Debezium MongoDB change event source object as JSON.
+   * @return The extracted timestamp
+   * @throws IllegalStateException if the timestamp could not be extracted from the change event.
+   */
+  public static BsonTimestamp extractTimestampFromSource(final JsonNode source) {
+    return Optional.ofNullable(source)
+        .flatMap(MongoDbResumeTokenHelper::createTimestampFromSource)
+        .orElseThrow(() -> new IllegalStateException("Could not find timestamp"));
+  }
+
+  /**
+   * Builds a {@link BsonTimestamp} from the millisecond timestamp and the order value of the source
+   * object. Any failure while reading those values (for example a missing field) is logged and
+   * reported as an empty result rather than propagated.
+   *
+   * @param source The change event source object; must not be {@code null}.
+   * @return The timestamp, or an empty {@link Optional} if it could not be built.
+   */
+  private static Optional<BsonTimestamp> createTimestampFromSource(final JsonNode source) {
+    try {
+      // Same narrowing as Long#intValue(), without boxing the seconds value first.
+      final int seconds = (int) TimeUnit.MILLISECONDS.toSeconds(
+          source.get(MongoDbDebeziumConstants.ChangeEvent.SOURCE_TIMESTAMP_MS).asLong());
+      return Optional.of(
+          new BsonTimestamp(
+              seconds,
+              source.get(MongoDbDebeziumConstants.ChangeEvent.SOURCE_ORDER).asInt()));
+    } catch (final Exception e) {
+      LOGGER.warn("Unable to extract timestamp data from event source.", e);
+      return Optional.empty();
+    }
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/resources/spec.json
index ae6b822110ca..9c4af4f046c9 100644
--- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/resources/spec.json
+++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/main/resources/spec.json
@@ -164,7 +164,7 @@
           "description": "The maximum number of documents to sample when attempting to discover the unique fields for a collection.",
           "default": 10000,
           "order": 10,
-          "minimum": 1000,
+          "minimum": 10,
           "maximum": 100000,
           "group": "advanced"
         }
diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test-integration/java/io/airbyte/integrations/source/mongodb/MongoDbSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test-integration/java/io/airbyte/integrations/source/mongodb/MongoDbSourceAcceptanceTest.java
index cef1eeb91f10..fa220e4386db 100644
--- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test-integration/java/io/airbyte/integrations/source/mongodb/MongoDbSourceAcceptanceTest.java
+++
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test-integration/java/io/airbyte/integrations/source/mongodb/MongoDbSourceAcceptanceTest.java @@ -22,15 +22,15 @@ import com.mongodb.client.model.Updates; import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata; import io.airbyte.cdk.integrations.debezium.internals.SnapshotMetadata; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbCdcTargetPosition; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumStateUtil; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbResumeTokenHelper; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcState; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcTargetPosition; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumStateUtil; +import io.airbyte.integrations.source.mongodb.cdc.MongoDbResumeTokenHelper; import io.airbyte.integrations.source.mongodb.state.InitialSnapshotStatus; import io.airbyte.integrations.source.mongodb.state.MongoDbStreamState; import io.airbyte.protocol.models.Field; @@ -522,7 +522,8 @@ void testSyncShouldHandlePurgedLogsGracefully() throws Exception { void testReachedTargetPosition() { final long eventTimestamp = Long.MAX_VALUE; final Integer order = 0; - final MongoDbCdcTargetPosition targetPosition = new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient)); + final MongoDbCdcTargetPosition targetPosition = + new 
MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, databaseName, getConfiguredCatalog())); final ChangeEventWithMetadata changeEventWithMetadata = mock(ChangeEventWithMetadata.class); when(changeEventWithMetadata.isSnapshotEvent()).thenReturn(true); @@ -549,8 +550,9 @@ void testReachedTargetPosition() { @Test void testIsSameOffset() { - final MongoDbCdcTargetPosition targetPosition = new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient)); - final BsonDocument resumeToken = MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient); + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, databaseName, getConfiguredCatalog())); + final BsonDocument resumeToken = MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, databaseName, getConfiguredCatalog()); final String resumeTokenString = resumeToken.get("_data").asString().getValue(); final String replicaSet = MongoDbDebeziumStateUtil.getReplicaSetName(mongoClient); final Map emptyOffsetA = Map.of(); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/generator/MongoDbInsertClient.kt b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/generator/MongoDbInsertClient.kt index d80a179a5947..fd2b7f612930 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/generator/MongoDbInsertClient.kt +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/generator/MongoDbInsertClient.kt @@ -1,7 +1,10 @@ package io.airbyte.integrations.source.mongodb +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind.DeserializationFeature +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.github.javafaker.Faker -import io.airbyte.commons.json.Jsons import 
io.github.oshai.kotlinlogging.KotlinLogging import kotlinx.cli.ArgParser import kotlinx.cli.ArgType @@ -31,7 +34,7 @@ object MongoDbInsertClient { println("Enter password: ") val password = readln() - var config = mapOf(MongoConstants.DATABASE_CONFIG_CONFIGURATION_KEY to + val config = mapOf(MongoConstants.DATABASE_CONFIG_CONFIGURATION_KEY to mapOf( MongoConstants.DATABASE_CONFIGURATION_KEY to databaseName, MongoConstants.CONNECTION_STRING_CONFIGURATION_KEY to connectionString, @@ -42,7 +45,12 @@ object MongoDbInsertClient { val faker = Faker(); - MongoConnectionUtils.createMongoClient(MongoDbSourceConfig(Jsons.deserialize(Jsons.serialize(config)))).use { mongoClient -> + val objectMapper = ObjectMapper().registerModule(JavaTimeModule()) + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + objectMapper.configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true) + val roundTrippedConfig = objectMapper.readTree(objectMapper.writeValueAsBytes(config)) + + MongoConnectionUtils.createMongoClient(MongoDbSourceConfig(roundTrippedConfig)).use { mongoClient -> val documents = mutableListOf() val batches = if (numberOfDocuments > BATCH_SIZE) numberOfDocuments / BATCH_SIZE else 1; val batchSize = if (numberOfDocuments > BATCH_SIZE) BATCH_SIZE else numberOfDocuments; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandlerTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandlerTest.java index 919903c76575..9ece697ec8fa 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandlerTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/InitialSnapshotHandlerTest.java @@ -56,9 +56,11 @@ class InitialSnapshotHandlerTest { 
private static final String COLLECTION3 = "collection3"; private static final String OBJECT_ID1_STRING = "64c0029d95ad260d69ef28a1"; + private static final String OBJECT_ID2_STRING = "64c0029d95ad260d69ef28a2"; + private static final String OBJECT_ID3_STRING = "64c0029d95ad260d69ef28a3"; private static final ObjectId OBJECT_ID1 = new ObjectId(OBJECT_ID1_STRING); - private static final ObjectId OBJECT_ID2 = new ObjectId("64c0029d95ad260d69ef28a2"); - private static final ObjectId OBJECT_ID3 = new ObjectId("64c0029d95ad260d69ef28a3"); + private static final ObjectId OBJECT_ID2 = new ObjectId(OBJECT_ID2_STRING); + private static final ObjectId OBJECT_ID3 = new ObjectId(OBJECT_ID3_STRING); private static final ObjectId OBJECT_ID4 = new ObjectId("64c0029d95ad260d69ef28a4"); private static final ObjectId OBJECT_ID5 = new ObjectId("64c0029d95ad260d69ef28a5"); private static final ObjectId OBJECT_ID6 = new ObjectId("64c0029d95ad260d69ef28a6"); @@ -332,4 +334,58 @@ void testGetIteratorsWithOneEmptyCollection() { assertFalse(collection2.hasNext()); } + @Test + void testGetIteratorsWithInitialStateNonDefaultIdType() { + insertDocuments(COLLECTION1, List.of( + new Document(Map.of( + CURSOR_FIELD, OBJECT_ID1_STRING, + NAME_FIELD, NAME1)), + new Document(Map.of( + CURSOR_FIELD, OBJECT_ID2_STRING, + NAME_FIELD, NAME2)))); + + insertDocuments(COLLECTION2, List.of( + new Document(Map.of( + CURSOR_FIELD, OBJECT_ID3_STRING, + NAME_FIELD, NAME3)))); + + final InitialSnapshotHandler initialSnapshotHandler = new InitialSnapshotHandler(); + final MongoDbStateManager stateManager = mock(MongoDbStateManager.class); + when(stateManager.getStreamState(COLLECTION1, NAMESPACE)) + .thenReturn(Optional.of(new MongoDbStreamState(OBJECT_ID1_STRING, null, IdType.STRING))); + final List> iterators = + initialSnapshotHandler.getIterators(STREAMS, stateManager, mongoClient.getDatabase(DB_NAME), null, Instant.now(), + MongoConstants.CHECKPOINT_INTERVAL, true); + + assertEquals(iterators.size(), 2, 
"Only two streams are configured as incremental, full refresh streams should be ignored"); + + final AutoCloseableIterator collection1 = iterators.get(0); + final AutoCloseableIterator collection2 = iterators.get(1); + + // collection1, first document should be skipped + final AirbyteMessage collection1StreamMessage1 = collection1.next(); + assertEquals(Type.RECORD, collection1StreamMessage1.getType()); + assertEquals(COLLECTION1, collection1StreamMessage1.getRecord().getStream()); + assertEquals(OBJECT_ID2.toString(), collection1StreamMessage1.getRecord().getData().get(CURSOR_FIELD).asText()); + assertEquals(NAME2, collection1StreamMessage1.getRecord().getData().get(NAME_FIELD).asText()); + assertConfiguredFieldsEqualsRecordDataFields(Set.of(CURSOR_FIELD, NAME_FIELD), collection1StreamMessage1.getRecord().getData()); + + final AirbyteMessage collection1SateMessage = collection1.next(); + assertEquals(Type.STATE, collection1SateMessage.getType(), "State message is expected after all records in a stream are emitted"); + + assertFalse(collection1.hasNext()); + + // collection2, no documents should be skipped + final AirbyteMessage collection2StreamMessage1 = collection2.next(); + assertEquals(Type.RECORD, collection2StreamMessage1.getType()); + assertEquals(COLLECTION2, collection2StreamMessage1.getRecord().getStream()); + assertEquals(OBJECT_ID3.toString(), collection2StreamMessage1.getRecord().getData().get(CURSOR_FIELD).asText()); + assertConfiguredFieldsEqualsRecordDataFields(Set.of(CURSOR_FIELD), collection2StreamMessage1.getRecord().getData()); + + final AirbyteMessage collection2SateMessage = collection2.next(); + assertEquals(Type.STATE, collection2SateMessage.getType(), "State message is expected after all records in a stream are emitted"); + + assertFalse(collection2.hasNext()); + } + } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoCatalogHelperTest.java 
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoCatalogHelperTest.java index f27d382277a6..069bf036f483 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoCatalogHelperTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoCatalogHelperTest.java @@ -13,7 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils; +import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteStream; @@ -44,12 +44,12 @@ void testBuildingAirbyteStream() { assertTrue(airbyteStream.getJsonSchema().get("properties").has(DEFAULT_CURSOR_FIELD)); assertEquals(JsonSchemaType.NUMBER.getJsonSchemaTypeMap().get("type"), airbyteStream.getJsonSchema().get("properties").get(DEFAULT_CURSOR_FIELD).get("type").asText()); - assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventUtils.CDC_DELETED_AT)); + assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventConverter.CDC_DELETED_AT)); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - airbyteStream.getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_DELETED_AT).get("type").asText()); - assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventUtils.CDC_UPDATED_AT)); + airbyteStream.getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_DELETED_AT).get("type").asText()); + assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventConverter.CDC_UPDATED_AT)); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - 
airbyteStream.getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_UPDATED_AT).get("type").asText()); + airbyteStream.getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_UPDATED_AT).get("type").asText()); } @@ -81,12 +81,12 @@ void testSchemalessModeAirbyteStream() { assertTrue(airbyteStream.getJsonSchema().get("properties").has(DEFAULT_CURSOR_FIELD)); assertEquals(JsonSchemaType.NUMBER.getJsonSchemaTypeMap().get("type"), airbyteStream.getJsonSchema().get("properties").get(DEFAULT_CURSOR_FIELD).get("type").asText()); - assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventUtils.CDC_DELETED_AT)); + assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventConverter.CDC_DELETED_AT)); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - airbyteStream.getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_DELETED_AT).get("type").asText()); - assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventUtils.CDC_UPDATED_AT)); + airbyteStream.getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_DELETED_AT).get("type").asText()); + assertTrue(airbyteStream.getJsonSchema().get("properties").has(DebeziumEventConverter.CDC_UPDATED_AT)); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - airbyteStream.getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_UPDATED_AT).get("type").asText()); + airbyteStream.getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_UPDATED_AT).get("type").asText()); } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtilsTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtilsTest.java index e9e8e5a89f5d..7dcde230bd8b 100644 --- 
a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtilsTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoConnectionUtilsTest.java @@ -70,7 +70,7 @@ void testCreateMongoClientWithQuotesInConnectionString() { assertEquals(List.of(new ServerAddress(host, port)), ((MongoClientImpl) mongoClient).getSettings().getClusterSettings().getHosts()); assertEquals(ReadPreference.secondaryPreferred(), ((MongoClientImpl) mongoClient).getSettings().getReadPreference()); assertEquals(false, ((MongoClientImpl) mongoClient).getSettings().getRetryWrites()); - assertEquals(true, ((MongoClientImpl) mongoClient).getSettings().getSslSettings().isEnabled()); + assertEquals(false, ((MongoClientImpl) mongoClient).getSettings().getSslSettings().isEnabled()); assertEquals(List.of("sync", MongoConstants.DRIVER_NAME), ((MongoClientImpl) mongoClient).getMongoDriverInformation().getDriverNames()); assertEquals(username, ((MongoClientImpl) mongoClient).getSettings().getCredential().getUserName()); assertEquals(password, new String(((MongoClientImpl) mongoClient).getSettings().getCredential().getPassword())); @@ -117,7 +117,7 @@ void testCreateMongoClientWithCredentialPlaceholderInConnectionString() { assertEquals(List.of(new ServerAddress(host, port)), ((MongoClientImpl) mongoClient).getSettings().getClusterSettings().getHosts()); assertEquals(ReadPreference.secondaryPreferred(), ((MongoClientImpl) mongoClient).getSettings().getReadPreference()); assertEquals(false, ((MongoClientImpl) mongoClient).getSettings().getRetryWrites()); - assertEquals(true, ((MongoClientImpl) mongoClient).getSettings().getSslSettings().isEnabled()); + assertEquals(false, ((MongoClientImpl) mongoClient).getSettings().getSslSettings().isEnabled()); assertEquals(List.of("sync", MongoConstants.DRIVER_NAME), ((MongoClientImpl) 
mongoClient).getMongoDriverInformation().getDriverNames()); assertEquals(username, ((MongoClientImpl) mongoClient).getSettings().getCredential().getUserName()); assertEquals(password, new String(((MongoClientImpl) mongoClient).getSettings().getCredential().getPassword())); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfigTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfigTest.java index 9e8665cf65d7..3ca4825b8dfd 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfigTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfigTest.java @@ -39,15 +39,16 @@ void testCreatingMongoDbSourceConfig() { final String username = "username"; final boolean isSchemaEnforced = false; final JsonNode rawConfig = Jsons.jsonNode( - Map.of(DATABASE_CONFIG_CONFIGURATION_KEY, Map.of( - AUTH_SOURCE_CONFIGURATION_KEY, authSource, - CHECKPOINT_INTERVAL_CONFIGURATION_KEY, checkpointInterval, - DATABASE_CONFIGURATION_KEY, database, + Map.of( DISCOVER_SAMPLE_SIZE_CONFIGURATION_KEY, sampleSize, - PASSWORD_CONFIGURATION_KEY, password, QUEUE_SIZE_CONFIGURATION_KEY, queueSize, - USERNAME_CONFIGURATION_KEY, username, - SCHEMA_ENFORCED_CONFIGURATION_KEY, isSchemaEnforced))); + DATABASE_CONFIG_CONFIGURATION_KEY, Map.of( + AUTH_SOURCE_CONFIGURATION_KEY, authSource, + CHECKPOINT_INTERVAL_CONFIGURATION_KEY, checkpointInterval, + DATABASE_CONFIGURATION_KEY, database, + PASSWORD_CONFIGURATION_KEY, password, + USERNAME_CONFIGURATION_KEY, username, + SCHEMA_ENFORCED_CONFIGURATION_KEY, isSchemaEnforced))); final MongoDbSourceConfig sourceConfig = new MongoDbSourceConfig(rawConfig); assertNotNull(sourceConfig); assertEquals(authSource, 
sourceConfig.getAuthSource()); @@ -55,7 +56,7 @@ void testCreatingMongoDbSourceConfig() { assertEquals(database, sourceConfig.getDatabaseName()); assertEquals(password, sourceConfig.getPassword()); assertEquals(OptionalInt.of(queueSize), sourceConfig.getQueueSize()); - assertEquals(rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY), sourceConfig.rawConfig()); + assertEquals(rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY), sourceConfig.getDatabaseConfig()); assertEquals(sampleSize, sourceConfig.getSampleSize()); assertEquals(username, sourceConfig.getUsername()); assertEquals(isSchemaEnforced, sourceConfig.getEnforceSchema()); @@ -76,7 +77,7 @@ void testDefaultValues() { assertEquals(null, sourceConfig.getDatabaseName()); assertEquals(null, sourceConfig.getPassword()); assertEquals(OptionalInt.empty(), sourceConfig.getQueueSize()); - assertEquals(rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY), sourceConfig.rawConfig()); + assertEquals(rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY), sourceConfig.getDatabaseConfig()); assertEquals(DEFAULT_DISCOVER_SAMPLE_SIZE, sourceConfig.getSampleSize()); assertEquals(null, sourceConfig.getUsername()); } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceTest.java index 49a90c5ec563..6b6f661ebb78 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbSourceTest.java @@ -35,7 +35,7 @@ import com.mongodb.client.MongoIterable; import com.mongodb.connection.ClusterDescription; import com.mongodb.connection.ClusterType; -import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils; +import 
io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcInitializer; @@ -240,9 +240,9 @@ void testDiscoverOperation() throws IOException { assertEquals(JsonSchemaType.NUMBER.getJsonSchemaTypeMap().get("type"), stream.get().getJsonSchema().get("properties").get(DEFAULT_CURSOR_FIELD).get("type").asText()); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - stream.get().getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_DELETED_AT).get("type").asText()); + stream.get().getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_DELETED_AT).get("type").asText()); assertEquals(JsonSchemaType.STRING.getJsonSchemaTypeMap().get("type"), - stream.get().getJsonSchema().get("properties").get(DebeziumEventUtils.CDC_UPDATED_AT).get("type").asText()); + stream.get().getJsonSchema().get("properties").get(DebeziumEventConverter.CDC_UPDATED_AT).get("type").asText()); assertEquals(true, stream.get().getSourceDefinedCursor()); assertEquals(List.of(DEFAULT_CURSOR_FIELD), stream.get().getDefaultCursorField()); assertEquals(List.of(List.of(MongoCatalogHelper.DEFAULT_PRIMARY_KEY)), stream.get().getSourceDefinedPrimaryKey()); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbStateIteratorTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbStateIteratorTest.java index 7b2a35fabdfa..66bf277dddbc 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbStateIteratorTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoDbStateIteratorTest.java @@ -187,7 +187,7 @@ void treatHasNextExceptionAsFalse() 
{ message.getState().getGlobal().getStreamStates().get(0).getStreamState().get("status").asText(), "state status should be in_progress"); - assertFalse(iter.hasNext(), "should have no more records"); + assertThrows(RuntimeException.class, iter::hasNext, "next iteration should throw exception to fail the sync"); } @Test diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoUtilTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoUtilTest.java index 0ff81ef13e5d..832c9c7af936 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoUtilTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/MongoUtilTest.java @@ -4,8 +4,8 @@ package io.airbyte.integrations.source.mongodb; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_DELETED_AT; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_UPDATED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; import static io.airbyte.integrations.source.mongodb.MongoCatalogHelper.AIRBYTE_STREAM_PROPERTIES; import static io.airbyte.integrations.source.mongodb.MongoConstants.DATABASE_CONFIG_CONFIGURATION_KEY; import static io.airbyte.integrations.source.mongodb.MongoConstants.DEFAULT_DISCOVER_SAMPLE_SIZE; @@ -312,18 +312,18 @@ void testGetAuthorizedCollectionsMongoSecurityException() { void testGetDebeziumEventQueueSize() { final int queueSize = 5000; final MongoDbSourceConfig validQueueSizeConfiguration = new MongoDbSourceConfig( - Jsons.jsonNode(Map.of(DATABASE_CONFIG_CONFIGURATION_KEY, 
Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, queueSize)))); + Jsons.jsonNode(Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, queueSize, DATABASE_CONFIG_CONFIGURATION_KEY, Map.of()))); final MongoDbSourceConfig tooSmallQueueSizeConfiguration = new MongoDbSourceConfig( - Jsons.jsonNode(Map.of(DATABASE_CONFIG_CONFIGURATION_KEY, Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, Integer.MIN_VALUE)))); + Jsons.jsonNode(Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, Integer.MIN_VALUE, DATABASE_CONFIG_CONFIGURATION_KEY, Map.of()))); final MongoDbSourceConfig tooLargeQueueSizeConfiguration = new MongoDbSourceConfig( - Jsons.jsonNode(Map.of(DATABASE_CONFIG_CONFIGURATION_KEY, Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, Integer.MAX_VALUE)))); + Jsons.jsonNode(Map.of(MongoConstants.QUEUE_SIZE_CONFIGURATION_KEY, Integer.MAX_VALUE, DATABASE_CONFIG_CONFIGURATION_KEY, Map.of()))); final MongoDbSourceConfig missingQueueSizeConfiguration = new MongoDbSourceConfig(Jsons.jsonNode(Map.of(DATABASE_CONFIG_CONFIGURATION_KEY, Map.of()))); - assertEquals(queueSize, MongoUtil.getDebeziumEventQueueSize(validQueueSizeConfiguration).getAsInt()); - assertEquals(MIN_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(tooSmallQueueSizeConfiguration).getAsInt()); - assertEquals(MAX_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(tooLargeQueueSizeConfiguration).getAsInt()); - assertEquals(MAX_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(missingQueueSizeConfiguration).getAsInt()); + assertEquals(queueSize, MongoUtil.getDebeziumEventQueueSize(validQueueSizeConfiguration)); + assertEquals(MIN_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(tooSmallQueueSizeConfiguration)); + assertEquals(MAX_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(tooLargeQueueSizeConfiguration)); + assertEquals(MAX_QUEUE_SIZE, MongoUtil.getDebeziumEventQueueSize(missingQueueSizeConfiguration)); } @Test diff --git 
a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjectorTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjectorTest.java index 78be3791db63..372fea5c766e 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjectorTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcConnectorMetadataInjectorTest.java @@ -4,12 +4,11 @@ package io.airbyte.integrations.source.mongodb.cdc; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_DELETED_AT; -import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventUtils.CDC_UPDATED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT; import static org.junit.jupiter.api.Assertions.assertEquals; import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants; import io.airbyte.commons.json.Jsons; import java.lang.reflect.Field; import java.time.Instant; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtilsTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtilsTest.java new file mode 100644 index 000000000000..3068668bb972 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcEventUtilsTest.java @@ -0,0 +1,254 @@ +/* + * 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.DOCUMENT_OBJECT_ID_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.ID_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.OBJECT_ID_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.OBJECT_ID_FIELD_PATTERN; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.SCHEMALESS_MODE_DATA_FIELD; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.db.DataTypeUtils; +import io.airbyte.commons.json.Jsons; +import java.nio.charset.Charset; +import java.util.Base64; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import org.bson.BsonBinary; +import org.bson.BsonBoolean; +import org.bson.BsonDateTime; +import org.bson.BsonDecimal128; +import org.bson.BsonDocument; +import org.bson.BsonDouble; +import org.bson.BsonInt32; +import org.bson.BsonInt64; +import org.bson.BsonJavaScript; +import org.bson.BsonJavaScriptWithScope; +import org.bson.BsonNull; +import org.bson.BsonObjectId; +import org.bson.BsonRegularExpression; +import org.bson.BsonString; +import org.bson.BsonSymbol; +import org.bson.BsonTimestamp; +import org.bson.Document; +import org.bson.UuidRepresentation; +import org.bson.types.Decimal128; +import org.bson.types.ObjectId; +import org.junit.jupiter.api.Test; + 
+class MongoDbCdcEventUtilsTest { + + private static final String OBJECT_ID = "64f24244f95155351c4185b1"; + + @Test + void testGenerateObjectIdDocument() { + final String key = "{\"" + OBJECT_ID_FIELD + "\": \"" + OBJECT_ID + "\"}"; + JsonNode debeziumEventKey = Jsons.jsonNode(Map.of(ID_FIELD, key)); + + String updated = MongoDbCdcEventUtils.generateObjectIdDocument(debeziumEventKey); + + assertTrue(updated.contains(DOCUMENT_OBJECT_ID_FIELD)); + assertEquals(key.replaceAll(OBJECT_ID_FIELD_PATTERN, DOCUMENT_OBJECT_ID_FIELD), updated); + + debeziumEventKey = Jsons.jsonNode(Map.of(ID_FIELD, "\"" + OBJECT_ID + "\"")); + updated = MongoDbCdcEventUtils.generateObjectIdDocument(debeziumEventKey); + assertTrue(updated.contains(DOCUMENT_OBJECT_ID_FIELD)); + assertEquals(Jsons.serialize(Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, OBJECT_ID))), updated); + } + + @Test + void testNormalizeObjectId() { + final JsonNode data = MongoDbCdcEventUtils.normalizeObjectId((ObjectNode) Jsons.jsonNode( + Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of(OBJECT_ID_FIELD, OBJECT_ID)))); + assertEquals(OBJECT_ID, data.get(DOCUMENT_OBJECT_ID_FIELD).asText()); + + final JsonNode dataWithoutObjectId = MongoDbCdcEventUtils.normalizeObjectId((ObjectNode) Jsons.jsonNode( + Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of()))); + assertNotEquals(OBJECT_ID, dataWithoutObjectId.get(DOCUMENT_OBJECT_ID_FIELD).asText()); + + final JsonNode dataWithoutId = MongoDbCdcEventUtils.normalizeObjectId((ObjectNode) Jsons.jsonNode(Map.of())); + assertNull(dataWithoutId.get(DOCUMENT_OBJECT_ID_FIELD)); + + final JsonNode stringId = MongoDbCdcEventUtils.normalizeObjectId((ObjectNode) Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, "abcd"))); + assertEquals("abcd", stringId.get(DOCUMENT_OBJECT_ID_FIELD).asText()); + } + + @Test + void testNormalizeObjectIdNoSchema() { + var objectNode = (ObjectNode) Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of(OBJECT_ID_FIELD, OBJECT_ID))); + objectNode.set(SCHEMALESS_MODE_DATA_FIELD, + 
Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of(OBJECT_ID_FIELD, OBJECT_ID)))); + + final JsonNode data = MongoDbCdcEventUtils.normalizeObjectIdNoSchema(objectNode); + assertEquals(OBJECT_ID, data.get(DOCUMENT_OBJECT_ID_FIELD).asText()); + assertEquals(OBJECT_ID, data.get(SCHEMALESS_MODE_DATA_FIELD).get(DOCUMENT_OBJECT_ID_FIELD).asText()); + + objectNode = (ObjectNode) Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of())); + objectNode.set(SCHEMALESS_MODE_DATA_FIELD, Jsons.jsonNode(Map.of(DOCUMENT_OBJECT_ID_FIELD, Map.of()))); + final JsonNode dataWithoutObjectId = MongoDbCdcEventUtils.normalizeObjectIdNoSchema(objectNode); + assertNotEquals(OBJECT_ID, dataWithoutObjectId.get(DOCUMENT_OBJECT_ID_FIELD).asText()); + assertNotEquals(OBJECT_ID, dataWithoutObjectId.get(SCHEMALESS_MODE_DATA_FIELD).get(DOCUMENT_OBJECT_ID_FIELD).asText()); + + final JsonNode dataWithoutId = MongoDbCdcEventUtils.normalizeObjectIdNoSchema((ObjectNode) Jsons.jsonNode(Map.of())); + assertNull(dataWithoutId.get(DOCUMENT_OBJECT_ID_FIELD)); + } + + @Test + void testTransformDataTypes() { + final BsonTimestamp bsonTimestamp = new BsonTimestamp(394, 1926745562); + final String expectedTimestamp = DataTypeUtils.toISO8601StringWithMilliseconds(bsonTimestamp.getValue()); + final UUID standardUuid = UUID.randomUUID(); + final UUID legacyUuid = UUID.randomUUID(); + + final Document document = new Document("field1", new BsonBoolean(true)) + .append("field2", new BsonInt32(1)) + .append("field3", new BsonInt64(2)) + .append("field4", new BsonDouble(3.0)) + .append("field5", new BsonDecimal128(new Decimal128(4))) + .append("field6", bsonTimestamp) + .append("field7", new BsonDateTime(bsonTimestamp.getValue())) + .append("field8", new BsonBinary("test".getBytes(Charset.defaultCharset()))) + .append("field9", new BsonSymbol("test2")) + .append("field10", new BsonString("test3")) + .append("field11", new BsonObjectId(new ObjectId(OBJECT_ID))) + .append("field12", new BsonJavaScript("code")) + 
.append("field13", new BsonJavaScriptWithScope("code2", new BsonDocument("scope", new BsonString("scope")))) + .append("field14", new BsonRegularExpression("pattern")) + .append("field15", new BsonNull()) + .append("field16", new Document("key", "value")) + .append("field17", new BsonBinary(standardUuid, UuidRepresentation.STANDARD)) + .append("field18", new BsonBinary(legacyUuid, UuidRepresentation.JAVA_LEGACY)); + + final String documentAsJson = document.toJson(); + final ObjectNode transformed = MongoDbCdcEventUtils.transformDataTypes(documentAsJson, document.keySet()); + + assertNotNull(transformed); + assertNotEquals(documentAsJson, Jsons.serialize(transformed)); + assertEquals(true, transformed.get("field1").asBoolean()); + assertEquals(1, transformed.get("field2").asInt()); + assertEquals(2, transformed.get("field3").asInt()); + assertEquals(3.0, transformed.get("field4").asDouble()); + assertEquals(4.0, transformed.get("field5").asDouble()); + assertEquals(expectedTimestamp, transformed.get("field6").asText()); + assertEquals(expectedTimestamp, transformed.get("field7").asText()); + assertEquals(Base64.getEncoder().encodeToString("test".getBytes(Charset.defaultCharset())), transformed.get("field8").asText()); + assertEquals("test2", transformed.get("field9").asText()); + assertEquals("test3", transformed.get("field10").asText()); + assertEquals(OBJECT_ID, transformed.get("field11").asText()); + assertEquals("code", transformed.get("field12").asText()); + assertEquals("code2", transformed.get("field13").get("code").asText()); + assertEquals("scope", transformed.get("field13").get("scope").get("scope").asText()); + assertEquals("pattern", transformed.get("field14").asText()); + assertFalse(transformed.has("field15")); + assertEquals("value", transformed.get("field16").get("key").asText()); + // Assert that UUIDs can be serialized. Currently, they will be represented as base 64 encoded + // strings. 
Since the original mongo source + // may have these UUIDs written by a variety of sources, each with different encodings - we cannot + // decode these back to the original UUID. + assertTrue(transformed.has("field17")); + assertTrue(transformed.has("field18")); + } + + @Test + void testTransformDataTypesWithFilteredFields() { + final BsonTimestamp bsonTimestamp = new BsonTimestamp(394, 1926745562); + final String expectedTimestamp = DataTypeUtils.toISO8601StringWithMilliseconds(bsonTimestamp.getValue()); + + final Document document = new Document("field1", new BsonBoolean(true)) + .append("field2", new BsonInt32(1)) + .append("field3", new BsonInt64(2)) + .append("field4", new BsonDouble(3.0)) + .append("field5", new BsonDecimal128(new Decimal128(4))) + .append("field6", bsonTimestamp) + .append("field7", new BsonDateTime(bsonTimestamp.getValue())) + .append("field8", new BsonBinary("test".getBytes(Charset.defaultCharset()))) + .append("field9", new BsonSymbol("test2")) + .append("field10", new BsonString("test3")) + .append("field11", new BsonObjectId(new ObjectId(OBJECT_ID))) + .append("field12", new BsonJavaScript("code")) + .append("field13", new BsonJavaScriptWithScope("code2", new BsonDocument("scope", new BsonString("scope")))) + .append("field14", new BsonRegularExpression("pattern")) + .append("field15", new BsonNull()) + .append("field16", new Document("key", "value")); + + final String documentAsJson = document.toJson(); + final ObjectNode transformed = MongoDbCdcEventUtils.transformDataTypes(documentAsJson, Set.of("field1", "field2", "field3")); + + assertNotNull(transformed); + assertNotEquals(documentAsJson, Jsons.serialize(transformed)); + assertEquals(true, transformed.get("field1").asBoolean()); + assertEquals(1, transformed.get("field2").asInt()); + assertEquals(2, transformed.get("field3").asInt()); + assertFalse(transformed.has("field4")); + assertFalse(transformed.has("field5")); + assertFalse(transformed.has("field6")); + 
assertFalse(transformed.has("field7")); + assertFalse(transformed.has("field8")); + assertFalse(transformed.has("field9")); + assertFalse(transformed.has("field10")); + assertFalse(transformed.has("field11")); + assertFalse(transformed.has("field12")); + assertFalse(transformed.has("field13")); + assertFalse(transformed.has("field14")); + assertFalse(transformed.has("field15")); + assertFalse(transformed.has("field16")); + } + + @Test + void testTransformDataTypesNoSchema() { + final BsonTimestamp bsonTimestamp = new BsonTimestamp(394, 1926745562); + final String expectedTimestamp = DataTypeUtils.toISO8601StringWithMilliseconds(bsonTimestamp.getValue()); + + final Document document = new Document("field1", new BsonBoolean(true)) + .append("field2", new BsonInt32(1)) + .append("field3", new BsonInt64(2)) + .append("field4", new BsonDouble(3.0)) + .append("field5", new BsonDecimal128(new Decimal128(4))) + .append("field6", bsonTimestamp) + .append("field7", new BsonDateTime(bsonTimestamp.getValue())) + .append("field8", new BsonBinary("test".getBytes(Charset.defaultCharset()))) + .append("field9", new BsonSymbol("test2")) + .append("field10", new BsonString("test3")) + .append("field11", new BsonObjectId(new ObjectId(OBJECT_ID))) + .append("field12", new BsonJavaScript("code")) + .append("field13", new BsonJavaScriptWithScope("code2", new BsonDocument("scope", new BsonString("scope")))) + .append("field14", new BsonRegularExpression("pattern")) + .append("field15", new BsonNull()) + .append("field16", new Document("key", "value")); + + final String documentAsJson = document.toJson(); + final ObjectNode transformed = MongoDbCdcEventUtils.transformDataTypesNoSchema(documentAsJson); + + assertNotNull(transformed); + final var abDataNode = transformed.get(SCHEMALESS_MODE_DATA_FIELD); + assertNotEquals(documentAsJson, Jsons.serialize(abDataNode)); + assertEquals(true, abDataNode.get("field1").asBoolean()); + assertEquals(1, abDataNode.get("field2").asInt()); + 
assertEquals(2, abDataNode.get("field3").asInt()); + assertEquals(3.0, abDataNode.get("field4").asDouble()); + assertEquals(4.0, abDataNode.get("field5").asDouble()); + assertTrue(abDataNode.has("field6")); + assertTrue(abDataNode.has("field7")); + assertTrue(abDataNode.has("field8")); + assertTrue(abDataNode.has("field9")); + assertTrue(abDataNode.has("field10")); + assertTrue(abDataNode.has("field11")); + assertTrue(abDataNode.has("field12")); + assertTrue(abDataNode.has("field13")); + assertTrue(abDataNode.has("field14")); + assertFalse(abDataNode.has("field15")); + assertTrue(abDataNode.has("field16")); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializerTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializerTest.java index f65d0882adf4..9b87de23c1a3 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializerTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcInitializerTest.java @@ -30,12 +30,12 @@ import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.Aggregates; +import com.mongodb.client.model.Filters; import com.mongodb.client.model.changestream.ChangeStreamDocument; import com.mongodb.connection.ClusterDescription; import com.mongodb.connection.ClusterType; import com.mongodb.connection.ServerDescription; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumStateUtil; import io.airbyte.commons.exceptions.ConfigErrorException; import io.airbyte.commons.json.Jsons; import 
io.airbyte.commons.util.AutoCloseableIterator; @@ -60,6 +60,7 @@ import io.airbyte.protocol.models.v0.SyncMode; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; @@ -67,6 +68,7 @@ import org.bson.BsonDocument; import org.bson.BsonString; import org.bson.Document; +import org.bson.conversions.Bson; import org.bson.types.ObjectId; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -108,6 +110,8 @@ class MongoDbCdcInitializerTest { private MongoCursor findCursor; private ChangeStreamIterable changeStreamIterable; private MongoDbCdcConnectorMetadataInjector cdcConnectorMetadataInjector; + private static final List PIPELINE = Collections.singletonList(Aggregates.match( + Filters.in("ns.coll", List.of(COLLECTION)))); @BeforeEach void setUp() { @@ -134,6 +138,7 @@ void setUp() { when(clusterDescription.getServerDescriptions()).thenReturn(List.of(serverDescription)); when(clusterDescription.getType()).thenReturn(ClusterType.REPLICA_SET); when(mongoClient.watch(BsonDocument.class)).thenReturn(changeStreamIterable); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); when(mongoClient.getDatabase(DATABASE)).thenReturn(mongoDatabase); when(mongoClient.getClusterDescription()).thenReturn(clusterDescription); when(mongoDatabase.getCollection(COLLECTION)).thenReturn(mongoCollection); diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcPropertiesTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcPropertiesTest.java index 0bb2bfba05f3..f6ea739be583 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcPropertiesTest.java +++ 
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcPropertiesTest.java @@ -6,6 +6,8 @@ import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CAPTURE_MODE_KEY; import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CAPTURE_MODE_VALUE; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CAPTURE_SCOPE_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CAPTURE_SCOPE_VALUE; import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CONNECTOR_CLASS_KEY; import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.CONNECTOR_CLASS_VALUE; import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcProperties.HEARTBEAT_FREQUENCY_MS; @@ -24,12 +26,13 @@ class MongoDbCdcPropertiesTest { @Test void testDebeziumProperties() { final Properties debeziumProperties = MongoDbCdcProperties.getDebeziumProperties(); - assertEquals(5, debeziumProperties.size()); + assertEquals(6, debeziumProperties.size()); assertEquals(CONNECTOR_CLASS_VALUE, debeziumProperties.get(CONNECTOR_CLASS_KEY)); assertEquals(SNAPSHOT_MODE_VALUE, debeziumProperties.get(SNAPSHOT_MODE_KEY)); assertEquals(CAPTURE_MODE_VALUE, debeziumProperties.get(CAPTURE_MODE_KEY)); assertEquals(HEARTBEAT_FREQUENCY_MS, debeziumProperties.get(HEARTBEAT_INTERVAL_KEY)); assertEquals(TOMBSTONE_ON_DELETE_VALUE, debeziumProperties.get(TOMBSTONE_ON_DELETE_KEY)); + assertEquals(CAPTURE_SCOPE_VALUE, debeziumProperties.get(CAPTURE_SCOPE_KEY)); } } diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcSavedInfoFetcherTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcSavedInfoFetcherTest.java index 515903207ce2..935051f3b229 100644 --- 
a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcSavedInfoFetcherTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcSavedInfoFetcherTest.java @@ -8,7 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumStateUtil; import org.junit.jupiter.api.Test; class MongoDbCdcSavedInfoFetcherTest { diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcStateHandlerTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcStateHandlerTest.java index a4a253654cf1..d3bfed395b0e 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcStateHandlerTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcStateHandlerTest.java @@ -10,7 +10,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.fasterxml.jackson.core.type.TypeReference; -import io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumStateUtil; import io.airbyte.integrations.source.mongodb.state.MongoDbStateManager; import io.airbyte.protocol.models.Jsons; import io.airbyte.protocol.models.v0.AirbyteMessage; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPositionTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPositionTest.java new file mode 100644 index 000000000000..20cf0b1ef9cb --- 
/dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCdcTargetPositionTest.java @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static com.mongodb.assertions.Assertions.assertNotNull; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.ID_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.OBJECT_ID_FIELD; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.mongodb.client.ChangeStreamIterable; +import com.mongodb.client.MongoChangeStreamCursor; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.Aggregates; +import com.mongodb.client.model.Filters; +import com.mongodb.client.model.changestream.ChangeStreamDocument; +import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.Jsons; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.debezium.connector.mongodb.ResumeTokens; +import io.debezium.engine.ChangeEvent; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.bson.BsonDocument; +import org.bson.BsonTimestamp; +import org.bson.conversions.Bson; +import org.junit.jupiter.api.Test; + +class MongoDbCdcTargetPositionTest { + + private static final String OBJECT_ID = "64f24244f95155351c4185b1"; + 
private static final String RESUME_TOKEN = "8264BEB9F3000000012B0229296E04"; + private static final String OTHER_RESUME_TOKEN = "8264BEB9F3000000012B0229296E05"; + private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog(); + private static final String DATABASE = "test-database"; + private static final List PIPELINE = Collections.singletonList(Aggregates.match( + Filters.in("ns.coll", Collections.emptyList()))); + + @Test + void testCreateTargetPosition() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + assertNotNull(targetPosition); + assertEquals(ResumeTokens.getTimestamp(resumeTokenDocument), targetPosition.getResumeTokenTimestamp()); + } + + @Test + void testReachedTargetPosition() throws IOException { + final String changeEventJson = MoreResources.readResource("mongodb/change_event.json"); + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final 
MongoDatabase mongoDatabase = mock(MongoDatabase.class); + final ChangeEvent changeEvent = mock(ChangeEvent.class); + + when(changeEvent.key()).thenReturn("{\"" + ID_FIELD + "\":\"{\\\"" + OBJECT_ID_FIELD + "\\\": \\\"" + OBJECT_ID + "\\\"}\"}"); + when(changeEvent.value()).thenReturn(changeEventJson); + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final ChangeEventWithMetadata changeEventWithMetadata = new ChangeEventWithMetadata(changeEvent); + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + assertTrue(targetPosition.reachedTargetPosition(changeEventWithMetadata)); + + when(changeEvent.value()).thenReturn(changeEventJson.replaceAll("\"ts_ms\": \\d+,", "\"ts_ms\": 1590221043000,")); + final ChangeEventWithMetadata changeEventWithMetadata2 = new ChangeEventWithMetadata(changeEvent); + assertFalse(targetPosition.reachedTargetPosition(changeEventWithMetadata2)); + } + + @Test + void testReachedTargetPositionSnapshotEvent() throws IOException { + final String changeEventJson = MoreResources.readResource("mongodb/change_event_snapshot.json"); + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + final ChangeEvent changeEvent = mock(ChangeEvent.class); + + when(changeEvent.key()).thenReturn("{\"" + ID_FIELD + "\":\"{\\\"" + OBJECT_ID_FIELD + "\\\": 
\\\"" + OBJECT_ID + "\\\"}\"}"); + when(changeEvent.value()).thenReturn(changeEventJson); + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final ChangeEventWithMetadata changeEventWithMetadata = new ChangeEventWithMetadata(changeEvent); + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + assertFalse(targetPosition.reachedTargetPosition(changeEventWithMetadata)); + } + + @Test + void testReachedTargetPositionSnapshotLastEvent() throws IOException { + final String changeEventJson = MoreResources.readResource("mongodb/change_event_snapshot_last.json"); + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + final ChangeEvent changeEvent = mock(ChangeEvent.class); + + when(changeEvent.key()).thenReturn("{\"" + ID_FIELD + "\":\"{\\\"" + OBJECT_ID_FIELD + "\\\": \\\"" + OBJECT_ID + "\\\"}\"}"); + when(changeEvent.value()).thenReturn(changeEventJson); + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.watch(BsonDocument.class)).thenReturn(changeStreamIterable); + + final 
ChangeEventWithMetadata changeEventWithMetadata = new ChangeEventWithMetadata(changeEvent); + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + assertTrue(targetPosition.reachedTargetPosition(changeEventWithMetadata)); + } + + @Test + void testReachedTargetPositionFromHeartbeat() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + final BsonTimestamp heartbeatTimestamp = new BsonTimestamp( + Long.valueOf(ResumeTokens.getTimestamp(resumeTokenDocument).getTime() + TimeUnit.HOURS.toSeconds(1)).intValue(), + 0); + + assertTrue(targetPosition.reachedTargetPosition(heartbeatTimestamp)); + assertFalse(targetPosition.reachedTargetPosition((BsonTimestamp) null)); + } + + @Test + void testIsHeartbeatSupported() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final 
MongoDatabase mongoDatabase = mock(MongoDatabase.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + + assertTrue(targetPosition.isHeartbeatSupported()); + } + + @Test + void testExtractPositionFromHeartbeatOffset() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final BsonTimestamp resumeTokenTimestamp = ResumeTokens.getTimestamp(resumeTokenDocument); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + + final Map sourceOffset = Map.of( + MongoDbDebeziumConstants.ChangeEvent.SOURCE_SECONDS, resumeTokenTimestamp.getTime(), + MongoDbDebeziumConstants.ChangeEvent.SOURCE_ORDER, resumeTokenTimestamp.getInc(), + MongoDbDebeziumConstants.ChangeEvent.SOURCE_RESUME_TOKEN, RESUME_TOKEN); + + final BsonTimestamp timestamp = 
targetPosition.extractPositionFromHeartbeatOffset(sourceOffset); + assertEquals(resumeTokenTimestamp, timestamp); + } + + @Test + void testIsEventAheadOfOffset() throws IOException { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + final String changeEventJson = MoreResources.readResource("mongodb/change_event.json"); + final ChangeEvent changeEvent = mock(ChangeEvent.class); + + when(changeEvent.key()).thenReturn("{\"" + ID_FIELD + "\":\"{\\\"" + OBJECT_ID_FIELD + "\\\": \\\"" + OBJECT_ID + "\\\"}\"}"); + when(changeEvent.value()).thenReturn(changeEventJson); + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final ChangeEventWithMetadata changeEventWithMetadata = new ChangeEventWithMetadata(changeEvent); + final Map offset = + Jsons.object(MongoDbDebeziumStateUtil.formatState(null, null, RESUME_TOKEN), new TypeReference<>() {}); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + final boolean result = targetPosition.isEventAheadOffset(offset, changeEventWithMetadata); + assertTrue(result); + } + + @Test + void testIsSameOffset() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> 
mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + when(mongoDatabase.watch(PIPELINE, BsonDocument.class)).thenReturn(changeStreamIterable); + + final Map offsetA = + Jsons.object(MongoDbDebeziumStateUtil.formatState(null, null, RESUME_TOKEN), new TypeReference<>() {}); + final Map offsetB = + Jsons.object(MongoDbDebeziumStateUtil.formatState(null, null, RESUME_TOKEN), new TypeReference<>() {}); + final Map offsetC = + Jsons.object(MongoDbDebeziumStateUtil.formatState(null, null, OTHER_RESUME_TOKEN), new TypeReference<>() {}); + + final MongoDbCdcTargetPosition targetPosition = + new MongoDbCdcTargetPosition(MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, CATALOG)); + + assertTrue(targetPosition.isSameOffset(offsetA, offsetA)); + assertTrue(targetPosition.isSameOffset(offsetA, offsetB)); + assertTrue(targetPosition.isSameOffset(offsetB, offsetA)); + assertFalse(targetPosition.isSameOffset(offsetA, offsetC)); + assertFalse(targetPosition.isSameOffset(offsetB, offsetC)); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoaderTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoaderTest.java new file mode 100644 index 000000000000..016c675f9735 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbCustomLoaderTest.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_ORDER; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_RESUME_TOKEN; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_SECONDS; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.OffsetState.KEY_REPLICA_SET; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.OffsetState.VALUE_TRANSACTION_ID; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import com.mongodb.ConnectionString; +import io.debezium.connector.mongodb.MongoDbConnectorConfig; +import io.debezium.connector.mongodb.MongoDbOffsetContext; +import io.debezium.connector.mongodb.ReplicaSets; +import io.debezium.connector.mongodb.ResumeTokens; +import io.debezium.connector.mongodb.connection.ReplicaSet; +import java.util.HashMap; +import java.util.Map; +import org.bson.BsonDocument; +import org.bson.BsonTimestamp; +import org.junit.jupiter.api.Test; + +class MongoDbCustomLoaderTest { + + private static final String RESUME_TOKEN = "8264BEB9F3000000012B0229296E04"; + + @Test + void testLoadOffsets() { + final String replicaSet = "replica-set"; + final BsonDocument resumeToken = ResumeTokens.fromData(RESUME_TOKEN); + final BsonTimestamp timestamp = ResumeTokens.getTimestamp(resumeToken); + final Map key = Map.of(KEY_REPLICA_SET, replicaSet); + final Map value = new HashMap<>(); + value.put(SOURCE_SECONDS, timestamp.getTime()); + value.put(SOURCE_ORDER, timestamp.getInc()); + value.put(SOURCE_RESUME_TOKEN, RESUME_TOKEN); + value.put(VALUE_TRANSACTION_ID, null); + final Map, Map> offsets = Map.of(key, value); + final MongoDbConnectorConfig mongoDbConnectorConfig = 
mock(MongoDbConnectorConfig.class); + final ReplicaSets replicaSets = ReplicaSets.of( + new ReplicaSet(new ConnectionString("mongodb://localhost:1234/?replicaSet=" + replicaSet))); + final MongoDbCustomLoader loader = new MongoDbCustomLoader(mongoDbConnectorConfig, replicaSets); + + final MongoDbOffsetContext context = loader.loadOffsets(offsets); + final Map offset = context.getReplicaSetOffsetContext(replicaSets.all().get(0)).getOffset(); + + assertNotNull(offset); + assertEquals(value, offset); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverterTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverterTest.java new file mode 100644 index 000000000000..83f3363ae97d --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumEventConverterTest.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.ID_FIELD; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcEventUtils.OBJECT_ID_FIELD; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector; +import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata; +import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumEventConverter; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.debezium.engine.ChangeEvent; +import java.io.IOException; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class MongoDbDebeziumEventConverterTest { + + @Test + void testConvertRelationalDbChangeEvent() throws IOException { + final String stream = "names"; + final Instant emittedAt = Instant.now(); + final CdcMetadataInjector cdcMetadataInjector = new DummyMetadataInjector(); + final ChangeEventWithMetadata insertChangeEvent = mockChangeEvent("insert_change_event.json", ""); + final ChangeEventWithMetadata updateChangeEvent = mockChangeEvent("update_change_event.json", ""); + final ChangeEventWithMetadata deleteChangeEvent = mockChangeEvent("delete_change_event.json", ""); + final var eventConverter = new 
RelationalDbDebeziumEventConverter(cdcMetadataInjector, emittedAt); + + final AirbyteMessage actualInsert = eventConverter.toAirbyteMessage(insertChangeEvent); + final AirbyteMessage actualUpdate = eventConverter.toAirbyteMessage(updateChangeEvent); + final AirbyteMessage actualDelete = eventConverter.toAirbyteMessage(deleteChangeEvent); + + final AirbyteMessage expectedInsert = createAirbyteMessage(stream, emittedAt, "insert_message.json"); + final AirbyteMessage expectedUpdate = createAirbyteMessage(stream, emittedAt, "update_message.json"); + final AirbyteMessage expectedDelete = createAirbyteMessage(stream, emittedAt, "delete_message.json"); + + deepCompare(expectedInsert, actualInsert); + deepCompare(expectedUpdate, actualUpdate); + deepCompare(expectedDelete, actualDelete); + } + + @Test + void testConvertMongoDbChangeEvent() throws IOException { + final String objectId = "64f24244f95155351c4185b1"; + final String stream = "names"; + final Instant emittedAt = Instant.now(); + final CdcMetadataInjector cdcMetadataInjector = new DummyMetadataInjector(); + final ChangeEventWithMetadata insertChangeEvent = mockChangeEvent("mongodb/change_event_insert.json", ""); + final ChangeEventWithMetadata updateChangeEvent = mockChangeEvent("mongodb/change_event_update.json", ""); + final ChangeEventWithMetadata deleteChangeEvent = mockChangeEvent("mongodb/change_event_delete.json", ""); + final ChangeEventWithMetadata deleteChangeEventNoBefore = mockChangeEvent("mongodb/change_event_delete_no_before.json", + "{\\\"" + OBJECT_ID_FIELD + "\\\": \\\"" + objectId + "\\\"}"); + + final AirbyteMessage expectedInsert = createAirbyteMessage(stream, emittedAt, "mongodb/insert_airbyte_message.json"); + final AirbyteMessage expectedUpdate = createAirbyteMessage(stream, emittedAt, "mongodb/update_airbyte_message.json"); + final AirbyteMessage expectedDelete = createAirbyteMessage(stream, emittedAt, "mongodb/delete_airbyte_message.json"); + final AirbyteMessage expectedDeleteNoBefore = 
createAirbyteMessage(stream, emittedAt, "mongodb/delete_no_before_airbyte_message.json"); + + final AirbyteMessage actualInsert = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedInsert), emittedAt, Jsons.emptyObject()) + .toAirbyteMessage(insertChangeEvent); + final AirbyteMessage actualUpdate = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedUpdate), emittedAt, Jsons.emptyObject()) + .toAirbyteMessage(updateChangeEvent); + final AirbyteMessage actualDelete = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedDelete), emittedAt, Jsons.emptyObject()) + .toAirbyteMessage(deleteChangeEvent); + final AirbyteMessage actualDeleteNoBefore = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedDeleteNoBefore), emittedAt, Jsons.emptyObject()) + .toAirbyteMessage(deleteChangeEventNoBefore); + + deepCompare(expectedInsert, actualInsert); + deepCompare(expectedUpdate, actualUpdate); + deepCompare(expectedDelete, actualDelete); + deepCompare(expectedDeleteNoBefore, actualDeleteNoBefore); + } + + @Test + void testConvertMongoDbChangeEventNoSchema() throws IOException { + final String objectId = "64f24244f95155351c4185b1"; + final String stream = "names"; + final Instant emittedAt = Instant.now(); + final CdcMetadataInjector cdcMetadataInjector = new DummyMetadataInjector(); + final ChangeEventWithMetadata insertChangeEvent = mockChangeEvent("mongodb/change_event_insert.json", ""); + final ChangeEventWithMetadata updateChangeEvent = mockChangeEvent("mongodb/change_event_update.json", ""); + final ChangeEventWithMetadata deleteChangeEvent = mockChangeEvent("mongodb/change_event_delete.json", ""); + final ChangeEventWithMetadata deleteChangeEventNoBefore = mockChangeEvent("mongodb/change_event_delete_no_before.json", + "{\\\"" + OBJECT_ID_FIELD + "\\\": \\\"" + objectId + "\\\"}"); + + final AirbyteMessage expectedInsert = 
createAirbyteMessage(stream, emittedAt, "mongodb/insert_airbyte_message_no_schema.json"); + final AirbyteMessage expectedUpdate = createAirbyteMessage(stream, emittedAt, "mongodb/update_airbyte_message_no_schema.json"); + final AirbyteMessage expectedDelete = createAirbyteMessage(stream, emittedAt, "mongodb/delete_airbyte_message_no_schema.json"); + final AirbyteMessage expectedDeleteNoBefore = createAirbyteMessage(stream, emittedAt, "mongodb/delete_no_before_airbyte_message_no_schema.json"); + + final ConfiguredAirbyteCatalog insertConfiguredAirbyteCatalog = buildFromAirbyteMessage(expectedInsert); + final ConfiguredAirbyteCatalog updateConfiguredAirbyteCatalog = buildFromAirbyteMessage(expectedUpdate); + final ConfiguredAirbyteCatalog deleteConfiguredAirbyteCatalog = buildFromAirbyteMessage(expectedDelete); + final ConfiguredAirbyteCatalog deleteNoBeforeConfiguredAirbyteCatalog = buildFromAirbyteMessage(expectedDeleteNoBefore); + + final JsonNode noSchemaConfig = + Jsons.jsonNode(Map.of(MongoDbDebeziumConstants.Configuration.SCHEMA_ENFORCED_CONFIGURATION_KEY, false)); + final AirbyteMessage actualInsert = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedInsert), emittedAt, noSchemaConfig) + .toAirbyteMessage(insertChangeEvent); + final AirbyteMessage actualUpdate = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedUpdate), emittedAt, noSchemaConfig) + .toAirbyteMessage(updateChangeEvent); + final AirbyteMessage actualDelete = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedDelete), emittedAt, noSchemaConfig) + .toAirbyteMessage(deleteChangeEvent); + final AirbyteMessage actualDeleteNoBefore = new MongoDbDebeziumEventConverter( + cdcMetadataInjector, buildFromAirbyteMessage(expectedDeleteNoBefore), emittedAt, noSchemaConfig) + .toAirbyteMessage(deleteChangeEventNoBefore); + + deepCompare(expectedInsert, actualInsert); + 
deepCompare(expectedUpdate, actualUpdate); + deepCompare(expectedDelete, actualDelete); + deepCompare(expectedDeleteNoBefore, actualDeleteNoBefore); + } + + @Test + void testConvertMongoDbChangeEventUnsupportedOperation() throws IOException { + final Instant emittedAt = Instant.now(); + final CdcMetadataInjector cdcMetadataInjector = new DummyMetadataInjector(); + final ChangeEventWithMetadata unsupportedOperationEvent = mockChangeEvent("mongodb/change_event_unsupported.json", ""); + final ConfiguredAirbyteCatalog configuredAirbyteCatalog = mock(ConfiguredAirbyteCatalog.class); + final var eventConverter = new MongoDbDebeziumEventConverter(cdcMetadataInjector, configuredAirbyteCatalog, emittedAt, Jsons.emptyObject()); + + assertThrows(IllegalArgumentException.class, () -> eventConverter.toAirbyteMessage(unsupportedOperationEvent)); + } + + private ConfiguredAirbyteCatalog buildFromAirbyteMessage(final AirbyteMessage airbyteMessage) { + final ConfiguredAirbyteCatalog configuredAirbyteCatalog = new ConfiguredAirbyteCatalog(); + final ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream(); + final AirbyteStream airbyteStream = new AirbyteStream(); + airbyteStream.setName(airbyteMessage.getRecord().getStream()); + airbyteStream.setNamespace(airbyteMessage.getRecord().getNamespace()); + airbyteStream.setJsonSchema(Jsons.jsonNode(Map.of("properties", airbyteMessage.getRecord().getData()))); + configuredAirbyteStream.setStream(airbyteStream); + configuredAirbyteCatalog.setStreams(List.of(configuredAirbyteStream)); + return configuredAirbyteCatalog; + } + + private static ChangeEventWithMetadata mockChangeEvent(final String resourceName, final String idValue) throws IOException { + final ChangeEvent mocked = mock(ChangeEvent.class); + final String resource = MoreResources.readResource(resourceName); + final String key = "{\"" + ID_FIELD + "\":\"" + idValue + "\"}"; + when(mocked.key()).thenReturn(key); + when(mocked.value()).thenReturn(resource); + 
+ return new ChangeEventWithMetadata(mocked); + } + + private static AirbyteMessage createAirbyteMessage(final String stream, final Instant emittedAt, final String resourceName) throws IOException { + final String data = MoreResources.readResource(resourceName); + + final AirbyteRecordMessage recordMessage = new AirbyteRecordMessage() + .withStream(stream) + .withNamespace("public") + .withData(Jsons.deserialize(data)) + .withEmittedAt(emittedAt.toEpochMilli()); + + return new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(recordMessage); + } + + private static void deepCompare(final Object expected, final Object actual) { + assertEquals(Jsons.deserialize(Jsons.serialize(expected)), Jsons.deserialize(Jsons.serialize(actual))); + } + + public static class DummyMetadataInjector implements CdcMetadataInjector { + + @Override + public void addMetaData(final ObjectNode event, final JsonNode source) { + if (source.has("lsn")) { + final long lsn = source.get("lsn").asLong(); + event.put("_ab_cdc_lsn", lsn); + } + } + + @Override + public String namespace(final JsonNode source) { + return source.has("schema") ? source.get("schema").asText() : source.get("db").asText(); + } + + @Override + public String name(final JsonNode source) { + return source.has("table") ? 
source.get("table").asText() : source.get("collection").asText(); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManagerTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManagerTest.java new file mode 100644 index 000000000000..0a45d7b05fd3 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManagerTest.java @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager.NAME_KEY; +import static io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager.TOPIC_PREFIX_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.AUTH_SOURCE_CONFIGURATION_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CONNECTION_STRING_CONFIGURATION_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CREDENTIALS_PLACEHOLDER; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.DATABASE_CONFIGURATION_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.PASSWORD_CONFIGURATION_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.USERNAME_CONFIGURATION_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.COLLECTION_INCLUDE_LIST_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.DATABASE_INCLUDE_LIST_KEY; +import static 
io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_AUTHSOURCE_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_CONNECTION_MODE_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_CONNECTION_MODE_VALUE; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_CONNECTION_STRING_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_PASSWORD_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_SSL_ENABLED_KEY; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_SSL_ENABLED_VALUE; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_USER_KEY; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore; +import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import org.junit.jupiter.api.Test; + +class 
MongoDbDebeziumPropertiesManagerTest { + + private static final String DATABASE_NAME = "test_database"; + private static final Path PATH = Path.of("."); + public static final String EXPECTED_CONNECTION_STRING = "mongodb://localhost:27017/?retryWrites=false&provider=airbyte"; + + @Test + void testDebeziumProperties() { + final List streams = createStreams(4); + final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + + when(catalog.getStreams()).thenReturn(streams); + + final Properties cdcProperties = new Properties(); + cdcProperties.put("test", "value"); + + final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog); + + final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager); + assertEquals(20 + cdcProperties.size(), debeziumProperties.size()); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY)); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY)); + assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY)); + assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY)); + assertEquals(config.get(USERNAME_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_USER_KEY)); + assertEquals(config.get(PASSWORD_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_PASSWORD_KEY)); + assertEquals(config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_AUTHSOURCE_KEY)); + assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY)); + 
assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY)); + assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY)); + } + + @Test + void testDebeziumPropertiesConnectionStringCredentialsPlaceholder() { + final List streams = createStreams(4); + final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + ((ObjectNode) config).put(CONNECTION_STRING_CONFIGURATION_KEY, config.get(CONNECTION_STRING_CONFIGURATION_KEY).asText() + .replaceAll("mongodb://", "mongodb://" + CREDENTIALS_PLACEHOLDER)); + + when(catalog.getStreams()).thenReturn(streams); + + final Properties cdcProperties = new Properties(); + cdcProperties.put("test", "value"); + + final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog); + + final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager); + assertEquals(20 + cdcProperties.size(), debeziumProperties.size()); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY)); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY)); + assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY)); + assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY)); + assertEquals(config.get(USERNAME_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_USER_KEY)); + assertEquals(config.get(PASSWORD_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_PASSWORD_KEY)); + assertEquals(config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(), 
debeziumProperties.get(MONGODB_AUTHSOURCE_KEY)); + assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY)); + assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY)); + assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY)); + } + + @Test + void testDebeziumPropertiesQuotedConnectionString() { + final List streams = createStreams(4); + final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + ((ObjectNode) config).put(CONNECTION_STRING_CONFIGURATION_KEY, "\"" + config.get(CONNECTION_STRING_CONFIGURATION_KEY) + "\""); + + when(catalog.getStreams()).thenReturn(streams); + + final Properties cdcProperties = new Properties(); + cdcProperties.put("test", "value"); + + final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog); + + final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager); + assertEquals(20 + cdcProperties.size(), debeziumProperties.size()); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY)); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY)); + assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY)); + assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY)); + assertEquals(config.get(USERNAME_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_USER_KEY)); + assertEquals(config.get(PASSWORD_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_PASSWORD_KEY)); + 
assertEquals(config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_AUTHSOURCE_KEY)); + assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY)); + assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY)); + assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY)); + } + + @Test + void testDebeziumPropertiesNoCredentials() { + final List streams = createStreams(4); + final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class); + final AirbyteSchemaHistoryStorage schemaHistoryManager = mock(AirbyteSchemaHistoryStorage.class); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final JsonNode config = createConfiguration(Optional.empty(), Optional.empty(), Optional.empty()); + + when(catalog.getStreams()).thenReturn(streams); + + final Properties cdcProperties = new Properties(); + cdcProperties.put("test", "value"); + + final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog); + + final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager); + assertEquals(17 + cdcProperties.size(), debeziumProperties.size()); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY)); + assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY)); + assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY)); + assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY)); + assertFalse(debeziumProperties.containsKey(MONGODB_USER_KEY)); + assertFalse(debeziumProperties.containsKey(MONGODB_PASSWORD_KEY)); + assertFalse(debeziumProperties.containsKey(MONGODB_AUTHSOURCE_KEY)); + 
assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY)); + assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY)); + assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY)); + } + + @Test + void testNormalizeName() { + final String nameWithUnderscore = "name_with_underscore"; + final String nameWithoutUnderscore = "nameWithout-Underscore"; + final String blankName = ""; + final String nullName = null; + + assertEquals("name-with-underscore", MongoDbDebeziumPropertiesManager.normalizeName(nameWithUnderscore)); + assertEquals(nameWithoutUnderscore, MongoDbDebeziumPropertiesManager.normalizeName(nameWithoutUnderscore)); + assertEquals(blankName, MongoDbDebeziumPropertiesManager.normalizeName(blankName)); + assertNull(MongoDbDebeziumPropertiesManager.normalizeName(nullName)); + + } + + @Test + void testCreateConnectionString() { + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + final String connectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config, false); + assertNotNull(connectionString); + assertEquals(EXPECTED_CONNECTION_STRING, connectionString); + } + + @Test + void testCreateConnectionStringQuotedString() { + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + final String connectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config, false); + ((ObjectNode) config).put(CONNECTION_STRING_CONFIGURATION_KEY, "\"" + config.get(CONNECTION_STRING_CONFIGURATION_KEY) + "\""); + assertNotNull(connectionString); + assertEquals(EXPECTED_CONNECTION_STRING, connectionString); + } + + @Test + void testCreateConnectionStringUseSecondary() { + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + final 
String connectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config, true); + assertNotNull(connectionString); + assertEquals("mongodb://localhost:27017/?retryWrites=false&provider=airbyte&readPreference=secondary", connectionString); + } + + @Test + void testCreateConnectionStringPlaceholderCredentials() { + final JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin")); + ((ObjectNode) config).put(CONNECTION_STRING_CONFIGURATION_KEY, config.get(CONNECTION_STRING_CONFIGURATION_KEY).asText() + .replaceAll("mongodb://", "mongodb://" + CREDENTIALS_PLACEHOLDER)); + final String connectionString = MongoDbDebeziumPropertiesManager.buildConnectionString(config, false); + assertNotNull(connectionString); + assertEquals(EXPECTED_CONNECTION_STRING, connectionString); + } + + private JsonNode createConfiguration(final Optional username, final Optional password, final Optional authMode) { + final Map baseConfig = Map.of( + DATABASE_CONFIGURATION_KEY, DATABASE_NAME, + CONNECTION_STRING_CONFIGURATION_KEY, "mongodb://localhost:27017/"); + + final Map config = new HashMap<>(baseConfig); + authMode.ifPresent(a -> config.put(AUTH_SOURCE_CONFIGURATION_KEY, a)); + username.ifPresent(u -> config.put(USERNAME_CONFIGURATION_KEY, u)); + password.ifPresent(p -> config.put(PASSWORD_CONFIGURATION_KEY, p)); + return Jsons.deserialize(Jsons.serialize(config)); + } + + private List createStreams(final int numberOfStreams) { + final List streams = new ArrayList<>(); + for (int i = 0; i < numberOfStreams; i++) { + final AirbyteStream stream = new AirbyteStream().withNamespace(DATABASE_NAME).withName("collection" + i); + streams.add(new ConfiguredAirbyteStream().withStream(stream)); + } + return streams; + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtilTest.java 
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtilTest.java new file mode 100644 index 000000000000..c37e80ffa1e1 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumStateUtilTest.java @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.mongodb.MongoCommandException; +import com.mongodb.ServerAddress; +import com.mongodb.client.ChangeStreamIterable; +import com.mongodb.client.MongoChangeStreamCursor; +import com.mongodb.client.MongoClient; +import com.mongodb.client.model.changestream.ChangeStreamDocument; +import com.mongodb.connection.ClusterDescription; +import com.mongodb.connection.ClusterType; +import com.mongodb.connection.ServerDescription; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.SyncMode; +import io.debezium.connector.mongodb.ResumeTokens; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import org.bson.BsonDocument; +import org.bson.BsonTimestamp; +import org.junit.jupiter.api.BeforeEach; 
+import org.junit.jupiter.api.Test; + +class MongoDbDebeziumStateUtilTest { + + private static final String DATABASE = "test-database"; + private static final String REPLICA_SET = "test-replica-set"; + private static final String RESUME_TOKEN = "8264BEB9F3000000012B0229296E04"; + + private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(List.of( + CatalogHelpers.createAirbyteStream( + "test-collection", + DATABASE, + Field.of("id", JsonSchemaType.INTEGER), + Field.of("string", JsonSchemaType.STRING)) + .withSupportedSyncModes(List.of(SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("_id"))))); + protected static final ConfiguredAirbyteCatalog CONFIGURED_CATALOG = CatalogHelpers.toDefaultConfiguredCatalog(CATALOG); + + private MongoDbDebeziumStateUtil mongoDbDebeziumStateUtil; + + @BeforeEach + void setup() { + mongoDbDebeziumStateUtil = new MongoDbDebeziumStateUtil(); + } + + @Test + void testConstructInitialDebeziumState() { + final String database = DATABASE; + final String resumeToken = RESUME_TOKEN; + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(resumeToken); + final ServerDescription serverDescription = mock(ServerDescription.class); + final ClusterDescription clusterDescription = mock(ClusterDescription.class); + final MongoClient mongoClient = mock(MongoClient.class); + final Properties baseProperties = new Properties(); + + final JsonNode config = Jsons.jsonNode(Map.of( + MongoDbDebeziumConstants.Configuration.CONNECTION_STRING_CONFIGURATION_KEY, "mongodb://host:12345/", + MongoDbDebeziumConstants.Configuration.DATABASE_CONFIGURATION_KEY, database)); + + when(serverDescription.getSetName()).thenReturn(REPLICA_SET); + when(clusterDescription.getServerDescriptions()).thenReturn(List.of(serverDescription)); + when(clusterDescription.getType()).thenReturn(ClusterType.REPLICA_SET); + when(mongoClient.getClusterDescription()).thenReturn(clusterDescription); + + final JsonNode initialState = 
mongoDbDebeziumStateUtil.constructInitialDebeziumState(resumeTokenDocument, mongoClient, database); + + assertNotNull(initialState); + assertEquals(1, initialState.size()); + final BsonTimestamp timestamp = ResumeTokens.getTimestamp(resumeTokenDocument); + final JsonNode offsetState = initialState.fields().next().getValue(); + assertEquals(resumeToken, Jsons.deserialize(offsetState.asText()).get(MongoDbDebeziumConstants.OffsetState.VALUE_RESUME_TOKEN).asText()); + assertEquals(timestamp.getTime(), Jsons.deserialize(offsetState.asText()).get(MongoDbDebeziumConstants.OffsetState.VALUE_SECONDS).asInt()); + assertEquals(timestamp.getInc(), Jsons.deserialize(offsetState.asText()).get(MongoDbDebeziumConstants.OffsetState.VALUE_INCREMENT).asInt()); + assertEquals("null", Jsons.deserialize(offsetState.asText()).get(MongoDbDebeziumConstants.OffsetState.VALUE_TRANSACTION_ID).asText()); + + final Optional parsedOffset = + mongoDbDebeziumStateUtil.savedOffset( + baseProperties, + CONFIGURED_CATALOG, + initialState, + config, + mongoClient); + assertTrue(parsedOffset.isPresent()); + assertEquals(resumeToken, parsedOffset.get().get("_data").asString().getValue()); + } + + @Test + void testConstructInitialDebeziumStateMissingReplicaSet() { + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(RESUME_TOKEN); + final ServerDescription serverDescription = mock(ServerDescription.class); + final ClusterDescription clusterDescription = mock(ClusterDescription.class); + final MongoClient mongoClient = mock(MongoClient.class); + + when(clusterDescription.getServerDescriptions()).thenReturn(List.of(serverDescription)); + when(clusterDescription.getType()).thenReturn(ClusterType.REPLICA_SET); + when(mongoClient.getClusterDescription()).thenReturn(clusterDescription); + + assertThrows(IllegalStateException.class, + () -> mongoDbDebeziumStateUtil.constructInitialDebeziumState(resumeTokenDocument, mongoClient, DATABASE)); + } + + @Test + void testOffsetDataFormat() { + final 
JsonNode offsetState = MongoDbDebeziumStateUtil.formatState(DATABASE, REPLICA_SET, RESUME_TOKEN); + + assertNotNull(offsetState); + assertEquals("[\"" + DATABASE + "\",{\"" + MongoDbDebeziumConstants.OffsetState.KEY_REPLICA_SET + "\":\"" + REPLICA_SET + "\",\"" + + MongoDbDebeziumConstants.OffsetState.KEY_SERVER_ID + "\":\"" + DATABASE + "\"}]", offsetState.fieldNames().next()); + } + + @Test + void testIsResumeTokenValid() { + final BsonDocument resumeToken = ResumeTokens.fromData(RESUME_TOKEN); + + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeToken); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(changeStreamIterable.resumeAfter(resumeToken)).thenReturn(changeStreamIterable); + when(mongoClient.watch(BsonDocument.class)).thenReturn(changeStreamIterable); + + assertTrue(mongoDbDebeziumStateUtil.isValidResumeToken(resumeToken, mongoClient)); + } + + @Test + void testIsResumeTokenInvalid() { + final BsonDocument resumeToken = ResumeTokens.fromData(RESUME_TOKEN); + + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeToken); + when(changeStreamIterable.cursor()).thenThrow(new MongoCommandException(new BsonDocument(), new ServerAddress())); + when(changeStreamIterable.resumeAfter(resumeToken)).thenReturn(changeStreamIterable); + when(mongoClient.watch(BsonDocument.class)).thenReturn(changeStreamIterable); + + assertFalse(mongoDbDebeziumStateUtil.isValidResumeToken(resumeToken, mongoClient)); + } + +} diff --git 
a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelperTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelperTest.java new file mode 100644 index 000000000000..39fd65567eac --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbResumeTokenHelperTest.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mongodb.cdc; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.mongodb.client.ChangeStreamIterable; +import com.mongodb.client.MongoChangeStreamCursor; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.Aggregates; +import com.mongodb.client.model.Filters; +import com.mongodb.client.model.changestream.ChangeStreamDocument; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.debezium.connector.mongodb.ResumeTokens; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.bson.BsonDocument; +import org.bson.BsonTimestamp; +import org.bson.conversions.Bson; +import org.junit.jupiter.api.Test; + +class MongoDbResumeTokenHelperTest { + + private static final String DATABASE = "test-database"; + + @Test + void testRetrievingResumeToken() { + final String 
resumeToken = "8264BEB9F3000000012B0229296E04"; + final BsonDocument resumeTokenDocument = ResumeTokens.fromData(resumeToken); + final ChangeStreamIterable changeStreamIterable = mock(ChangeStreamIterable.class); + final MongoDatabase mongoDatabase = mock(MongoDatabase.class); + final MongoChangeStreamCursor> mongoChangeStreamCursor = + mock(MongoChangeStreamCursor.class); + final MongoClient mongoClient = mock(MongoClient.class); + + when(mongoChangeStreamCursor.getResumeToken()).thenReturn(resumeTokenDocument); + when(changeStreamIterable.cursor()).thenReturn(mongoChangeStreamCursor); + when(mongoClient.getDatabase(anyString())).thenReturn(mongoDatabase); + final List pipeline = Collections.singletonList(Aggregates.match( + Filters.in("ns.coll", Collections.emptyList()))); + when(mongoDatabase.watch(pipeline, BsonDocument.class)).thenReturn(changeStreamIterable); + + final BsonDocument actualResumeToken = MongoDbResumeTokenHelper.getMostRecentResumeToken(mongoClient, DATABASE, new ConfiguredAirbyteCatalog()); + assertEquals(resumeTokenDocument, actualResumeToken); + } + + @Test + void testTimestampExtractionFromEvent() throws IOException { + final int timestampSec = Long.valueOf(TimeUnit.MILLISECONDS.toSeconds(1692651270000L)).intValue(); + final BsonTimestamp expectedTimestamp = new BsonTimestamp(timestampSec, 2); + final String changeEventJson = MoreResources.readResource("mongodb/change_event.json"); + final JsonNode changeEvent = Jsons.deserialize(changeEventJson); + final BsonTimestamp timestamp = MongoDbResumeTokenHelper.extractTimestampFromEvent(changeEvent); + assertNotNull(timestamp); + assertEquals(expectedTimestamp, timestamp); + } + + @Test + void testTimestampExtractionFromEventSource() throws IOException { + final int timestampSec = Long.valueOf(TimeUnit.MILLISECONDS.toSeconds(1692651270000L)).intValue(); + final BsonTimestamp expectedTimestamp = new BsonTimestamp(timestampSec, 2); + final String changeEventJson = 
MoreResources.readResource("mongodb/change_event.json"); + final JsonNode changeEvent = Jsons.deserialize(changeEventJson); + + final BsonTimestamp timestamp = MongoDbResumeTokenHelper + .extractTimestampFromSource(changeEvent.get(MongoDbDebeziumConstants.ChangeEvent.SOURCE)); + assertNotNull(timestamp); + assertEquals(expectedTimestamp, timestamp); + } + + @Test + void testTimestampExtractionFromEventSourceNotPresent() { + final JsonNode changeEvent = Jsons.deserialize("{}"); + assertThrows(IllegalStateException.class, () -> MongoDbResumeTokenHelper.extractTimestampFromEvent(changeEvent)); + assertThrows(IllegalStateException.class, () -> MongoDbResumeTokenHelper.extractTimestampFromSource(changeEvent)); + } + + @Test + void testTimestampExtractionTimestampNotPresent() { + final JsonNode changeEvent = Jsons.deserialize("{\"source\":{}}"); + assertThrows(IllegalStateException.class, () -> MongoDbResumeTokenHelper.extractTimestampFromEvent(changeEvent)); + } + +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/state/MongoDbStateManagerTest.java b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/state/MongoDbStateManagerTest.java index b9d83ace1c47..30003b914a5a 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/state/MongoDbStateManagerTest.java +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/java/io/airbyte/integrations/source/mongodb/state/MongoDbStateManagerTest.java @@ -4,9 +4,9 @@ package io.airbyte.integrations.source.mongodb.state; -import static io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants.ChangeEvent.SOURCE_ORDER; -import static io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants.ChangeEvent.SOURCE_RESUME_TOKEN; -import static 
io.airbyte.cdk.integrations.debezium.internals.mongodb.MongoDbDebeziumConstants.ChangeEvent.SOURCE_SECONDS; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_ORDER; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_RESUME_TOKEN; +import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.ChangeEvent.SOURCE_SECONDS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_change_event.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_change_event.json new file mode 100644 index 000000000000..07b575bf7e2c --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_change_event.json @@ -0,0 +1,25 @@ +{ + "before": { + "first_name": "san", + "last_name": "goku", + "power": null + }, + "after": null, + "source": { + "version": "1.4.2.Final", + "connector": "postgresql", + "name": "orders", + "ts_ms": 1616775646886, + "snapshot": false, + "db": "db_lwfoyffqvx", + "schema": "public", + "table": "names", + "txId": 498, + "lsn": 23012360, + "xmin": null + }, + "op": "d", + "ts_ms": 1616775646931, + "transaction": null, + "destination": "orders.public.names" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_message.json new file mode 100644 index 000000000000..676ee5b74ffe --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/delete_message.json @@ -0,0 +1,8 @@ +{ + "first_name": "san", + "last_name": "goku", + "power": null, + "_ab_cdc_updated_at": 
"2021-03-26T16:20:46.886Z", + "_ab_cdc_lsn": 23012360, + "_ab_cdc_deleted_at": "2021-03-26T16:20:46.886Z" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_change_event.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_change_event.json new file mode 100644 index 000000000000..4b2c2fb6e2cf --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_change_event.json @@ -0,0 +1,25 @@ +{ + "before": null, + "after": { + "first_name": "san", + "last_name": "goku", + "power": "Infinity" + }, + "source": { + "version": "1.4.2.Final", + "connector": "postgresql", + "name": "orders", + "ts_ms": 1616775642623, + "snapshot": true, + "db": "db_lwfoyffqvx", + "schema": "public", + "table": "names", + "txId": 495, + "lsn": 23011544, + "xmin": null + }, + "op": "r", + "ts_ms": 1616775642624, + "transaction": null, + "destination": "orders.public.names" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_message.json new file mode 100644 index 000000000000..d971d32c1766 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/insert_message.json @@ -0,0 +1,8 @@ +{ + "first_name": "san", + "last_name": "goku", + "power": "Infinity", + "_ab_cdc_updated_at": "2021-03-26T16:20:42.623Z", + "_ab_cdc_lsn": 23011544, + "_ab_cdc_deleted_at": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event.json new file mode 100644 index 000000000000..9c4470daed49 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event.json @@ -0,0 +1,23 @@ +{ + "before": null, + "after": 
null, + "updateDescription": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "test_db", + "ts_ms": 1692651270000, + "snapshot": "false", + "db": "test_db", + "sequence": null, + "rs": "atlas-abcdef-shard-0", + "collection": "test_collection", + "ord": 2, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "r", + "ts_ms": 1692651277722, + "transaction": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete.json new file mode 100644 index 000000000000..89466aa511d8 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete.json @@ -0,0 +1,23 @@ +{ + "before": "{\"_id\": {\"$oid\": \"64f24244f95155351c4185b1\"},\"name\": \"Document 0\",\"description\": \"This is document #0\",\"doubleField\": 0.0,\"intField\": 0, \"data\": \"some data\",\"objectField\": {\"key\": \"value\"},\"timestamp\": {\"$timestamp\": {\"t\": 394,\"i\": 1381162128}}}\"", + "after": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "public", + "ts_ms": 1693598277000, + "snapshot": "false", + "db": "public", + "sequence": null, + "rs": "replica-set", + "collection": "names", + "ord": 1, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "d", + "ts_ms": 1693599528047, + "transaction": null, + "updateDescription": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete_no_before.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete_no_before.json new file mode 100644 index 000000000000..b81171e6d3a5 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_delete_no_before.json @@ 
-0,0 +1,23 @@ +{ + "before": null, + "after": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "public", + "ts_ms": 1693598277000, + "snapshot": "false", + "db": "public", + "sequence": null, + "rs": "replica-set", + "collection": "names", + "ord": 1, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "d", + "ts_ms": 1693599528047, + "transaction": null, + "updateDescription": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_insert.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_insert.json new file mode 100644 index 000000000000..c56115e7a381 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_insert.json @@ -0,0 +1,23 @@ +{ + "before": null, + "after": "{\"_id\": {\"$oid\": \"64f24244f95155351c4185b1\"},\"name\": \"Document 0\",\"description\": \"This is document #0\",\"doubleField\": 0.0,\"intField\": 0, \"data\": \"some data\",\"objectField\": {\"key\": \"value\"},\"timestamp\": {\"$timestamp\": {\"t\": 394,\"i\": 1381162128}}}\"", + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "public", + "ts_ms": 1693598277000, + "snapshot": "false", + "db": "public", + "sequence": null, + "rs": "replica-set", + "collection": "names", + "ord": 1, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "c", + "ts_ms": 1693599528047, + "transaction": null, + "updateDescription": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot.json new file mode 100644 index 000000000000..f97acf330542 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot.json 
@@ -0,0 +1,23 @@ +{ + "before": null, + "after": null, + "updateDescription": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "test_db", + "ts_ms": 1692651270000, + "snapshot": "true", + "db": "test_db", + "sequence": null, + "rs": "atlas-abcdef-shard-0", + "collection": "test_collection", + "ord": 2, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "r", + "ts_ms": 1692651277722, + "transaction": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot_last.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot_last.json new file mode 100644 index 000000000000..40419efb485b --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_snapshot_last.json @@ -0,0 +1,23 @@ +{ + "before": null, + "after": null, + "updateDescription": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "test_db", + "ts_ms": 1692651270000, + "snapshot": "last", + "db": "test_db", + "sequence": null, + "rs": "atlas-abcdef-shard-0", + "collection": "test_collection", + "ord": 2, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "r", + "ts_ms": 1692651277722, + "transaction": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_unsupported.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_unsupported.json new file mode 100644 index 000000000000..037729999a8f --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_unsupported.json @@ -0,0 +1,23 @@ +{ + "before": null, + "after": null, + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "public", + "ts_ms": 1693598277000, + "snapshot": "false", + "db": "public", 
+ "sequence": null, + "rs": "replica-set", + "collection": "names", + "ord": 1, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "t", + "ts_ms": 1693599528047, + "transaction": null, + "updateDescription": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_update.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_update.json new file mode 100644 index 000000000000..1f132a350759 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/change_event_update.json @@ -0,0 +1,23 @@ +{ + "before": "{\"_id\": {\"$oid\": \"64f24244f95155351c4185b1\"},\"name\": \"Document 1\",\"description\": \"This is document #1\",\"doubleField\": 1.0,\"intField\": 1,\"objectField\": {\"key\": \"value\"},\"timestamp\": {\"$timestamp\": {\"t\": 394,\"i\": 1381162128}}}\"", + "after": "{\"_id\": {\"$oid\": \"64f24244f95155351c4185b1\"},\"name\": \"Document 0\",\"description\": \"This is document #0\",\"doubleField\": 0.0,\"intField\": 0, \"data\": \"some data\",\"objectField\": {\"key\": \"value\"},\"timestamp\": {\"$timestamp\": {\"t\": 394,\"i\": 1381162128}}}\"", + "source": { + "version": "2.2.0.Final", + "connector": "mongodb", + "name": "public", + "ts_ms": 1693598277000, + "snapshot": "false", + "db": "public", + "sequence": null, + "rs": "replica-set", + "collection": "names", + "ord": 1, + "lsid": null, + "txnNumber": null, + "wallTime": null + }, + "op": "c", + "ts_ms": 1693599528047, + "transaction": null, + "updateDescription": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message.json new file mode 100644 index 000000000000..9ec95bdaab8e --- /dev/null +++ 
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message.json @@ -0,0 +1,13 @@ +{ + "_id": "64f24244f95155351c4185b1", + "name": "Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": "2023-09-01T19:57:57Z" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message_no_schema.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message_no_schema.json new file mode 100644 index 000000000000..c3bc65ae2dd0 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_airbyte_message_no_schema.json @@ -0,0 +1,17 @@ +{ + "_id": "64f24244f95155351c4185b1", + "data": { + "_id": "64f24244f95155351c4185b1", + "name": "Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "data": "some data" + }, + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": "2023-09-01T19:57:57Z" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message.json new file mode 100644 index 000000000000..a0e40cff602f --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message.json @@ -0,0 +1,5 @@ +{ + "_id": "64f24244f95155351c4185b1", + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": "2023-09-01T19:57:57Z" +} diff --git 
a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message_no_schema.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message_no_schema.json new file mode 100644 index 000000000000..89fe4fd4e9ed --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/delete_no_before_airbyte_message_no_schema.json @@ -0,0 +1,8 @@ +{ + "_id": "64f24244f95155351c4185b1", + "data": { + "_id": "64f24244f95155351c4185b1" + }, + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": "2023-09-01T19:57:57Z" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message.json new file mode 100644 index 000000000000..bf62ce6116da --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message.json @@ -0,0 +1,13 @@ +{ + "_id": "64f24244f95155351c4185b1", + "name": "Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message_no_schema.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message_no_schema.json new file mode 100644 index 000000000000..4242b47f8c55 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/insert_airbyte_message_no_schema.json @@ -0,0 +1,17 @@ +{ + "_id": "64f24244f95155351c4185b1", + "data": { + "_id": "64f24244f95155351c4185b1", + "name": 
"Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "data": "some data" + }, + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message.json new file mode 100644 index 000000000000..bf62ce6116da --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message.json @@ -0,0 +1,13 @@ +{ + "_id": "64f24244f95155351c4185b1", + "name": "Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message_no_schema.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message_no_schema.json new file mode 100644 index 000000000000..4242b47f8c55 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/mongodb/update_airbyte_message_no_schema.json @@ -0,0 +1,17 @@ +{ + "_id": "64f24244f95155351c4185b1", + "data": { + "_id": "64f24244f95155351c4185b1", + "name": "Document 0", + "description": "This is document #0", + "doubleField": 0.0, + "intField": 0, + "objectField": { + "key": "value" + }, + "timestamp": "2023-09-01T19:57:56.752Z", + "data": "some data" + }, + "_ab_cdc_updated_at": "2023-09-01T19:57:57Z", + "_ab_cdc_deleted_at": null +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_change_event.json 
b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_change_event.json new file mode 100644 index 000000000000..da5dcd9c2b06 --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_change_event.json @@ -0,0 +1,25 @@ +{ + "before": null, + "after": { + "first_name": "san", + "last_name": "goku", + "power": 10000.2 + }, + "source": { + "version": "1.4.2.Final", + "connector": "postgresql", + "name": "orders", + "ts_ms": 1616775646881, + "snapshot": false, + "db": "db_lwfoyffqvx", + "schema": "public", + "table": "names", + "txId": 497, + "lsn": 23012216, + "xmin": null + }, + "op": "u", + "ts_ms": 1616775646929, + "transaction": null, + "destination": "orders.public.names" +} diff --git a/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_message.json b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_message.json new file mode 100644 index 000000000000..89b9a08038aa --- /dev/null +++ b/airbyte-integrations/connectors/source-mongodb-v2-plain/src/test/resources/update_message.json @@ -0,0 +1,8 @@ +{ + "first_name": "san", + "last_name": "goku", + "power": 10000.2, + "_ab_cdc_updated_at": "2021-03-26T16:20:46.881Z", + "_ab_cdc_lsn": 23012216, + "_ab_cdc_deleted_at": null +}