diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 05a744e6..84436588 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- ruby: [ '2.7', '3.0', '3.1', '3.2', '3.3' ]
+ ruby: [ '3.0', '3.1', '3.2', '3.3', '3.4' ]
steps:
- uses: actions/checkout@v3
diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml
index 0adce443..621b68fb 100644
--- a/.rubocop_todo.yml
+++ b/.rubocop_todo.yml
@@ -146,7 +146,6 @@ Lint/UselessAssignment:
Metrics/AbcSize:
Exclude:
- 'lib/deimos/active_record_consume/message_consumption.rb'
- - 'lib/deimos/config/phobos_config.rb'
- 'lib/deimos/instrumentation.rb'
- 'lib/deimos/kafka_source.rb'
- 'lib/deimos/kafka_topic_info.rb'
@@ -159,12 +158,6 @@ Metrics/AbcSize:
- 'lib/deimos/utils/schema_controller_mixin.rb'
- 'lib/generators/deimos/schema_class_generator.rb'
-# Offense count: 1
-# Configuration parameters: CountComments, Max, CountAsOne, ExcludedMethods.
-Metrics/MethodLength:
- Exclude:
- - 'lib/deimos/config/phobos_config.rb'
-
# Offense count: 5
# Configuration parameters: CountComments, Max, CountAsOne.
Metrics/ModuleLength:
@@ -179,7 +172,6 @@ Metrics/ModuleLength:
# Configuration parameters: IgnoredMethods, Max.
Metrics/PerceivedComplexity:
Exclude:
- - 'lib/deimos/config/phobos_config.rb'
- 'lib/deimos/consume/batch_consumption.rb'
- 'lib/deimos/kafka_source.rb'
- 'lib/deimos/schema_backends/avro_schema_coercer.rb'
@@ -253,7 +245,6 @@ Style/FrozenStringLiteralComment:
Style/GlobalStdStream:
Exclude:
- 'lib/deimos/config/configuration.rb'
- - 'lib/deimos/config/phobos_config.rb'
- 'lib/deimos/metrics/mock.rb'
- 'lib/deimos/test_helpers.rb'
- 'lib/deimos/tracing/mock.rb'
@@ -329,14 +320,6 @@ Style/StringLiterals:
- 'spec/schemas/my_namespace/my_schema_with_complex_type.rb'
- 'spec/spec_helper.rb'
-# Offense count: 1
-# Cop supports --auto-correct.
-# Configuration parameters: EnforcedStyle, AllowSafeAssignment.
-# SupportedStyles: require_parentheses, require_no_parentheses, require_parentheses_when_complex
-Style/TernaryParentheses:
- Exclude:
- - 'lib/deimos/config/phobos_config.rb'
-
# Offense count: 21
# Cop supports --auto-correct.
Style/TrailingBodyOnModule:
diff --git a/.tool-versions b/.tool-versions
new file mode 100644
index 00000000..f2a971aa
--- /dev/null
+++ b/.tool-versions
@@ -0,0 +1 @@
+ruby 3.2.2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 774140e1..746bf00a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## UNRELEASED
-- Feature: Added `max_batch_size` config to producer to allow custom batch size for publishing.
+
+## 2.0.0 - 2025-03-03
+- ***Full rewrite of Deimos to work with Karafka.*** Please see [Upgrading](./docs/UPGRADING.md) for full information.
# 1.24.3 - 2024-05-13
- Feature: Enable `producers.persistent_connections` phobos setting
diff --git a/Gemfile b/Gemfile
index 1cbb3cd6..d7355089 100644
--- a/Gemfile
+++ b/Gemfile
@@ -4,3 +4,9 @@ source 'https://rubygems.org'
# Specify your gem's dependencies in boilerplate.gemspec
gemspec
+
+# for older activesupport
+gem 'mutex_m'
+gem 'bigdecimal'
+gem 'benchmark'
+gem 'drb'
diff --git a/README.md b/README.md
index f44c7c2a..eb3b61c9 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,10 @@
A Ruby framework for marrying Kafka, a schema definition like Avro, and/or ActiveRecord and provide
a useful toolbox of goodies for Ruby-based Kafka development.
-Built on Phobos and hence Ruby-Kafka.
+Built on [Karafka](https://karafka.io/).
+
+> [!IMPORTANT]
+> Deimos 2.x is a major rewrite from 1.x. Please see the [Upgrading Guide](./docs/UPGRADING.md) for information on the changes and how to upgrade.
* [Additional Documentation](#additional-documentation)
@@ -23,15 +26,15 @@ Built on Phobos and hence Ruby-Kafka.
* [Kafka Message Keys](#kafka-message-keys)
* [Consumers](#consumers)
* [Rails Integration](#rails-integration)
- * [Controller Mixin](#controller-mixin)
- * [Database Backend](#database-backend)
+ * [Producing](#producing)
+ * [Consuming](#consuming)
+ * [Generating Tables and Models](#generating-tables-and-models)
+ * [Outbox Backend](#outbox-backend)
* [Database Poller](#database-poller)
* [Running Consumers](#running-consumers)
* [Generated Schema Classes](#generated-schema-classes)
* [Metrics](#metrics)
* [Testing](#testing)
- * [Test Helpers](#test-helpers)
- * [Integration Test Helpers](#integration-test-helpers)
* [Utilities](#utilities)
* [Contributing](#contributing)
@@ -70,7 +73,7 @@ are for bugfixes or new functionality which does not affect existing code. You
should be locking your Gemfile to the minor version:
```ruby
-gem 'deimos-ruby', '~> 1.1'
+gem 'deimos-ruby', '~> 1.1.0'
```
# Configuration
@@ -100,7 +103,15 @@ To create a new schema backend, please see the existing examples [here](lib/deim
# Producers
-Producers will look like this:
+With the correct [configuration](./docs/CONFIGURATION.md), you do not need to use a Deimos producer class in order to send schema-encoded messages to Kafka. You can simply use `Karafka.producer.produce()` (see [here](https://karafka.io/docs/Producing-messages/)). There are a few features that Deimos producers provide:
+
+* Using an instance method to determine partition key based on the provided payload
+* Allowing global disabling of producers (or a particular producer class)
+* Usage of the [Outbox](#outbox-backend) producer backend.
+
+Producer classes in general are a handy way to coerce some object into a hash or [schema class](#generated-schema-classes) that represents the payload.
+
+A Deimos producer could look like this:
```ruby
class MyProducer < Deimos::Producer
@@ -113,27 +124,22 @@ class MyProducer < Deimos::Producer
payload[:my_id]
end
- # You can call publish / publish_list directly, or create new methods
- # wrapping them.
+ # You can call produce directly, or create new methods wrapping it.
def send_some_message(an_object)
payload = {
'some-key' => an_object.foo,
'some-key2' => an_object.bar
}
- # You can also publish an array with self.publish_list(payloads)
- # You may specify the topic here with self.publish(payload, topic: 'my-topic')
- # You may also specify the headers here with self.publish(payload, headers: { 'foo' => 'bar' })
- self.publish(payload)
+ self.produce([{payload: payload}])
+ # additional keys can be added - see https://karafka.io/docs/WaterDrop-Usage/
+ self.produce([{payload: payload, topic: "other-topic", key: "some-key", partition_key: "some-key2"}])
end
-
end
-
-
end
```
-### Auto-added Fields
+## Auto-added Fields
If your schema has a field called `message_id`, and the payload you give
your producer doesn't have this set, Deimos will auto-generate
@@ -143,7 +149,7 @@ so that you can track each sent message via logging.
You can also provide a field in your schema called `timestamp` which will be
auto-filled with the current timestamp if not provided.
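+
+For example (a minimal sketch, assuming your schema defines both `message_id` and `timestamp`):
+
+```ruby
+# Neither field is provided here, so Deimos will fill in `message_id` and
+# `timestamp` automatically before encoding.
+MyProducer.produce([{payload: {'some-key' => 'some-value'}}])
+```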
-### Coerced Values
+## Coerced Values
Deimos will do some simple coercions if you pass values that don't
exactly match the schema.
@@ -155,60 +161,28 @@ representing a number, will be parsed to Float.
* If the schema is :string, if the value implements its own `to_s` method,
this will be called on it. This includes hashes, symbols, numbers, dates, etc.
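+
+For example (a hypothetical illustration, assuming the schema defines an `:int` field `some_int` and a `:string` field `some_string`):
+
+```ruby
+# '123' is parsed to the integer 123, and the symbol is converted via #to_s
+# to 'pending' before the payload is encoded.
+MyProducer.produce([{payload: {'some_int' => '123', 'some_string' => :pending}}])
+```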
-### Instrumentation
-
-Deimos will send ActiveSupport Notifications.
-You can listen to these notifications e.g. as follows:
+## Disabling Producers
+You can disable producers globally or inside a block. Globally:
```ruby
- Deimos.subscribe('produce') do |event|
- # event is an ActiveSupport::Notifications::Event
- # you can access time, duration, and transaction_id
- # payload contains :producer, :topic, and :payloads
- data = event.payload
- end
-```
+Deimos.config.producers.disabled = true
+```
-The following events are produced (in addition to the ones already
-produced by Phobos and RubyKafka):
+For the duration of a block:
+```ruby
+Deimos.disable_producers do
+ # code goes here
+end
+```
-* `produce_error` - sent when an error occurs when producing a message.
- * producer - the class that produced the message
- * topic
- * exception_object
- * payloads - the unencoded payloads
-* `encode_messages` - sent when messages are being schema-encoded.
- * producer - the class that produced the message
- * topic
- * payloads - the unencoded payloads
-* `db_producer.produce` - sent when the DB producer sends messages for the
- DB backend. Messages that are too large will be caught with this
- notification - they will be deleted from the table and this notification
- will be fired with an exception object.
- * topic
- * exception_object
- * messages - the batch of messages (in the form of `Deimos::KafkaMessage`s)
- that failed - this should have only a single message in the batch.
-* `batch_consumption.valid_records` - sent when the consumer has successfully upserted records. Limited by `max_db_batch_size`.
- * consumer: class of the consumer that upserted these records
- * records: Records upserted into the DB (of type `ActiveRecord::Base`)
-* `batch_consumption.invalid_records` - sent when the consumer has rejected records returned from `filtered_records`. Limited by `max_db_batch_size`.
- * consumer: class of the consumer that rejected these records
- * records: Rejected records (of type `Deimos::ActiveRecordConsume::BatchRecord`)
-
-Similarly:
+For specific producers only:
```ruby
- Deimos.subscribe('produce_error') do |event|
- data = event.payloads
- Mail.send("Got an error #{event.exception_object.message} on topic #{data[:topic]} with payloads #{data[:payloads]}")
- end
-
- Deimos.subscribe('encode_messages') do |event|
- # ...
- end
-```
+Deimos.disable_producers(Producer1, Producer2) do
+ # code goes here
+end
+```
-### Kafka Message Keys
+## Kafka Message Keys
Topics representing events rather than domain data don't need keys. However,
best practice for domain messages is to schema-encode message keys
@@ -291,6 +265,40 @@ it will be encoded first against the schema). So your payload would look like
Remember that if you're using `schema`, the `payload_key` must be a *hash*,
not a plain value.
+## Instrumentation
+
+Deimos will send events through the [Karafka instrumentation monitor](https://karafka.io/docs/Monitoring-and-Logging/#subscribing-to-the-instrumentation-events).
+You can listen to these notifications e.g. as follows:
+
+```ruby
+ Karafka.monitor.subscribe('deimos.encode_message') do |event|
+ # event is a Karafka Event. You can use [] to access keys in the payload.
+ messages = event[:messages]
+ end
+```
+
+The following events are produced (in addition to the ones already
+produced by Karafka and WaterDrop):
+
+* `deimos.encode_message` - sent when messages are being schema-encoded.
+ * producer - the class that produced the message
+ * topic
+ * payloads - the unencoded payloads
+* `outbox.produce` - sent when the outbox producer sends messages for the
+ outbox backend. Messages that are too large will be caught with this
+ notification - they will be deleted from the table and this notification
+ will be fired with an exception object.
+ * topic
+ * exception_object
+ * messages - the batch of messages (in the form of `Deimos::KafkaMessage`s)
+ that failed - this should have only a single message in the batch.
+* `deimos.batch_consumption.valid_records` - sent when the consumer has successfully upserted records. Limited by `max_db_batch_size`.
+ * consumer: class of the consumer that upserted these records
+ * records: Records upserted into the DB (of type `ActiveRecord::Base`)
+* `deimos.batch_consumption.invalid_records` - sent when the consumer has rejected records returned from `filtered_records`. Limited by `max_db_batch_size`.
+ * consumer: class of the consumer that rejected these records
+ * records: Rejected records (of type `Deimos::ActiveRecordConsume::BatchRecord`)
+
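+For example, to get notified when the outbox backend drops a message that is too large (a sketch using the payload keys listed above):
+
+```ruby
+Karafka.monitor.subscribe('outbox.produce') do |event|
+  messages = event[:messages]
+  exception = event[:exception_object]
+  puts "Dropped #{messages.size} message(s) on #{event[:topic]}: #{exception&.message}"
+end
+```
+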
# Consumers
Here is a sample consumer:
@@ -305,18 +313,16 @@ class MyConsumer < Deimos::Consumer
exception.is_a?(MyBadError)
end
- def consume(payload, metadata)
- # Same method as Phobos consumers.
- # payload is an schema-decoded hash.
- # metadata is a hash that contains information like :key and :topic.
- # In general, your key should be included in the payload itself. However,
- # if you need to access it separately from the payload, you can use
- # metadata[:key]
+ def consume_batch
+ # messages is a Karafka Messages object - see https://github.com/karafka/karafka/blob/master/lib/karafka/messages/messages.rb
+ messages.payloads.each do |payload|
+ puts payload
+ end
end
end
```
-### Fatal Errors
+## Fatal Errors
The recommended configuration is for consumers *not* to raise errors
they encounter while consuming messages. Errors can come from
@@ -330,95 +336,31 @@ can use instrumentation to handle errors you receive. You can also
specify "fatal errors" either via global configuration (`config.fatal_error`)
or via overriding a method on an individual consumer (`def fatal_error`).
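+
+A sketch of the global form, mirroring the per-consumer `fatal_error?` method shown earlier (the proc's argument list is an assumption):
+
+```ruby
+Deimos.configure do |config|
+  # Treat MyBadError as fatal for all consumers.
+  config.consumers.fatal_error = proc { |exception, _messages| exception.is_a?(MyBadError) }
+end
+```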
-### Batch Consumption
+## Per-Message Consumption
-Instead of consuming messages one at a time, consumers can receive a batch of
-messages as an array and then process them together. This can improve
-consumer throughput, depending on the use case. Batch consumers behave like
-other consumers in regards to key and payload decoding, etc.
+Instead of consuming messages in a batch, consumers can process one message at a time. This is
+helpful if the logic involved in each message is independent and you don't want to treat the whole
+batch as a single unit.
-To enable batch consumption, ensure that the `delivery` property of your
-consumer is set to `inline_batch`.
+To enable per-message consumption, ensure that the `each_message` setting for your
+consumer is set to `true`.
-Batch consumers will invoke the `consume_batch` method instead of `consume`
+Per-message consumers will invoke the `consume_message` method instead of `consume_batch`
as in this example:
```ruby
-class MyBatchConsumer < Deimos::Consumer
-
- def consume_batch(payloads, metadata)
- # payloads is an array of schema-decoded hashes.
- # metadata is a hash that contains information like :keys, :topic,
- # and :first_offset.
- # Keys are automatically decoded and available as an array with
- # the same cardinality as the payloads. If you need to iterate
- # over payloads and keys together, you can use something like this:
-
- payloads.zip(metadata[:keys]) do |_payload, _key|
- # Do something
- end
- end
-end
-```
-#### Saving data to Multiple Database tables
-
-> This feature is implemented and tested with MySQL database ONLY.
-
-Sometimes, the Kafka message needs to be saved to multiple database tables. For example, if a `User` topic provides you metadata and profile image for users, we might want to save it to multiple tables: `User` and `Image`.
-
-- Return associations as keys in `record_attributes` to enable this feature.
-- The `bulk_import_id_column` config allows you to specify column_name on `record_class` which can be used to retrieve IDs after save. Defaults to `bulk_import_id`. This config is *required* if you have associations but optional if you do not.
-
-You must override the `record_attributes` (and optionally `column` and `key_columns`) methods on your consumer class for this feature to work.
-- `record_attributes` - This method is required to map Kafka messages to ActiveRecord model objects.
-- `columns(klass)` - Should return an array of column names that should be used by ActiveRecord klass during SQL insert operation.
-- `key_columns(messages, klass)` - Should return an array of column name(s) that makes a row unique.
-```ruby
-class User < ApplicationRecord
- has_many :images
-end
-
-class MyBatchConsumer < Deimos::ActiveRecordConsumer
-
- record_class User
+class MyMessageConsumer < Deimos::Consumer
- def record_attributes(payload, _key)
- {
- first_name: payload.first_name,
- images: [
- {
- attr1: payload.image_url
- },
- {
- attr2: payload.other_image_url
- }
- ]
- }
- end
-
- def key_columns(klass)
- case klass
- when User
- nil # use default
- when Image
- ["image_url", "image_name"]
- end
- end
-
- def columns(klass)
- case klass
- when User
- nil # use default
- when Image
- klass.columns.map(&:name) - [:created_at, :updated_at, :id]
- end
+ def consume_message(message)
+ # message is a Karafka Message object
+ puts message.payload
end
end
```
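+
+A sketch of where `each_message` might be set, assuming it is declared as a Deimos extension on the Karafka route for the topic (see [Configuration](./docs/CONFIGURATION.md) for the full routing options):
+
+```ruby
+class KarafkaApp < Karafka::App
+  routes.draw do
+    topic 'my-topic' do
+      consumer MyMessageConsumer
+      # Deimos routing extension (assumed name, per the setting described above):
+      # invoke consume_message per message instead of consume_batch.
+      each_message true
+    end
+  end
+end
+```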
# Rails Integration
-### Producing
+## Producing
Deimos comes with an ActiveRecordProducer. This takes a single or
list of ActiveRecord objects or hashes and maps it to the given schema.
@@ -439,7 +381,7 @@ class MyProducer < Deimos::ActiveRecordProducer
# Optionally override this if you want the message to be
# sent even if fields that aren't in the schema are changed.
- def watched_attributes
+ def watched_attributes(_record)
super + ['a_non_schema_attribute']
end
@@ -458,28 +400,7 @@ MyProducer.send_events([Widget.new(foo: 1), Widget.new(foo: 2)])
MyProducer.send_events([{foo: 1}, {foo: 2}])
```
-#### Disabling Producers
-
-You can disable producers globally or inside a block. Globally:
-```ruby
-Deimos.config.producers.disabled = true
-```
-
-For the duration of a block:
-```ruby
-Deimos.disable_producers do
- # code goes here
-end
-```
-
-For specific producers only:
-```ruby
-Deimos.disable_producers(Producer1, Producer2) do
- # code goes here
-end
-```
-
-#### KafkaSource
+### KafkaSource
There is a special mixin which can be added to any ActiveRecord class. This
will create callbacks which will automatically send messages to Kafka whenever
@@ -491,7 +412,7 @@ will not fire if using pure SQL or Arel.
Note that these messages are sent *during the transaction*, i.e. using
`after_create`, `after_update` and `after_destroy`. If there are
questions of consistency between the database and Kafka, it is recommended
-to switch to using the DB backend (see next section) to avoid these issues.
+to switch to using the [outbox backend](#outbox-backend) to avoid these issues.
When the object is destroyed, an empty payload with a payload key consisting of
the record's primary key is sent to the producer. If your topic's key is
@@ -525,120 +446,7 @@ class Widget < ActiveRecord::Base
end
```
-### Controller Mixin
-
-Deimos comes with a mixin for `ActionController` which automatically encodes and decodes schema
-payloads. There are some advantages to encoding your data in e.g. Avro rather than straight JSON,
-particularly if your service is talking to another backend service rather than the front-end
-browser:
-
-* It enforces a contract between services. Solutions like [OpenAPI](https://swagger.io/specification/)
- do this as well, but in order for the client to know the contract, usually some kind of code
- generation has to happen. Using schemas ensures both sides know the contract without having to change code.
- In addition, OpenAPI is now a huge and confusing format, and using simpler schema formats
- can be beneficial.
-* Using Avro or Protobuf ensures both forwards and backwards compatibility,
- which reduces the need for versioning since both sides can simply ignore fields they aren't aware
- of.
-* Encoding and decoding using Avro or Protobuf is generally faster than straight JSON, and
- results in smaller payloads and therefore less network traffic.
-
-To use the mixin, add the following to your `WhateverController`:
-
-```ruby
-class WhateverController < ApplicationController
- include Deimos::Utils::SchemaControllerMixin
-
- request_namespace 'my.namespace.requests'
- response_namespace 'my.namespace.responses'
-
- # Add a "schemas" line for all routes that should encode/decode schemas.
- # Default is to match the schema name to the route name.
- schemas :index
- # will look for: my.namespace.requests.Index.avsc
- # my.namespace.responses.Index.avsc
-
- # Can use mapping to change the schema but keep the namespaces,
- # i.e. use the same schema name across the two namespaces
- schemas create: 'CreateTopic'
- # will look for: my.namespace.requests.CreateTopic.avsc
- # my.namespace.responses.CreateTopic.avsc
-
- # If all routes use the default, you can add them all at once
- schemas :index, :show, :update
-
- # Different schemas can be specified as well
- schemas :index, :show, request: 'IndexRequest', response: 'IndexResponse'
-
- # To access the encoded data, use the `payload` helper method, and to render it back,
- # use the `render_schema` method.
-
- def index
- response = { 'response_id' => payload['request_id'] + 'hi mom' }
- render_schema(response)
- end
-end
-```
-
-To make use of this feature, your requests and responses need to have the correct content type.
-For Avro content, this is the `avro/binary` content type.
-
-# Database Backend
-
-Deimos provides a way to allow Kafka messages to be created inside a
-database transaction, and send them asynchronously. This ensures that your
-database transactions and Kafka messages related to those transactions
-are always in sync. Essentially, it separates the message logic so that a
-message is first validated, encoded, and saved in the database, and then sent
-on a separate thread. This means if you have to roll back your transaction,
-it also rolls back your Kafka messages.
-
-This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html).
-
-To enable this, first generate the migration to create the relevant tables:
-
- rails g deimos:db_backend
-
-You can now set the following configuration:
-
- config.producers.backend = :db
-
-This will save all your Kafka messages to the `kafka_messages` table instead
-of immediately sending to Kafka. Now, you just need to call
-
- Deimos.start_db_backend!
-
-You can do this inside a thread or fork block.
-If using Rails, you can use a Rake task to do this:
-
- rails deimos:db_producer
-
-This creates one or more threads dedicated to scanning and publishing these
-messages by using the `kafka_topics` table in a manner similar to
-[Delayed Job](https://github.com/collectiveidea/delayed_job).
-You can pass in a number of threads to the method:
-
- Deimos.start_db_backend!(thread_count: 2) # OR
- THREAD_COUNT=5 rails deimos:db_producer
-
-If you want to force a message to send immediately, just call the `publish_list`
-method with `force_send: true`. You can also pass `force_send` into any of the
-other methods that publish events, like `send_event` in `ActiveRecordProducer`.
-
-A couple of gotchas when using this feature:
-* This may result in high throughput depending on your scale. If you're
- using Rails < 5.1, you should add a migration to change the `id` column
- to `BIGINT`. Rails >= 5.1 sets it to BIGINT by default.
-* This table is high throughput but should generally be empty. Make sure
- you optimize/vacuum this table regularly to reclaim the disk space.
-* Currently, threads allow you to scale the *number* of topics but not
- a single large topic with lots of messages. There is an [issue](https://github.com/flipp-oss/deimos/issues/23)
- opened that would help with this case.
-
-For more information on how the database backend works and why it was
-implemented, please see [Database Backends](docs/DATABASE_BACKEND.md).
-
-### Consuming
+## Consuming
Deimos provides an ActiveRecordConsumer which will take a payload
and automatically save it to a provided model. It will take the intersection
@@ -702,42 +510,19 @@ class MyConsumer < Deimos::ActiveRecordConsumer
end
```
-#### Generating Tables and Models
-
-Deimos provides a generator that takes an existing schema and generates a
-database table based on its fields. By default, any complex sub-types (such as
-records or arrays) are turned into JSON (if supported) or string columns.
-
-Before running this migration, you must first copy the schema into your repo
-in the correct path (in the example above, you would need to have a file
-`{SCHEMA_ROOT}/com/my-namespace/MySchema.avsc`).
-
-To generate a model and migration, run the following:
-
- rails g deimos:active_record TABLE_NAME FULL_SCHEMA_NAME
-
-Example:
-
- rails g deimos:active_record my_table com.my-namespace.MySchema
-
-...would generate:
-
- db/migrate/1234_create_my_table.rb
- app/models/my_table.rb
-
-#### Batch Consumers
+### Batch Consuming
Deimos also provides a batch consumption mode for `ActiveRecordConsumer` which
processes groups of messages at once using the ActiveRecord backend.
-Batch ActiveRecord consumers make use of the
+Batch ActiveRecord consumers make use of
[activerecord-import](https://github.com/zdennis/activerecord-import) to insert
or update multiple records in bulk SQL statements. This reduces processing
time at the cost of skipping ActiveRecord callbacks for individual records.
Deleted records (tombstones) are grouped into `delete_all` calls and thus also
skip `destroy` callbacks.
-Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`.
+Batch consumption is used when the `each_message` setting for your consumer is set to `false` (the default).
**Note**: Currently, batch consumption supports only primary keys as identifiers out of the box. See
[the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys.
@@ -750,8 +535,6 @@ A sample batch consumer would look as follows:
```ruby
class MyConsumer < Deimos::ActiveRecordConsumer
- schema 'MySchema'
- key_config field: 'my_field'
record_class Widget
# Controls whether the batch is compacted before consuming.
@@ -760,7 +543,7 @@ class MyConsumer < Deimos::ActiveRecordConsumer
# If false, messages will be grouped into "slices" of independent keys
# and each slice will be imported separately.
#
- # compacted false
+ compacted false
# Optional override of the default behavior, which is to call `delete_all`
@@ -778,7 +561,141 @@ class MyConsumer < Deimos::ActiveRecordConsumer
end
```
-## Database Poller
+### Saving data to Multiple Database tables
+
+> This feature is implemented and tested with MySQL ONLY.
+
+Sometimes, a Kafka message needs to be saved to multiple database tables. For example, if a `User` topic provides you metadata and profile image for users, we might want to save it to multiple tables: `User` and `Image`.
+
+- Return associations as keys in `record_attributes` to enable this feature.
+- The `bulk_import_id_column` config allows you to specify the column name on `record_class` which can be used to retrieve IDs after save. Defaults to `bulk_import_id`. This config is *required* if you have associations but optional if you do not.
+
+You must override the `record_attributes` (and optionally `columns` and `key_columns`) methods on your consumer class for this feature to work.
+- `record_attributes` - This method is required to map Kafka messages to ActiveRecord model objects.
+- `columns(klass)` - Should return an array of column names that should be used by ActiveRecord klass during SQL insert operation.
+- `key_columns(messages, klass)` - Should return an array of column name(s) that makes a row unique.
+
+```ruby
+class User < ApplicationRecord
+ has_many :images
+end
+
+class MyConsumer < Deimos::ActiveRecordConsumer
+
+ record_class User
+
+ def record_attributes(payload, _key)
+ {
+ first_name: payload.first_name,
+ images: [
+ {
+ attr1: payload.image_url
+ },
+ {
+ attr2: payload.other_image_url
+ }
+ ]
+ }
+ end
+
+ def key_columns(klass)
+ case klass
+ when User
+ nil # use default
+ when Image
+ ["image_url", "image_name"]
+ end
+ end
+
+ def columns(klass)
+ case klass
+ when User
+ nil # use default
+ when Image
+ klass.columns.map(&:name) - [:created_at, :updated_at, :id]
+ end
+ end
+end
+```
+
+## Generating Tables and Models
+
+Deimos provides a generator that takes an existing schema and generates a
+database table based on its fields. By default, any complex sub-types (such as
+records or arrays) are turned into JSON (if supported) or string columns.
+
+Before running this migration, you must first copy the schema into your repo
+in the correct path (in the example above, you would need to have a file
+`{SCHEMA_ROOT}/com/my-namespace/MySchema.avsc`).
+
+To generate a model and migration, run the following:
+
+ rails g deimos:active_record TABLE_NAME FULL_SCHEMA_NAME
+
+Example:
+
+ rails g deimos:active_record my_table com.my-namespace.MySchema
+
+...would generate:
+
+ db/migrate/1234_create_my_table.rb
+ app/models/my_table.rb
+
+# Outbox Backend
+
+Deimos provides a way to allow Kafka messages to be created inside a
+database transaction, and send them asynchronously. This ensures that your
+database transactions and Kafka messages related to those transactions
+are always in sync. Essentially, it separates the message logic so that a
+message is first validated, encoded, and saved in the database, and then sent
+on a separate thread. This means if you have to roll back your transaction,
+it also rolls back your Kafka messages.
+
+This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html).
+
+To enable this, first generate the migration to create the relevant tables:
+
+ rails g deimos:outbox
+
+You can now set the following configuration:
+
+ config.producers.backend = :outbox
+
+This will save all your Kafka messages to the `kafka_messages` table instead
+of immediately sending to Kafka. Now, you just need to call
+
+ Deimos.start_outbox_backend!
+
+You can do this inside a thread or fork block.
+If using Rails, you can use a Rake task to do this:
+
+ rails deimos:outbox
+
+This creates one or more threads dedicated to scanning and publishing these
+messages by using the `kafka_topics` table in a manner similar to
+[Delayed Job](https://github.com/collectiveidea/delayed_job).
+You can pass in a number of threads to the method:
+
+ Deimos.start_outbox_backend!(thread_count: 2) # OR
+ THREAD_COUNT=5 rails deimos:outbox
+
+If you want to force a message to send immediately, just call the `produce`
+method with `backend: :kafka`.
+
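+A sketch of what that could look like, assuming `backend` is passed as a keyword argument alongside the usual message array:
+
+```ruby
+# Bypass the outbox table and send straight to Kafka for this one call.
+MyProducer.produce([{payload: payload}], backend: :kafka)
+```
+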
+A couple of gotchas when using this feature:
+* This may result in high throughput depending on your scale. If you're
+ using Rails < 5.1, you should add a migration to change the `id` column
+ to `BIGINT`. Rails >= 5.1 sets it to BIGINT by default.
+* This table is high throughput but should generally be empty. Make sure
+ you optimize/vacuum this table regularly to reclaim the disk space.
+* Currently, threads allow you to scale the *number* of topics but not
+ a single large topic with lots of messages. There is an [issue](https://github.com/flipp-oss/deimos/issues/23)
+ opened that would help with this case.
+
+For more information on how the outbox backend works and why it was
+implemented, please see [Database Backends](docs/DATABASE_BACKEND.md).
+
+# Database Poller
Another method of fetching updates from the database to Kafka is by polling
the database (a process popularized by [Kafka Connect](https://docs.confluent.io/current/connect/index.html)).
@@ -825,7 +742,7 @@ define one additional method on the producer:
```ruby
class MyProducer < Deimos::ActiveRecordProducer
- ...
+ # ...
def poll_query(time_from:, time_to:, column_name:, min_id:)
# Default is to use the timestamp `column_name` to find all records
# between time_from and time_to, or records where `updated_at` is equal to
@@ -834,6 +751,12 @@ class MyProducer < Deimos::ActiveRecordProducer
# middle of a timestamp, we won't miss any records.
# You can override or change this behavior if necessary.
end
+
+ # You can define this method if you need to do some extra actions with
+ # the collection of elements you just sent to Kafka
+ def post_process(batch)
+ # write some code here
+ end
end
```
@@ -847,25 +770,10 @@ have one process running at a time. If a particular poll takes longer than
the poll interval (i.e. interval is set at 1 minute but it takes 75 seconds)
the next poll will begin immediately following the first one completing.
-To Post-Process records that are sent to Kafka:
-
-You need to define one additional method in your producer class to post-process the messages sent to Kafka.
-
-```ruby
-class MyProducer < Deimos::ActiveRecordProducer
- ...
- def post_process(batch)
- # If you need to do some extra actions with
- # the collection of elements you just sent to Kafka
- # write some code here
- end
-end
-```
-
Note that the poller will retry infinitely if it encounters a Kafka-related error such
as a communication failure. For all other errors, it will retry once by default.
-### State-based pollers
+## State-based pollers
By default, pollers use timestamps and IDs to determine the records to publish. However, you can
set a different mode whereby it will include all records that match your query, and when done,
@@ -884,7 +792,7 @@ db_poller do
end
```
-## Running consumers
+# Running consumers
Deimos includes a rake task. Once it's in your gemfile, just run
@@ -895,7 +803,7 @@ which can be useful if you want to figure out if you're inside the task
as opposed to running your Rails server or console. E.g. you could start your
DB backend only when your rake task is running.
-## Generated Schema Classes
+# Generated Schema Classes
Deimos offers a way to generate classes from Avro schemas. These classes are documented
with YARD to aid in IDE auto-complete, and will help to move errors closer to the code.
@@ -925,7 +833,7 @@ One additional configuration option indicates whether nested records should be g
You can generate a tombstone message (with only a key and no value) by calling the `YourSchemaClass.tombstone(key)` method. If you're using a `:field` key config, you can pass in just the key scalar value. If using a key schema, you can pass it in as a hash or as another schema class.
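+
+For example (a sketch with a hypothetical schema class name):
+
+```ruby
+# With a :field key config, pass the key value directly:
+tombstone = Schemas::MySchema.tombstone(123)
+# With a key schema, pass a hash (or another schema class instance):
+tombstone = Schemas::MySchema.tombstone({'id' => 123})
+```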
-### Consumer
+## Consumer
The consumer interface uses the `decode_message` method to turn JSON hash into the Schemas
generated Class and provides it to the `consume`/`consume_batch` methods for their use.
@@ -933,13 +841,13 @@ generated Class and provides it to the `consume`/`consume_batch` methods for the
Examples of consumers would look like this:
```ruby
class MyConsumer < Deimos::Consumer
- def consume(payload, metadata)
- # Same method as Phobos consumers but payload is now an instance of Deimos::SchemaClass::Record
- # rather than a hash. metadata is still a hash that contains information like :key and :topic.
+ def consume_message(message)
+ # Same method as before but message.payload is now an instance of Deimos::SchemaClass::Record
+ # rather than a hash.
# You can interact with the schema class instance in the following way:
- do_something(payload.test_id, payload.some_int)
+ do_something(message.payload.test_id, message.payload.some_int)
# The original behaviour was as follows:
- do_something(payload[:test_id], payload[:some_int])
+ do_something(message.payload[:test_id], message.payload[:some_int])
end
end
```
@@ -958,9 +866,10 @@ class MyActiveRecordConsumer < Deimos::ActiveRecordConsumer
end
```
-### Producer
+## Producer
+
Similarly to the consumer interface, the producer interface for using Schema Classes in your app
-relies on the `publish`/`publish_list` methods to convert a _provided_ instance of a Schema Class
+relies on the `produce` method to convert a _provided_ instance of a Schema Class
into a hash that can be used freely by the Kafka client.
Examples of producers would look like this:
@@ -976,8 +885,7 @@ class MyProducer < Deimos::Producer
test_id: test_id,
some_int: some_int
)
- self.publish(message)
- self.publish_list([message])
+ self.produce([{payload: message}])
end
end
end
@@ -986,8 +894,9 @@ end
```ruby
class MyActiveRecordProducer < Deimos::ActiveRecordProducer
record_class Widget
- # @param payload [Deimos::SchemaClass::Record]
+ # @param attributes [Hash]
# @param _record [Widget]
+ # @return [Deimos::SchemaClass::Record]
def self.generate_payload(attributes, _record)
# This method converts your ActiveRecord into a Deimos::SchemaClass::Record. You will be able to use super
# as an instance of Schemas::MySchema and set values that are not on your ActiveRecord schema.
@@ -1000,51 +909,26 @@ end
# Metrics
-Deimos includes some metrics reporting out the box. It ships with DataDog support, but you can add custom metric providers as well.
+Deimos includes some metrics reporting out of the box, adding to the existing [Karafka DataDog support](https://karafka.io/docs/Monitoring-and-Logging/#datadog-and-statsd-integration). It ships with a DataDog provider, but you can add custom metric providers as well.
The following metrics are reported:
-* `consumer_lag` - for each partition, the number of messages
- it's behind the tail of the partition (a gauge). This is only sent if
- `config.consumers.report_lag` is set to true.
-* `handler` - a count of the number of messages received. Tagged
- with the following:
- * `topic:{topic_name}`
- * `status:received`
- * `status:success`
- * `status:error`
- * `time:consume` (histogram)
- * Amount of time spent executing handler for each message
- * Batch Consumers - report counts by number of batches
- * `status:batch_received`
- * `status:batch_success`
- * `status:batch_error`
- * `time:consume_batch` (histogram)
- * Amount of time spent executing handler for entire batch
- * `time:time_delayed` (histogram)
- * Indicates the amount of time between the `timestamp` property of each
- payload (if present) and the time that the consumer started processing
- the message.
-* `publish` - a count of the number of messages received. Tagged
- with `topic:{topic_name}`
-* `publish_error` - a count of the number of messages which failed
- to publish. Tagged with `topic:{topic_name}`
-* `pending_db_messages_max_wait` - the number of seconds which the
+* `deimos.pending_db_messages_max_wait` - the number of seconds which the
oldest KafkaMessage in the database has been waiting for, for use
with the database backend. Tagged with the topic that is waiting.
Will send a value of 0 with no topics tagged if there are no messages
waiting.
-* `db_producer.insert` - the number of messages inserted into the database
+* `deimos.outbox.publish` - the number of messages inserted into the database
for publishing. Tagged with `topic:{topic_name}`
-* `db_producer.process` - the number of DB messages processed. Note that this
+* `deimos.outbox.process` - the number of DB messages processed. Note that this
is *not* the same as the number of messages *published* if those messages
are compacted. Tagged with `topic:{topic_name}`
-### Configuring Metrics Providers
+## Configuring Metrics Providers
See the `metrics` field under [Configuration](#configuration).
View all available Metrics Providers [here](lib/deimos/metrics)
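+
+For example, to switch from the default mock to the DataDog provider (mirroring the sample in [CONFIGURATION.md](docs/CONFIGURATION.md); the host is illustrative):
+
+```ruby
+Deimos.configure do
+  metrics { Deimos::Metrics::Datadog.new({host: 'localhost'}) }
+end
+```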
-### Custom Metrics Providers
+## Custom Metrics Providers
Using the above configuration, it is possible to pass in any generic Metrics
Provider class as long as it exposes the methods and definitions expected by
@@ -1059,17 +943,18 @@ Also see [deimos.rb](lib/deimos.rb) under `Configure metrics` to see how the met
# Tracing
Deimos also includes some tracing for kafka consumers. It ships with
-DataDog support, but you can add custom tracing providers as well.
+DataDog support, but you can add custom tracing providers as well. (It does not use the built-in Karafka
+tracers so that it can support per-message tracing, which Karafka does not provide for.)
Trace spans are used for when incoming messages are schema-decoded, and a
separate span for message consume logic.
-### Configuring Tracing Providers
+## Configuring Tracing Providers
See the `tracing` field under [Configuration](#configuration).
View all available Tracing Providers [here](lib/deimos/tracing)
-### Custom Tracing Providers
+## Custom Tracing Providers
Using the above configuration, it is possible to pass in any generic Tracing
Provider class as long as it exposes the methods and definitions expected by
@@ -1083,7 +968,9 @@ Also see [deimos.rb](lib/deimos.rb) under `Configure tracing` to see how the tra
# Testing
-Deimos comes with a test helper class which provides useful methods for testing consumers.
+Deimos comes with a test helper class which provides useful methods for testing consumers. This is built on top of
+Karafka's [testing library](https://karafka.io/docs/Testing/) and is primarily helpful because it can decode
+the sent messages for comparison (Karafka only decodes the messages once they have been consumed).
In `spec_helper.rb`:
```ruby
@@ -1097,55 +984,34 @@ end
```ruby
# The following can be added to an RSpec file so that each unit
# test can have the same settings every time it is run
-around(:each) do |example|
- Deimos::TestHelpers.unit_test!
- example.run
- Deimos.config.reset!
-end
-
-# Similarly you can use the Kafka test helper
-around(:each) do |example|
- Deimos::TestHelpers.kafka_test!
- example.run
- Deimos.config.reset!
-end
-
-# Kakfa test helper using schema registry
-around(:each) do |example|
- Deimos::TestHelpers.full_integration_test!
- example.run
+after(:each) do
Deimos.config.reset!
+ Deimos.config.schema.backend = :avro_validation
end
```
-With the help of these helper methods, rspec examples can be written without having to tinker with Deimos settings.
-This also prevents Deimos setting changes from leaking in to other examples.
-
-This does not take away the ability to configure Deimos manually in individual examples. Deimos can still be configured like so:
+With the help of these helper methods, RSpec examples can be written without having to tinker with Deimos settings.
+This also prevents Deimos setting changes from leaking into other examples. You can still change settings at the individual test level, and the `after(:each)` block above will reset them once the example has finished running:
```ruby
it 'should not fail this random test' do
Deimos.configure do |config|
config.consumers.fatal_error = proc { true }
- config.consumers.reraise_errors = false
end
...
expect(some_object).to be_truthy
- ...
- Deimos.config.reset!
end
```
-If you are using one of the test helpers in an `around(:each)` block and want to override few settings for one example,
-you can do it like in the example shown above. These settings would only apply to that specific example and the Deimos config should
-reset once the example has finished running.
## Test Usage
-In your tests, you now have the following methods available:
+You can use `karafka.produce()` and `consumer.consume` in your tests without having to go through
+Deimos TestHelpers. However, there are some useful abilities that Deimos gives you:
+
```ruby
-# Pass a consumer class (not instance) to validate a payload against it.
-# This will fail if the payload does not match the schema the consumer
-# is set up to consume.
+# Pass a consumer class (not instance) to validate a payload against it. This takes either a class
+# or a topic (Karafka only supports topics in its test helpers). This will validate the payload
+# and execute the consumer logic.
test_consume_message(MyConsumer,
{ 'some-payload' => 'some-value' }) do |payload, metadata|
# do some expectation handling here
@@ -1158,15 +1024,6 @@ test_consume_message('my-topic-name',
# do some expectation handling here
end
-# Alternatively, you can test the actual consume logic:
-test_consume_message(MyConsumer,
- { 'some-payload' => 'some-value' },
- call_original: true)
-
-# Test that a given payload is invalid against the schema:
-test_consume_invalid_message(MyConsumer,
- { 'some-invalid-payload' => 'some-value' })
-
# For batch consumers, there are similar methods such as:
test_consume_batch(MyBatchConsumer,
[{ 'some-payload' => 'some-value' },
@@ -1181,7 +1038,7 @@ end
expect(topic_name).to have_sent(payload, key=nil, partition_key=nil, headers=nil)
# Inspect sent messages
-message = Deimos::Backends::Test.sent_messages[0]
+message = Deimos::TestHelpers.sent_messages[0]
expect(message).to eq({
message: {'some-key' => 'some-value'},
topic: 'my-topic',
@@ -1190,75 +1047,7 @@ expect(message).to eq({
})
```
-### Test Utilities
-
-There is also a helper method that will let you test if an existing schema
-would be compatible with a new version of it. You can use this in your
-Ruby console but it would likely not be part of your RSpec test:
-
-```ruby
-require 'deimos/test_helpers'
-# Can pass a file path, a string or a hash into this:
-Deimos::TestHelpers.schemas_compatible?(schema1, schema2)
-```
-
-You can use the `InlineConsumer` class to help with integration testing,
-with a full external Kafka running.
-
-If you have a consumer you want to test against messages in a Kafka topic,
-use the `consume` method:
-```ruby
-Deimos::Utils::InlineConsumer.consume(
- topic: 'my-topic',
- frk_consumer: MyConsumerClass,
- num_messages: 5
- )
-```
-
-This is a _synchronous_ call which will run the consumer against the
-last 5 messages in the topic. You can set `num_messages` to a number
-like `1_000_000` to always consume all the messages. Once the last
-message is retrieved, the process will wait 1 second to make sure
-they're all done, then continue execution.
-
-If you just want to retrieve the contents of a topic, you can use
-the `get_messages_for` method:
-
-```ruby
-Deimos::Utils::InlineConsumer.get_messages_for(
- topic: 'my-topic',
- schema: 'my-schema',
- namespace: 'my.namespace',
- key_config: { field: 'id' },
- num_messages: 5
-)
-```
-
-This will run the process and simply return the last 5 messages on the
-topic, as hashes, once it's done. The format of the messages will simply
-be
-```ruby
-{
- payload: { key: value }, # payload hash here
- key: "some_value" # key value or hash here
-}
-```
-
-Both payload and key will be schema-decoded as necessary according to the
-key config.
-
-You can also just pass an existing producer or consumer class into the method,
-and it will extract the necessary configuration from it:
-
-```ruby
-Deimos::Utils::InlineConsumer.get_messages_for(
- topic: 'my-topic',
- config_class: MyProducerClass,
- num_messages: 5
-)
-```
-
-## Utilities
+# Utilities
You can use your configured schema backend directly if you want to
encode and decode payloads outside of the context of sending messages.
@@ -1272,14 +1061,14 @@ backend.validate(my_payload) # throws an error if not valid
fields = backend.schema_fields # list of fields defined in the schema
```
-You can also do an even faster encode/decode:
+You can also do an even more concise encode/decode:
```ruby
encoded = Deimos.encode(schema: 'MySchema', namespace: 'com.my-namespace', payload: my_payload)
decoded = Deimos.decode(schema: 'MySchema', namespace: 'com.my-namespace', payload: my_encoded_payload)
```
-## Contributing
+# Contributing
Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos .
@@ -1289,15 +1078,15 @@ You can/should re-generate RBS types when methods or classes change by running t
rbs collection update
bundle exec sord --hide-private --no-sord-comments sig/defs.rbs --tags 'override:Override'
-### Linting
+## Linting
Deimos uses Rubocop to lint the code. Please run Rubocop on your code
before submitting a PR. The GitHub CI will also run rubocop on your pull request.
---
-
diff --git a/deimos-ruby.gemspec b/deimos-ruby.gemspec
index 376fc8a0..77d81d4c 100644
--- a/deimos-ruby.gemspec
+++ b/deimos-ruby.gemspec
@@ -19,9 +19,8 @@ Gem::Specification.new do |spec|
spec.require_paths = ['lib']
spec.add_runtime_dependency('avro_turf', '>= 1.4', '< 2')
- spec.add_runtime_dependency('fig_tree', '~> 0.0.2')
- spec.add_runtime_dependency('phobos', '>= 1.9', '< 3.0')
- spec.add_runtime_dependency('ruby-kafka', '< 2')
+ spec.add_runtime_dependency('karafka', '~> 2.0')
+ spec.add_runtime_dependency('fig_tree', '~> 0.2.0')
spec.add_runtime_dependency('sigurd', '>= 0.1.0', '< 1.0')
spec.add_development_dependency('activerecord-import')
@@ -33,7 +32,8 @@ Gem::Specification.new do |spec|
spec.add_development_dependency('guard', '~> 2')
spec.add_development_dependency('guard-rspec', '~> 4')
spec.add_development_dependency('guard-rubocop', '~> 1')
- spec.add_development_dependency('mysql2', '~> 0.5')
+ spec.add_development_dependency('karafka-testing', '~> 2.0')
+ spec.add_development_dependency('trilogy', '>= 0.1')
spec.add_development_dependency('pg', '~> 1.1')
spec.add_development_dependency('rails', '~> 6')
spec.add_development_dependency('rake', '~> 13')
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index 85098edb..01ed2351 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -1,111 +1,78 @@
# Configuration
-Deimos supports a succinct, readable syntax which uses
-pure Ruby to allow flexible configuration.
+Deimos has two methods of configuration:
+
+* Main Deimos configuration, which uses the [FigTree](https://github.com/flipp-oss/fig_tree) gem for its own settings.
+* Karafka routing configuration, which adds extensions to existing [Karafka routes](https://karafka.io/docs/Routing/).
+
+The majority of application configuration, including Kafka and `librdkafka` settings, are part of existing [Karafka configuration](https://karafka.io/docs/Configuration/).
+
+## Main Configuration
You can access any configuration value via a simple `Deimos.config.whatever`.
-Nested configuration is denoted in simple dot notation:
-`kafka.ssl.enabled`. Headings below will follow the nested
-configurations.
+Nested configuration is denoted in simple dot notation: `schema.path`. Headings below will follow the nested configurations.
-## Base Configuration
-Config name| Default |Description
------------|-----------------------------|-----------
-logger| `Logger.new(STDOUT)` |The logger that Deimos will use.
-payload_log| `:full` |Determines how much data is logged per payload.`:full` - all keys and payloads are logged.`:keys` - only keys are logged.`:count` - only the total count of messages are logged.
-phobos_logger| `Deimos.config.logger` |The logger passed to Phobos.
-metrics| `Deimos::Metrics::Mock.new` |The metrics backend use for reporting.
-tracer| `Deimos::Tracing::Mock.new` |The tracer backend used for debugging.
+### Configuration Syntax
-## Defining Producers
+Sample:
-You can define a new producer thusly:
```ruby
Deimos.configure do
- producer do
- class_name 'MyProducer'
- topic 'MyTopic'
- schema 'MyTopicSchema'
- namespace 'my.namespace'
- key_config field: :id
-
- # If config.schema.path is app/schemas, assumes there is a file in
- # app/schemas/my/namespace/MyTopicSchema.avsc
+ metrics { Deimos::Metrics::Datadog.new({host: 'localhost'}) }
+ schema.path "#{Rails.root}/app/schemas"
+
+ # Multiple nested config fields via block
+ consumers do
+ session_timeout 30
+ offset_commit_interval 10
end
end
```
-You can have as many `producer` blocks as you like to define more producers.
+### Base Configuration
-Config name|Default|Description
------------|-------|-----------
-class_name|nil|Class name of the producer class (subclass of `Deimos::Producer`.)
-topic|nil|Topic to produce to.
-schema|nil|Name of the schema to use to encode data before producing.
-namespace|nil|Namespace of the schema to use when finding it locally.
-key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
-use_schema_classes|nil|Set to true or false to enable or disable using the producers schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes)
-max_batch_size|500|Maximum publishing batch size. Defaults to top-level configuration of 500.
+| Config name | Default | Description |
+|-------------|-----------------------------|----------------------------------------|
+| metrics     | `Deimos::Metrics::Mock.new` | The metrics backend used for reporting. |
+| tracer | `Deimos::Tracing::Mock.new` | The tracer backend used for debugging. |
-## Defining Consumers
+Note that all blocks are evaluated in the context of the configuration object.
+If you're calling this inside another class or method, you'll need to save
+things you need to reference into local variables before calling `configure`.
-Consumers are defined almost identically to producers:
+### Producer Configuration
-```ruby
-Deimos.configure do
- consumer do
- class_name 'MyConsumer'
- topic 'MyTopic'
- schema 'MyTopicSchema'
- namespace 'my.namespace'
- key_config field: :id
+| Config name | Default | Description |
+|------------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| producers.topic_prefix | nil | Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics. |
+| producers.disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. |
+| producers.backend      | `:kafka_async` | Currently can be set to `:outbox`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers.  |
- # Setting to :inline_batch will invoke consume_batch instead of consume
- # for each batch of messages.
- delivery :batch
+### Schema Configuration
- # If config.schema.path is app/schemas, assumes there is a file in
- # app/schemas/my/namespace/MyTopicSchema.avsc
- end
-end
-```
+| Config name | Default | Description |
+|-----------------------------|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
+| schema.backend | `:mock` | Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends). |
+| schema.registry_url | `http://localhost:8081` | URL of the Confluent schema registry. |
+| schema.user | nil | Basic auth user. |
+| schema.password | nil | Basic auth password. |
+| schema.path | nil | Local path to find your schemas. |
+| schema.use_schema_classes | false | Set this to true to use generated schema classes in your application. |
+| schema.generated_class_path | `app/lib/schema_classes` | Local path to generated schema classes. |
+| schema.nest_child_schemas | false | Set to true to nest subschemas within the generated class for the parent schema. |
+| schema.use_full_namespace | false | Set to true to generate folders for schemas matching the full namespace. |
+| schema.schema_namespace_map | {} | A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true. |
+
+### Outbox Configuration
+
+| Config name | Default | Description |
+|-----------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| outbox.logger         | `Deimos.config.logger` | Logger to use inside the outbox producer. |
+| outbox.log_topics | `[]` | List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry. |
+| outbox.compact_topics | `[]` | List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics. |
-In addition to the producer configs, you can define a number of overrides
-to the basic consumer configuration for each consumer. This is analogous to
-the `listener` config in `phobos.yml`.
-
-Config name|Default|Description
------------|-------|-----------
-class_name|nil|Class name of the consumer class (subclass of `Deimos::Consumer`.)
-topic|nil|Topic to produce to.
-schema|nil|This is optional but strongly recommended for testing purposes; this will validate against a local schema file used as the reader schema, as well as being able to write tests against this schema. This is recommended since it ensures you are always getting the values you expect.
-namespace|nil|Namespace of the schema to use when finding it locally.
-key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
-disabled|false|Set to true to skip starting an actual listener for this consumer on startup.
-group_id|nil|ID of the consumer group.
-use_schema_classes|nil|Set to true or false to enable or disable using the consumers schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes)
-bulk_import_id_column|:bulk_import_id|Name of the column to use for multi-table imports.
-replace_associations|true|If false, append to associations in multi-table imports rather than replacing them.
-max_db_batch_size|nil|Maximum limit for batching database calls to reduce the load on the db.
-max_concurrency|1|Number of threads created for this listener. Each thread will behave as an independent consumer. They don't share any state.
-start_from_beginning|true|Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of config. As such, this setting only applies when the consumer initially starts consuming from a topic
-max_bytes_per_partition|512.kilobytes|Maximum amount of data fetched from a single partition at a time.
-min_bytes|1|Minimum number of bytes to read before returning messages from the server; if `max_wait_time` is reached, this is ignored.
-max_wait_time|5|Maximum duration of time to wait before returning messages from the server, in seconds.
-force_encoding|nil|Apply this encoding to the message payload. If blank it uses the original encoding. This property accepts values defined by the ruby Encoding class (https://ruby-doc.org/core-2.3.0/Encoding.html). Ex: UTF_8, ASCII_8BIT, etc.
-delivery|`:batch`|The delivery mode for the consumer. Possible values: `:message, :batch, :inline_batch`. See Phobos documentation for more details.
-session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
-offset_commit_interval|10|Interval between offset commits, in seconds.
-offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
-offset_retention_time|nil|The time period that committed offsets will be retained, in seconds. Defaults to the broker setting.
-heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
-backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
-replace_associations|nil| Whether to delete existing associations for records during bulk consumption for this consumer. If no value is specified the provided/default value from the `consumers` configuration will be used.
-bulk_import_id_generator|nil| Block to determine the `bulk_import_id` generated during bulk consumption. If no block is specified the provided/default block from the `consumers` configuration will be used.
-save_associations_first|false|Whether to save associated records of primary class prior to upserting primary records. Foreign key of associated records are assigned to the record class prior to saving the record class
-
-## Defining Database Pollers
+### Defining Database Pollers
These are used when polling the database via `rake deimos:db_poller`. You
can create a number of pollers, one per topic.
@@ -119,157 +86,95 @@ Deimos.configure do
end
```
-Config name|Default|Description
------------|-------|-----------
-producer_class|nil|ActiveRecordProducer class to use for sending messages.
-mode|:time_based|Whether to use time-based polling or state-based polling.
-run_every|60|Amount of time in seconds to wait between runs.
-timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
-delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
-retries|1|The number of times to retry for a *non-Kafka* error.
-full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only.
-start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only.
-state_column|nil|If set, this represents the DB column to use to update publishing status. State-based only.
-publish_timestamp_column|nil|If set, this represents the DB column to use to update when publishing is done. State-based only.
-published_state|nil|If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only.
-failed_state|nil|If set, the poller will update the `state_column` to this value when publishing fails. State-based only.
-poller_class|nil|Inherited poller class name to use for publishing to multiple kafka topics from a single poller.
-
-## Kafka Configuration
-
-Config name|Default|Description
------------|-------|-----------
-kafka.logger|`Deimos.config.logger`|Logger passed to RubyKafka.
-kafka.seed_brokers|`['localhost:9092']`|URL for the Kafka brokers.
-kafka.client_id|`phobos`|Identifier for this application.
-kafka.connect_timeout|15|The socket timeout for connecting to the broker, in seconds.
-kafka.socket_timeout|15|The socket timeout for reading and writing to the broker, in seconds.
-kafka.ssl.enabled|false|Whether SSL is enabled on the brokers.
-kafka.ssl.ca_certs_from_system|false|Use CA certs from system.
-kafka.ssl.ca_cert|nil| A PEM encoded CA cert, a file path to the cert, or an Array of certs to use with an SSL connection.
-kafka.ssl.client_cert|nil|A PEM encoded client cert to use with an SSL connection, or a file path to the cert.
-kafka.ssl.client_cert_key|nil|A PEM encoded client cert key to use with an SSL connection.
-kafka.sasl.enabled|false|Whether SASL is enabled on the brokers.
-kafka.sasl.gssapi_principal|nil|A KRB5 principal.
-kafka.sasl.gssapi_keytab|nil|A KRB5 keytab filepath.
-kafka.sasl.plain_authzid|nil|Plain authorization ID.
-kafka.sasl.plain_username|nil|Plain username.
-kafka.sasl.plain_password|nil|Plain password.
-kafka.sasl.scram_username|nil|SCRAM username.
-kafka.sasl.scram_password|nil|SCRAM password.
-kafka.sasl.scram_mechanism|nil|Scram mechanism, either "sha256" or "sha512".
-kafka.sasl.enforce_ssl|nil|Whether to enforce SSL with SASL.
-kafka.sasl.oauth_token_provider|nil|OAuthBearer Token Provider instance that implements method token. See {Sasl::OAuth#initialize}.
-
-## Consumer Configuration
-
-These are top-level configuration settings, but they can be overridden
-by individual consumers.
-
-Config name|Default|Description
------------|-------|-----------
-consumers.session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
-consumers.offset_commit_interval|10|Interval between offset commits, in seconds.
-consumers.offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
-consumers.heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
-consumers.backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
-consumers.reraise_errors|false|Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. This is automatically set to true when using the `TestHelpers` module in RSpec.
-consumers.report_lag|false|Whether to send the `consumer_lag` metric. This requires an extra thread per consumer.
-consumers.fatal_error|`proc { false }`|Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is available. Not needed if reraise_errors is set to true.
-consumers.replace_associations|true|Whether to delete existing associations for records during bulk consumption prior to inserting new associated records
-consumers.bulk_import_id_generator|`proc { SecureRandom.uuid }`| Block to determine the `bulk_import_id` generated during bulk consumption. Block will be used for all bulk consumers unless explicitly set for individual consumers
-
-## Producer Configuration
-
-Config name|Default|Description
------------|-------|-----------
-producers.ack_timeout|5|Number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout.
-producers.required_acks|1|Number of replicas that must acknowledge a write, or `:all` if all in-sync replicas must acknowledge.
-producers.max_retries|2|Number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt.
-producers.retry_backoff|1|Number of seconds to wait between retries.
-producers.max_buffer_size|10_000|Number of messages allowed in the buffer before new writes will raise `BufferOverflow` exceptions.
-producers.max_buffer_bytesize|10_000_000|Maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in `BufferOverflow` being raised.
-producers.compression_codec|nil|Name of the compression codec to use, or nil if no compression should be performed. Valid codecs: `:snappy` and `:gzip`
-producers.compression_threshold|1|Number of messages that needs to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer.
-producers.max_queue_size|10_000|Maximum number of messages allowed in the queue. Only used for async_producer.
-producers.delivery_threshold|0|If greater than zero, the number of buffered messages that will automatically trigger a delivery. Only used for async_producer.
-producers.delivery_interval|0|if greater than zero, the number of seconds between automatic message deliveries. Only used for async_producer.
-producers.persistent_connections|false|Set this to true to keep the producer connection between publish calls. This can speed up subsequent messages by around 30%, but it does mean that you need to manually call sync_producer_shutdown before exiting, similar to async_producer_shutdown.
-producers.schema_namespace|nil|Default namespace for all producers. Can remain nil. Individual producers can override.
-producers.topic_prefix|nil|Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics.
-producers.disabled|false|Disable all actual message producing. Generally more useful to use the `disable_producers` method instead.
-producers.backend|`:kafka_async`|Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers.
-producers.max_batch_size|500|Maximum batch size for publishing. Individual producers can override.
-
-## Schema Configuration
-
-Config name|Default|Description
------------|-------|-----------
-schema.backend|`:mock`|Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends).
-schema.registry_url|`http://localhost:8081`|URL of the Confluent schema registry.
-schema.user|nil|Basic auth user.
-schema.password|nil|Basic auth password.
-schema.path|nil|Local path to find your schemas.
-schema.use_schema_classes|false|Set this to true to use generated schema classes in your application.
-schema.generated_class_path|`app/lib/schema_classes`|Local path to generated schema classes.
-schema.nest_child_schemas|false|Set to true to nest subschemas within the generated class for the parent schema.
-schema.use_full_namespace|false|Set to true to generate folders for schemas matching the full namespace.
-schema.schema_namespace_map|{}|A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true.
-
-## Database Producer Configuration
-
-Config name|Default|Description
------------|-------|-----------
-db_producer.logger|`Deimos.config.logger`|Logger to use inside the DB producer.
-db_producer.log_topics|`[]`|List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry.
-db_producer.compact_topics|`[]`|List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics.
-
-## Configuration Syntax
-
-Sample:
-
+| Config name | Default | Description |
+|--------------------------|--------------|---------------------------------------------------------------------------------------------------------------------------------------|
+| producer_class | nil | ActiveRecordProducer class to use for sending messages. |
+| producer_classes | [] | Array of ActiveRecordProducer classes to use for sending messages. You can use this instead of `producer_class`. |
+| mode | :time_based | Whether to use time-based polling or state-based polling. |
+| run_every | 60 | Amount of time in seconds to wait between runs. |
+| timestamp_column | `:updated_at` | Name of the column to query. Remember to add an index to this column! |
+| delay_time | 2 | Amount of time in seconds to wait before picking up records, to allow for transactions to finish. |
+| retries | 1 | The number of times to retry for a *non-Kafka* error. |
+| full_table | false | If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only. |
+| start_from_beginning | true | If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only. |
+| state_column | nil | If set, this represents the DB column to use to update publishing status. State-based only. |
+| publish_timestamp_column | nil | If set, this represents the DB column to use to update when publishing is done. State-based only. |
+| published_state | nil | If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only. |
+| failed_state | nil | If set, the poller will update the `state_column` to this value when publishing fails. State-based only. |
+| poller_class             | nil           | Poller subclass name to use for publishing to multiple Kafka topics from a single poller. |
+
+## Karafka Routing
+
+The following are additional settings that can be added to the `topic` block in Karafka routes, or to `defaults` blocks.
+
+### Shared Settings
+
+| Config name | Default | Description |
+|--------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| payload_log        | :full   | Determines how much data is logged per payload. `:full` - all keys and payloads are logged. `:keys` - only keys are logged. `:count` - only the total count of messages is logged. |
+| schema | nil | Name of the schema to use to encode data before producing. |
+| namespace | nil | Namespace of the schema to use when finding it locally. |
+| key_config | nil | Configuration hash for message keys. See [Kafka Message Keys](../README.md#kafka-message-keys). |
+| use_schema_classes | nil     | Set to true or false to enable or disable using generated schema classes for this topic. See [Generated Schema Classes](../README.md#generated-schema-classes). |
+
+### Consumer Settings
+
+| Config name | Default | Description |
+|--------------------------|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| each_message | false | If true, use `consume_message` for each message rather than `consume_batch` for the full batch. |
+| reraise_errors | false | Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the fatal_error configuration. |
+| fatal_error              | `proc { false }`  | Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is unavailable. Not needed if reraise_errors is set to true. |
+| max_db_batch_size | nil | Maximum limit for batching database calls to reduce the load on the db. |
+| bulk_import_id_column | `:bulk_import_id` | Name of the column to use for multi-table imports. |
+| replace_associations | true | If false, append to associations in multi-table imports rather than replacing them. |
+| bulk_import_id_generator | nil | Block to determine the bulk_import_id generated during bulk consumption. If no block is specified the provided/default block from the consumers configuration will be used. |
+| save_associations_first  | false             | Whether to save associated records of the primary class prior to upserting the primary records. Foreign keys of associated records are assigned to the record class prior to saving it. |
+
+### Defining Consumers
+
+An example consumer configuration:
```ruby
-Deimos.configure do
- logger Logger.new(STDOUT)
- # Nested config field
- kafka.seed_brokers ['my.kafka.broker:9092']
-
- # Multiple nested config fields via block
- consumers do
- session_timeout 30
- offset_commit_interval 10
+Karafka::App.routes.draw do
+ defaults do
+ payload_log :keys
end
-
- # Define a new producer
- producer do
- class_name 'MyProducer'
- topic 'MyTopic'
+
+ topic 'MyTopic' do
+    namespace 'my.namespace'
+ consumer MyConsumer
schema 'MyTopicSchema'
key_config field: :id
- end
- # Define another new producer
- producer do
- class_name 'AnotherProducer'
- topic 'AnotherTopic'
- schema 'AnotherSchema'
- key_config plain: true
+ # If config.schema.path is app/schemas, assumes there is a file in
+ # app/schemas/my/namespace/MyTopicSchema.avsc
end
+end
+```
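+
+The consumer class referenced in the routing above is an ordinary Deimos consumer. A minimal sketch (the class body is illustrative; see the upgrade guide for the full consumer API):
+
+```ruby
+class MyConsumer < Deimos::Consumer
+  def consume_batch
+    # messages.payloads returns the decoded payload hashes for the batch
+    messages.payloads.each do |payload|
+      Karafka.logger.info("Received: #{payload}")
+    end
+  end
+end
+```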
- # Define a consumer
- consumer do
- class_name 'MyConsumer'
- topic 'TopicToConsume'
- schema 'ConsumerSchema'
- key_config plain: true
- # include Phobos / RubyKafka configs
- start_from_beginning true
- heartbeat_interval 10
+### Producer Settings
+
+| Config name | Default | Description |
+|----------------|---------|------------------------------------------------------------------------------------------------------------|
+| producer_class | nil | Class of the producer to use for the current topic. |
+| disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. |
+
+### Defining Producers
+
+You can define a new producer almost identically to a consumer:
+```ruby
+Karafka::App.routes.draw do
+ defaults do
+ namespace 'my.namespace'
end
+ topic 'MyTopic' do
+ producer_class MyProducer
+ schema 'MyTopicSchema'
+ key_config field: :id
+ payload_log :count
+ # If config.schema.path is app/schemas, assumes there is a file in
+ # app/schemas/my/namespace/MyTopicSchema.avsc
+ end
end
```
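+
+With the routing above in place, a producer class and a publish call might look like the following sketch (the payload fields are illustrative; `produce` accepts Karafka-style message hashes):
+
+```ruby
+class MyProducer < Deimos::Producer
+end
+
+# The topic, schema and key config come from the `topic 'MyTopic'` block above.
+MyProducer.produce([
+  { payload: { 'test_id' => 'abc', 'some_int' => 3 }, key: 'abc' }
+])
+```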
-Note that all blocks are evaluated in the context of the configuration object.
-If you're calling this inside another class or method, you'll need to save
-things you need to reference into local variables before calling `configure`.
diff --git a/docs/UPGRADING.md b/docs/UPGRADING.md
index 083fea65..252af25e 100644
--- a/docs/UPGRADING.md
+++ b/docs/UPGRADING.md
@@ -1,5 +1,243 @@
# Upgrading Deimos
+## Upgrading to 2.x
+
+2.x is a major rewrite from 1.0. The underlying library has been changed from [Phobos](https://github.com/phobos/phobos) to [Karafka](https://karafka.io/). This change has given us an opportunity to fix some issues and deprecated code paths from version 1.0 as well as provide much more functionality by integrating more fully with the Karafka ecosystem.
+
+For a deeper dive into the internal changes, please see [...]().
+
+There are a number of breaking changes. We provide a `v2` generator that attempts to fix many of these breaking changes automatically. To run the generator:
+
+ KARAFKA_BOOT_FILE=false rails g deimos:v2
+
+### Running Deimos
+
+Instead of running `rake deimos:start`, you now run your Kafka consumers the same way any Karafka consumers are run: `karafka server`.
+
+### Configuration
+
+In V1, Deimos configuration was all done in a single `Deimos.configure` block, including Kafka configs, consumers and producers:
+
+```ruby
+Deimos.configure do
+ producers.schema_namespace 'com.my-namespace'
+ kafka.seed_brokers ['my-broker:9092']
+
+ consumer do
+ class_name 'MyConsumer'
+ topic 'MyTopic'
+ session_timeout 30
+ schema 'MySchema'
+ key_config field: :id
+ namespace 'com.my-namespace'
+ end
+
+ producer do
+ class_name 'MyProducer'
+ topic 'MyTopic2'
+ schema 'MySchema2'
+ key_config none: true
+ end
+end
+```
+
+In V2, the `Deimos.configure` block only takes Deimos-specific settings and is **not** used to configure producers and consumers. Kafka settings now go in the Karafka `kafka` setting, and producers and consumers are configured via Karafka [routing](https://karafka.io/docs/Routing/). Deimos adds its own extensions to this routing, which can be applied either in a `defaults` block (applying to all consumers and producers) or in individual `topic` blocks:
+
+```ruby
+Deimos.configure do
+ producers.schema_namespace 'com.my-namespace'
+end
+
+class KarafkaApp < Karafka::App
+ setup do |config|
+ config.kafka = {
+ "bootstrap.servers": "my-broker:9092"
+ }
+ end
+
+ routes.draw do
+ defaults do
+ namespace "com.my-namespace"
+ end
+
+ topic "MyTopic" do
+ # Karafka settings
+ consumer MyConsumer
+ kafka({"session.timeout.ms": 30_000})
+ # Deimos settings
+      schema "MySchema"
+      key_config({field: :id})
+ end
+
+ topic "MyTopic2" do
+ # these are all Deimos settings since Karafka doesn't actually do per-topic producer configs
+ producer_class MyProducer
+ schema 'MySchema2'
+ key_config none: true
+ end
+ end
+end
+```
+
+This configuration must live in a file called `karafka.rb` at the root of your application. The V2 generator will generate this file for you. If you create this file yourself and start your app with the old `Deimos.configure` code, you will get notifications pointing to the correct place for each setting.
+
+### Removed deprecations
+
+The following were deprecated in version 1.x and are removed in 2.0.
+
+* The `kafka_producer` method for KafkaSource is no longer supported. Please use `kafka_producers`. (This is not addressed by the V2 generator.)
+
+```ruby
+# before:
+class MyRecord < ApplicationRecord
+ def kafka_producer
+ MyProducer
+ end
+end
+
+# after:
+class MyRecord < ApplicationRecord
+ def kafka_producers
+ [MyProducer]
+ end
+end
+```
+
+* The `record_attributes` method for ActiveRecordConsumer now must take two parameters, not one. (The V2 generator can fix this.)
+
+```ruby
+# before:
+class MyConsumer < Deimos::ActiveRecordConsumer
+ def record_attributes(payload)
+ # ...
+ end
+end
+
+# after:
+class MyConsumer < Deimos::ActiveRecordConsumer
+ def record_attributes(payload, key)
+ # ...
+ end
+end
+```
+
+* The `BatchConsumer` class has been removed. Please use the `Consumer` class.
+* You can no longer configure your application using a `phobos.yml` file. The V2 generator will not be able to work on apps using this approach.
+* Removed `test_consume_invalid_message` and `test_consume_batch_invalid_message` test helpers. These did not serve a useful purpose.
+* The following deprecated testing functions have been removed: `stub_producers_and_consumers!`, `stub_producer`, `stub_consumer`, `stub_batch_consumer`. These have not done anything in a long time.
+
+### Major breaking changes
+* Since Karafka only supports Ruby >= 3.0, Deimos now also requires Ruby 3.0 or higher.
+* Deimos no longer supports a separate logger from Karafka. When you configure a Karafka logger, Deimos will use that logger for all its logging. (Deimos logs will be prefixed with a `[Deimos]` tag.)
+* The `:db` backend has been renamed to `:outbox`. All associated classes (like `DbProducer`) have likewise been renamed. The Rake task has also been renamed to `rake deimos:outbox`.
+* The `SchemaControllerMixin` has been removed as there was no serious usage for it.
+* `InlineConsumer` has been removed - Karafka Pro has an [Iterator API](https://karafka.io/docs/Pro-Iterator-API/) that does the same thing. There has also been no evidence that it was used (and it was probably pretty buggy).
+* The `:test` backend has been removed and the `Deimos::TestHelpers` module is now largely powered by [karafka-testing](https://github.com/karafka/karafka-testing/). This means that you can no longer use `Deimos::Backends::Test.sent_messages` - you need to use `Deimos::TestHelpers.sent_messages`. (The V2 generator should fix this.)
+* Individual consumer and producer settings now live within the Karafka routing configuration. This means you can no longer call e.g. `consumer.schema` to retrieve this information, as settings are no longer stored directly on the consumer and producer objects (the information is still available, but via different methods).
+* Consumers should no longer define a `consume` method, as the semantics have changed with Karafka. Instead, you can define a `consume_message` or `consume_batch` method. Both of these methods now take Karafka `Message` objects instead of hashes. The V2 generator can handle translating this for you, but if you create new consumers, you should take advantage of the Karafka functionality and use it first-class.
+* Phobos `delivery_method` is no longer relevant. Instead, specify an `each_message` setting for your consumer. If set to true, you should define a `consume_message` method. Otherwise, you should define a `consume_batch` method. (Note that this is the reverse of the previous default, which assumed `delivery_method: message`.) The V2 generator will create the correct setting for each consumer.
+
+```ruby
+# before:
+class MyConsumer < Deimos::Consumer
+ def consume(payload, metadata)
+ # payload and metadata are both hashes
+ end
+
+ # OR with delivery_method: inline_batch
+ def batch_consume(payloads, metadata)
+ # payloads is an array of hashes, metadata is a hash
+ end
+end
+
+# now:
+class MyConsumer < Deimos::Consumer
+ def consume_batch
+ payloads = messages.payloads # messages is an instance method and `payloads` will return the decoded hashes
+ end
+
+  # OR with each_message(true)
+ def consume_message(message)
+ # message is a Karafka Message object
+ payload = message.payload
+ key = message.key # etc.
+ end
+end
+```
+
+### Metrics
+
+The following metrics have been **removed** in favor of Karafka's more robust [DataDog metrics](https://karafka.io/docs/Monitoring-and-Logging/#datadog-and-statsd-integration) and WaterDrop's [DataDog metrics](https://karafka.io/docs/WaterDrop-Monitoring-and-Logging/#datadog-and-statsd-integration):
+* `consumer_lag` (use `consumer.lags`)
+* `handler` (use `consumer.consumed.time_taken`)
+* `publish` (use `produced_sync` and `produced_async`)
+* `publish_error` (use `deliver.errors`)
+
+You will need to manually add the DataDog MetricsListener as shown in the above pages.
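+
+For example (a sketch based on the Karafka monitoring documentation linked above; verify the listener class and options against your Karafka version):
+
+```ruby
+require 'datadog/statsd'
+
+# Karafka's DataDog listener, backed by a dogstatsd-ruby client
+listener = ::Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new do |config|
+  config.client = Datadog::Statsd.new('localhost', 8125)
+end
+
+Karafka.monitor.subscribe(listener)
+```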
+
+The following metrics have been **renamed**:
+
+* `db_producer.insert` -> `outbox.insert`
+* `db_producer.process` -> `outbox.process`
+
+### Instrumentation
+
+Deimos's own instrumentation layer has been removed in favor of Karafka's. You can still subscribe to Deimos notifications - you simply do it via Karafka's monitor instead of Deimos's.
+
+```ruby
+# before:
+Deimos.subscribe('encode_messages') do |event|
+ # ...
+end
+
+# after:
+Karafka.monitor.subscribe('deimos.encode_message') do |event|
+ # ...
+end
+```
+
+Note that Karafka's monitors do not support the legacy "splatted" subscribe:
+```ruby
+Deimos.subscribe("event") do |*args|
+ payload = ActiveSupport::Notifications::Event.new(*args).payload
+end
+```
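+
+Instead, rely on the event object that Karafka yields to the block (a minimal sketch; the event name is one of the renamed events listed below):
+
+```ruby
+Karafka.monitor.subscribe('deimos.encode_message') do |event|
+  # Karafka yields an event object; use #payload (or #[]) rather than splatted args
+  payload = event.payload
+end
+```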
+
+The following instrumentation events have been **removed** in favor of Karafka's [events](https://karafka.io/docs/Monitoring-and-Logging/#subscribing-to-the-instrumentation-events):
+
+* `produce_error` (use `error.occurred`)
+
+The following events have been **renamed**:
+* `encode_messages` -> `deimos.encode_message` (**note that only one message is instrumented at a time now**)
+* `db_producer.produce` -> `deimos.outbox.produce`
+* `batch_consumption.valid_records` -> `deimos.batch_consumption.valid_records`
+* `batch_consumption.invalid_records` -> `deimos.batch_consumption.invalid_records`
+
+### Additional breaking changes
+* `key_config` now defaults to `{none: true}` instead of erroring out if not set.
+* `reraise_errors` now defaults to true if the Rails env is set to `test`, and false otherwise.
+* `fatal_error?` now receives a Karafka `messages` object instead of a payload hash or array of hashes.
+* `watched_attributes` has been moved from the corresponding ActiveRecord class to the ActiveRecordProducer class. The object being watched is passed into the method.
+* Removed `TestHelpers.full_integration_test!` and `kafka_test!` as Karafka does not currently support these use cases. If we need them back, we will need to put in changes to the testing library to support them.
+* `test_consume_message` and `test_consume_batch` used to not fully validate schemas when using the `:avro_validation` backend. Now these are fully validated, which may cause test errors when upgrading.
+
+### New functionality
+
+* When setting up a Datadog metrics client, you can pass `:karafka_namespace`, `:karafka_distribution_mode`, or `:rd_kafka_metrics` tags to specify the Karafka settings for Datadog metrics.
+* The `payload_log` setting now works for consumers as well as producers, as it is now a topic setting.
+* You can publish messages **without a Deimos Producer class**. Karafka producers take a hash with `:payload`, `:topic`, `:key`, `:headers` and `:partition_key` keys. As long as the topic is configured in `karafka.rb`, you don't need a special class to send the message - you can simply call `Karafka.producer.produce()` (see the sketch after this list).
+* The only features that are now available on the bare Producer (as opposed to ActiveRecordProducer) class are:
+  * Outbox backend
+  * Instance method to determine partition key (rather than passing it in)
+  * Using `Deimos.disable_producers`
+* If you need these features, you must continue using a `Deimos::Producer`.
+* You can now call `.produce(messages)` directly on a `Deimos::Producer`, which allows you to use these features while still passing a Karafka message hash. This removes the need to add a `payload_key` key to your payload. This is now the recommended way to publish from a Deimos producer.
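+
+A minimal sketch of publishing without a Deimos producer class, using WaterDrop's `produce_sync` as one concrete form of the `produce` call mentioned above (the topic must be configured with a schema in `karafka.rb` so the Deimos producer middleware can encode the hash; names and fields are illustrative):
+
+```ruby
+Karafka.producer.produce_sync(
+  topic: 'MyTopic',
+  payload: { 'test_id' => 'abc', 'some_int' => 3 },
+  key: 'abc'
+)
+```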
+
+### New deprecations
+* For testing, you no longer have to call `unit_test!` to get the right settings. It is handled automatically by Karafka. The only thing this method now does is set the schema backend to `:avro_validation`, and you can do that in a single line (see the sketch after this list).
+* The `skip_expectation` and `call_original` arguments to `test_consume_message` and `test_consume_batch` have been deprecated and no longer need to be provided. The assumption is that `call_original` is always true.
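+
+For example, the equivalent single line might be (a sketch using the `Deimos.configure` syntax shown earlier):
+
+```ruby
+# e.g. in spec_helper.rb - the only thing unit_test! still does
+Deimos.configure { schema.backend :avro_validation }
+```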
+
## Upgrading from < 1.5.0 to >= 1.5.0
If you are using Confluent's schema registry to Avro-encode your
diff --git a/lib/deimos.rb b/lib/deimos.rb
index e278b622..ec4c1bf7 100644
--- a/lib/deimos.rb
+++ b/lib/deimos.rb
@@ -1,17 +1,15 @@
# frozen_string_literal: true
require 'active_support'
+require 'karafka'
-require 'phobos'
require 'deimos/version'
+require 'deimos/logging'
require 'deimos/config/configuration'
require 'deimos/producer'
require 'deimos/active_record_producer'
require 'deimos/active_record_consumer'
require 'deimos/consumer'
-require 'deimos/batch_consumer'
-require 'deimos/instrumentation'
-require 'deimos/utils/lag_reporter'
require 'deimos/backends/base'
require 'deimos/backends/kafka'
@@ -23,27 +21,38 @@
require 'deimos/schema_class/enum'
require 'deimos/schema_class/record'
-require 'deimos/monkey_patches/phobos_cli'
+require 'deimos/ext/schema_route'
+require 'deimos/ext/consumer_route'
+require 'deimos/ext/producer_route'
+require 'deimos/ext/producer_middleware'
+require 'deimos/ext/routing_defaults'
require 'deimos/railtie' if defined?(Rails)
-require 'deimos/utils/schema_controller_mixin' if defined?(ActionController)
if defined?(ActiveRecord)
require 'deimos/kafka_source'
require 'deimos/kafka_topic_info'
- require 'deimos/backends/db'
+ require 'deimos/backends/outbox'
require 'sigurd'
- require 'deimos/utils/db_producer'
+ require 'deimos/utils/outbox_producer'
require 'deimos/utils/db_poller'
end
-require 'deimos/utils/inline_consumer'
require 'yaml'
require 'erb'
# Parent module.
module Deimos
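+  # Deimos-specific events registered with Karafka's notifications bus (see setup_karafka).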
+ EVENT_TYPES = %w(
+ deimos.ar_consumer.consume_batch
+ deimos.encode_message
+ deimos.batch_consumption.invalid_records
+ deimos.batch_consumption.valid_records
+ deimos.outbox.produce
+ )
+
class << self
+
# @return [Class]
def schema_backend_class
backend = Deimos.config.schema.backend.to_s
@@ -57,7 +66,7 @@ def schema_backend_class
# @param namespace [String]
# @return [Deimos::SchemaBackends::Base]
def schema_backend(schema:, namespace:)
- if Utils::SchemaClass.use?(config.to_h)
+ if config.schema.use_schema_classes
# Initialize an instance of the provided schema
# in the event the schema class is an override, the inherited
# schema and namespace will be applied
@@ -91,13 +100,26 @@ def decode(schema:, namespace:, payload:)
self.schema_backend(schema: schema, namespace: namespace).decode(payload)
end
+ # @param message [Hash] a Karafka message with keys :payload, :key and :topic
+ def decode_message(message)
+ topic = message[:topic]
+ if Deimos.config.producers.topic_prefix
+ topic = topic.sub(Deimos.config.producers.topic_prefix, '')
+ end
+ config = karafka_config_for(topic: topic)
+ message[:payload] = config.deserializers[:payload].decode_message_hash(message[:payload])
+ if message[:key] && config.deserializers[:key].respond_to?(:decode_message_hash)
+ message[:key] = config.deserializers[:key].decode_message_hash(message[:key])
+ end
+ end
+
# Start the DB producers to send Kafka messages.
# @param thread_count [Integer] the number of threads to start.
# @return [void]
- def start_db_backend!(thread_count: 1)
+ def start_outbox_backend!(thread_count: 1)
Sigurd.exit_on_signal = true
- if self.config.producers.backend != :db
- raise('Publish backend is not set to :db, exiting')
+ if self.config.producers.backend != :outbox
+ raise('Publish backend is not set to :outbox, exiting')
end
if thread_count.nil? || thread_count.zero?
@@ -105,25 +127,58 @@ def start_db_backend!(thread_count: 1)
end
producers = (1..thread_count).map do
- Deimos::Utils::DbProducer.
- new(self.config.db_producer.logger || self.config.logger)
+ Deimos::Utils::OutboxProducer.
+ new(self.config.outbox.logger || Karafka.logger)
end
executor = Sigurd::Executor.new(producers,
sleep_seconds: 5,
- logger: self.config.logger)
+ logger: Karafka.logger)
signal_handler = Sigurd::SignalHandler.new(executor)
signal_handler.run!
end
- end
-end
-at_exit do
- begin
- Deimos::Backends::KafkaAsync.shutdown_producer
- Deimos::Backends::Kafka.shutdown_producer
- rescue StandardError => e
- Deimos.config.logger.error(
- "Error closing producer on shutdown: #{e.message} #{e.backtrace.join("\n")}"
- )
+ def setup_karafka
+ Karafka.producer.middleware.append(Deimos::ProducerMiddleware)
+ # for multiple setup calls
+ Karafka.producer.config.kafka =
+ Karafka::Setup::AttributesMap.producer(Karafka::Setup::Config.config.kafka.dup)
+ EVENT_TYPES.each { |type| Karafka.monitor.notifications_bus.register_event(type) }
+
+ Karafka.producer.monitor.subscribe('error.occurred') do |event|
+ if event.payload.key?(:messages)
+ topic = event[:messages].first[:topic]
+ config = Deimos.karafka_config_for(topic: topic)
+ message = Deimos::Logging.messages_log_text(config&.payload_log, event[:messages])
+ Karafka.logger.error("Error producing messages: #{event[:error].message} #{message.to_json}")
+ end
+ end
+ end
+
+ # @return [Array] Decoded payloads
- # @param metadata [Hash] Information about batch, including keys.
# @return [void]
- def consume_batch(payloads, metadata)
- messages = payloads.
- zip(metadata[:keys]).
- map { |p, k| Deimos::Message.new(p, nil, key: k) }
+ def consume_batch
+ deimos_messages = messages.map { |p| Deimos::Message.new(p.payload, key: p.key) }
- tag = metadata[:topic]
+ tag = topic.name
Deimos.config.tracer.active_span.set_tag('topic', tag)
- Deimos.instrument('ar_consumer.consume_batch', tag) do
- if @compacted || self.class.config[:no_keys]
- update_database(compact_messages(messages))
+ Karafka.monitor.instrument('deimos.ar_consumer.consume_batch', {topic: tag}) do
+ if @compacted && deimos_messages.map(&:key).compact.any?
+ update_database(compact_messages(deimos_messages))
else
- uncompacted_update(messages)
+ uncompacted_update(deimos_messages)
end
end
end
@@ -67,12 +66,12 @@ def columns(_klass)
def record_key(key)
if key.nil?
{}
- elsif key.is_a?(Hash)
- @key_converter.convert(key)
- elsif self.class.config[:key_field].nil?
+ elsif key.is_a?(Hash) || key.is_a?(SchemaClass::Record)
+ self.key_converter.convert(key)
+ elsif self.topic.key_config[:field].nil?
{ @klass.primary_key => key }
else
- { self.class.config[:key_field] => key }
+ { self.topic.key_config[:field].to_s => key }
end
end
@@ -154,10 +153,10 @@ def upsert_records(messages)
record_list = build_records(messages)
invalid = filter_records(record_list)
if invalid.any?
- ActiveSupport::Notifications.instrument('batch_consumption.invalid_records', {
- records: invalid,
- consumer: self.class
- })
+ Karafka.monitor.instrument('deimos.batch_consumption.invalid_records', {
+ records: invalid,
+ consumer: self.class
+ })
end
return if record_list.empty?
@@ -167,14 +166,14 @@ def upsert_records(messages)
updater = MassUpdater.new(@klass,
key_col_proc: key_col_proc,
col_proc: col_proc,
- replace_associations: self.class.replace_associations,
- bulk_import_id_generator: self.class.bulk_import_id_generator,
- save_associations_first: self.class.save_associations_first,
- bulk_import_id_column: self.class.bulk_import_id_column)
- ActiveSupport::Notifications.instrument('batch_consumption.valid_records', {
- records: updater.mass_update(record_list),
- consumer: self.class
- })
+ replace_associations: self.replace_associations,
+ bulk_import_id_generator: self.bulk_import_id_generator,
+ save_associations_first: self.save_associations_first,
+ bulk_import_id_column: self.bulk_import_id_column)
+ Karafka.monitor.instrument('deimos.batch_consumption.valid_records', {
+ records: updater.mass_update(record_list),
+ consumer: self.class
+ })
end
# @param record_list [BatchRecordList]
@@ -205,14 +204,14 @@ def build_records(messages)
attrs = attrs.merge(record_key(m.key))
next unless attrs
- col = if @klass.column_names.include?(self.class.bulk_import_id_column.to_s)
- self.class.bulk_import_id_column
+ col = if @klass.column_names.include?(self.bulk_import_id_column.to_s)
+ self.bulk_import_id_column
end
BatchRecord.new(klass: @klass,
attributes: attrs,
bulk_import_column: col,
- bulk_import_id_generator: self.class.bulk_import_id_generator)
+ bulk_import_id_generator: self.bulk_import_id_generator)
end
BatchRecordList.new(records.compact)
end
diff --git a/lib/deimos/active_record_consume/message_consumption.rb b/lib/deimos/active_record_consume/message_consumption.rb
index 29820193..f01b297f 100644
--- a/lib/deimos/active_record_consume/message_consumption.rb
+++ b/lib/deimos/active_record_consume/message_consumption.rb
@@ -1,10 +1,13 @@
# frozen_string_literal: true
+require 'deimos/consume/message_consumption'
+
module Deimos
module ActiveRecordConsume
# Methods for consuming individual messages and saving them to the database
# as ActiveRecord instances.
module MessageConsumption
+ include Deimos::Consume::MessageConsumption
# Find the record specified by the given payload and key.
# Default is to use the primary key column and the value of the first
# field in the key.
@@ -26,38 +29,29 @@ def assign_key(record, _payload, key)
record[record.class.primary_key] = key
end
- # @param payload [Hash,Deimos::SchemaClass::Record] Decoded payloads
- # @param metadata [Hash] Information about batch, including keys.
- # @return [void]
- def consume(payload, metadata)
- unless self.process_message?(payload)
- Deimos.config.logger.debug(
- message: 'Skipping processing of message',
- payload: payload,
- metadata: metadata
- )
+ # @param message [Karafka::Messages::Message]
+ def consume_message(message)
+ unless self.process_message?(message)
+ Deimos::Logging.log_debug(
+ message: 'Skipping processing of message',
+ payload: message.payload.to_h,
+ metadata: Deimos::Logging.metadata_log_text(message.metadata)
+ )
return
end
- key = metadata.with_indifferent_access[:key]
klass = self.class.config[:record_class]
- record = fetch_record(klass, (payload || {}).with_indifferent_access, key)
- if payload.nil?
+ record = fetch_record(klass, message.payload.to_h.with_indifferent_access, message.key)
+ if message.payload.nil?
destroy_record(record)
return
end
if record.blank?
record = klass.new
- assign_key(record, payload, key)
+ assign_key(record, message.payload, message.key)
end
- # for backwards compatibility
- # TODO next major release we should deprecate this
- attrs = if self.method(:record_attributes).parameters.size == 2
- record_attributes(payload.with_indifferent_access, key)
- else
- record_attributes(payload.with_indifferent_access)
- end
+ attrs = record_attributes((message.payload || {}).with_indifferent_access, message.key)
# don't use attributes= - bypass Rails < 5 attr_protected
attrs.each do |k, v|
record.send("#{k}=", v)
diff --git a/lib/deimos/active_record_consumer.rb b/lib/deimos/active_record_consumer.rb
index b45e2d99..63bfd72b 100644
--- a/lib/deimos/active_record_consumer.rb
+++ b/lib/deimos/active_record_consumer.rb
@@ -30,26 +30,6 @@ def record_class(klass)
config[:record_class] = klass
end
- # @return [String,nil]
- def bulk_import_id_column
- config[:bulk_import_id_column]
- end
-
- # @return [Proc]
- def bulk_import_id_generator
- config[:bulk_import_id_generator]
- end
-
- # @return [Boolean]
- def replace_associations
- config[:replace_associations]
- end
-
- # @return [Boolean]
- def save_associations_first
- config[:save_associations_first]
- end
-
# @param val [Boolean] Turn pre-compaction of the batch on or off. If true,
# only the last message for each unique key in a batch is processed.
# @return [void]
@@ -62,18 +42,48 @@ def compacted(val)
def max_db_batch_size(limit)
config[:max_db_batch_size] = limit
end
+
+ end
+
+ # @return [Boolean]
+ def replace_associations
+ self.topic.replace_associations
+ end
+
+ # @return [String,nil]
+ def bulk_import_id_column
+ self.topic.bulk_import_id_column
+ end
+
+ # @return [Proc]
+ def bulk_import_id_generator
+ topic.bulk_import_id_generator
+ end
+
+ # @return [Boolean]
+ def save_associations_first
+ topic.save_associations_first
+ end
+
+ def key_decoder
+ self.topic.serializers[:key]&.backend
end
# Setup
def initialize
@klass = self.class.config[:record_class]
- @converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.decoder, @klass)
+ @compacted = self.class.config[:compacted] != false
+ end
- if self.class.config[:key_schema]
- @key_converter = ActiveRecordConsume::SchemaModelConverter.new(self.class.key_decoder, @klass)
- end
+ def converter
+ decoder = self.topic.deserializers[:payload].backend
+ @converter ||= ActiveRecordConsume::SchemaModelConverter.new(decoder, @klass)
+ end
- @compacted = self.class.config[:compacted] != false
+ def key_converter
+ decoder = self.topic.deserializers[:key]&.backend
+ return nil if decoder.nil?
+ @key_converter ||= ActiveRecordConsume::SchemaModelConverter.new(decoder, @klass)
end
# Override this method (with `super`) if you want to add/change the default
@@ -82,7 +92,7 @@ def initialize
# @param _key [String]
# @return [Hash]
def record_attributes(payload, _key=nil)
- @converter.convert(payload)
+ self.converter.convert(payload)
end
# Override this message to conditionally save records
diff --git a/lib/deimos/active_record_producer.rb b/lib/deimos/active_record_producer.rb
index ea285c12..3aee25ed 100644
--- a/lib/deimos/active_record_producer.rb
+++ b/lib/deimos/active_record_producer.rb
@@ -18,9 +18,11 @@ class << self
# a record object, refetch the record to pass into the `generate_payload`
# method.
# @return [void]
- def record_class(klass, refetch: true)
- config[:record_class] = klass
- config[:refetch_record] = refetch
+ def record_class(klass=nil, refetch: true)
+ return @record_class if klass.nil?
+
+ @record_class = klass
+ @refetch_record = refetch
end
# @param record [ActiveRecord::Base]
@@ -34,14 +36,16 @@ def send_event(record, force_send: false)
# @param force_send [Boolean]
# @return [void]
def send_events(records, force_send: false)
- primary_key = config[:record_class]&.primary_key
+ return if Deimos.producers_disabled?(self)
+
+ primary_key = @record_class&.primary_key
messages = records.map do |record|
if record.respond_to?(:attributes)
attrs = record.attributes.with_indifferent_access
else
attrs = record.with_indifferent_access
- if config[:refetch_record] && attrs[primary_key]
- record = config[:record_class].find(attrs[primary_key])
+ if @refetch_record && attrs[primary_key]
+ record = @record_class.find(attrs[primary_key])
end
end
generate_payload(attrs, record).with_indifferent_access
@@ -50,6 +54,15 @@ def send_events(records, force_send: false)
self.post_process(records)
end
+ def config
+ Deimos.karafka_configs.find { |t| t.producer_classes.include?(self) }
+ end
+
+ def encoder
+ raise "No schema or namespace configured for #{self.name}" if config.nil?
+ config.deserializers[:payload].backend
+ end
+
# Generate the payload, given a list of attributes or a record..
# Can be overridden or added to by subclasses.
# @param attributes [Hash]
@@ -62,9 +75,9 @@ def generate_payload(attributes, _record)
payload.delete_if do |k, _|
k.to_sym != :payload_key && !fields.map(&:name).include?(k)
end
- return payload unless Utils::SchemaClass.use?(config.to_h)
+ return payload unless self.config.use_schema_classes
- Utils::SchemaClass.instance(payload, config[:schema], config[:namespace])
+ Utils::SchemaClass.instance(payload, encoder.schema, encoder.namespace)
end
# Query to use when polling the database with the DbPoller. Add
@@ -76,7 +89,7 @@ def generate_payload(attributes, _record)
# than this value).
# @return [ActiveRecord::Relation]
def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
- klass = config[:record_class]
+ klass = @record_class
table = ActiveRecord::Base.connection.quote_table_name(klass.table_name)
column = ActiveRecord::Base.connection.quote_column_name(column_name)
primary = ActiveRecord::Base.connection.quote_column_name(klass.primary_key)
@@ -95,6 +108,14 @@ def poll_query(time_from:, time_to:, column_name: :updated_at, min_id:)
def post_process(_records)
end
+ # Override this in active record producers to add
+ # non-schema fields to check for updates
+ # @param _record [ActiveRecord::Base]
+ # @return [Array] fields to check for updates
+ def watched_attributes(_record)
+ self.encoder.schema_fields.map(&:name)
+ end
+
end
end
end
diff --git a/lib/deimos/backends/base.rb b/lib/deimos/backends/base.rb
index 00f9da6b..51de87c0 100644
--- a/lib/deimos/backends/base.rb
+++ b/lib/deimos/backends/base.rb
@@ -6,54 +6,23 @@ module Backends
class Base
class << self
# @param producer_class [Class]
- # @param messages [Array]
+ # @param messages [Array]
# @return [void]
def publish(producer_class:, messages:)
- Deimos.config.logger.info(log_message(messages))
execute(producer_class: producer_class, messages: messages)
+ message = ::Deimos::Logging.messages_log_text(producer_class.karafka_config.payload_log, messages)
+ Deimos::Logging.log_info({message: 'Publishing Messages:'}.merge(message))
end
# @param producer_class [Class]
# @param messages [Array]
# @return [void]
def execute(producer_class:, messages:)
- raise NotImplementedError
+ raise MissingImplementationError
end
private
- def log_message(messages)
- log_message = {
- message: 'Publishing messages',
- topic: messages.first&.topic
- }
-
- case Deimos.config.payload_log
- when :keys
- log_message.merge!(
- payload_keys: messages.map(&:key)
- )
- when :count
- log_message.merge!(
- payloads_count: messages.count
- )
- when :headers
- log_message.merge!(
- payload_headers: messages.map(&:headers)
- )
- else
- log_message.merge!(
- payloads: messages.map do |message|
- {
- payload: message.payload,
- key: message.key
- }
- end
- )
- end
-
- log_message
- end
end
end
end
diff --git a/lib/deimos/backends/kafka.rb b/lib/deimos/backends/kafka.rb
index e8f329cb..90793889 100644
--- a/lib/deimos/backends/kafka.rb
+++ b/lib/deimos/backends/kafka.rb
@@ -4,30 +4,14 @@ module Deimos
module Backends
# Default backend to produce to Kafka.
class Kafka < Base
- include Phobos::Producer
-
- # Shut down the producer if necessary.
- # @return [void]
- def self.shutdown_producer
- producer.sync_producer_shutdown if producer.respond_to?(:sync_producer_shutdown)
- producer.kafka_client&.close
- end
-
# :nodoc:
def self.execute(producer_class:, messages:)
- Deimos.instrument(
- 'produce',
- producer: producer_class,
- topic: producer_class.topic,
- payloads: messages.map(&:payload)
- ) do
- producer.publish_list(messages.map(&:encoded_hash))
- Deimos.config.metrics&.increment(
- 'publish',
- tags: %W(status:success topic:#{producer_class.topic}),
- by: messages.size
- )
- end
+ Karafka.producer.produce_many_sync(messages)
+ Deimos.config.metrics&.increment(
+ 'publish',
+ tags: %W(status:success topic:#{messages.first[:topic]}),
+ by: messages.size
+ )
end
end
end
diff --git a/lib/deimos/backends/kafka_async.rb b/lib/deimos/backends/kafka_async.rb
index ae0c345b..1bd558ab 100644
--- a/lib/deimos/backends/kafka_async.rb
+++ b/lib/deimos/backends/kafka_async.rb
@@ -4,30 +4,14 @@ module Deimos
module Backends
# Backend which produces to Kafka via an async producer.
class KafkaAsync < Base
- include Phobos::Producer
-
- # Shut down the producer cleanly.
- # @return [void]
- def self.shutdown_producer
- producer.async_producer_shutdown
- producer.kafka_client&.close
- end
-
# :nodoc:
def self.execute(producer_class:, messages:)
- Deimos.instrument(
- 'produce',
- producer: producer_class,
- topic: producer_class.topic,
- payloads: messages.map(&:payload)
- ) do
- producer.async_publish_list(messages.map(&:encoded_hash))
- Deimos.config.metrics&.increment(
- 'publish',
- tags: %W(status:success topic:#{producer_class.topic}),
- by: messages.size
- )
- end
+ Karafka.producer.produce_many_async(messages)
+ Deimos.config.metrics&.increment(
+ 'publish',
+ tags: %W(status:success topic:#{messages.first[:topic]}),
+ by: messages.size
+ )
end
end
end
diff --git a/lib/deimos/backends/db.rb b/lib/deimos/backends/outbox.rb
similarity index 65%
rename from lib/deimos/backends/db.rb
rename to lib/deimos/backends/outbox.rb
index 2561a5bf..793b370e 100644
--- a/lib/deimos/backends/db.rb
+++ b/lib/deimos/backends/outbox.rb
@@ -6,22 +6,23 @@ module Deimos
module Backends
# Backend which saves messages to the database instead of immediately
# sending them.
- class Db < Base
+ class Outbox < Base
class << self
# :nodoc:
def execute(producer_class:, messages:)
records = messages.map do |m|
+ Deimos::ProducerMiddleware.call(m)
message = Deimos::KafkaMessage.new(
- message: m.encoded_payload ? m.encoded_payload.to_s.b : nil,
- topic: m.topic,
+ message: m[:payload] ? m[:payload].to_s.b : nil,
+ topic: m[:topic],
partition_key: partition_key_for(m)
)
- message.key = m.encoded_key.to_s.b unless producer_class.config[:no_keys]
+ message.key = m[:key].to_s.b if m[:key]
message
end
Deimos::KafkaMessage.import(records)
Deimos.config.metrics&.increment(
- 'db_producer.insert',
+ 'outbox.insert',
tags: %W(topic:#{producer_class.topic}),
by: records.size
)
@@ -30,10 +31,13 @@ def execute(producer_class:, messages:)
# @param message [Deimos::Message]
# @return [String] the partition key to use for this message
def partition_key_for(message)
- return message.partition_key if message.partition_key.present?
- return message.key unless message.key.is_a?(Hash)
-
- message.key.to_yaml
+ if message[:partition_key].present?
+ message[:partition_key]
+ elsif message[:key].present?
+ message[:key].to_s.b
+ else
+ nil
+ end
end
end
end
diff --git a/lib/deimos/batch_consumer.rb b/lib/deimos/batch_consumer.rb
deleted file mode 100644
index e9f3bb30..00000000
--- a/lib/deimos/batch_consumer.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-# frozen_string_literal: true
-
-module Deimos
- # @deprecated Use Deimos::Consumer with `delivery: inline_batch` configured instead
- class BatchConsumer < Consumer
- end
-end
diff --git a/lib/deimos/config/configuration.rb b/lib/deimos/config/configuration.rb
index ac73e6b0..feb53d3d 100644
--- a/lib/deimos/config/configuration.rb
+++ b/lib/deimos/config/configuration.rb
@@ -1,332 +1,121 @@
# frozen_string_literal: true
require 'fig_tree'
-require_relative 'phobos_config'
require_relative '../metrics/mock'
require_relative '../tracing/mock'
-require 'active_support/core_ext/numeric'
+require 'active_support/core_ext/object'
# :nodoc:
module Deimos # rubocop:disable Metrics/ModuleLength
include FigTree
- # :nodoc:
- class FigTree::ConfigStruct
- include Deimos::PhobosConfig
- end
-
# :nodoc:
after_configure do
- Phobos.configure(self.config.phobos_config)
if self.config.schema.use_schema_classes
load_generated_schema_classes
end
- self.config.producer_objects.each do |producer|
- configure_producer_or_consumer(producer)
- end
- self.config.consumer_objects.each do |consumer|
- configure_producer_or_consumer(consumer)
- end
- validate_consumers
- validate_db_backend if self.config.producers.backend == :db
+ generate_key_schemas
+ validate_outbox_backend if self.config.producers.backend == :outbox
end
- # Loads generated classes
- # @return [void]
- def self.load_generated_schema_classes
- if Deimos.config.schema.generated_class_path.nil?
- raise 'Cannot use schema classes without schema.generated_class_path. Please provide a directory.'
- end
+ class << self
- Dir["./#{Deimos.config.schema.generated_class_path}/**/*.rb"].sort.each { |f| require f }
- rescue LoadError
- raise 'Cannot load schema classes. Please regenerate classes with rake deimos:generate_schema_models.'
- end
+ def generate_key_schemas
+ Deimos.karafka_configs.each do |config|
+ transcoder = config.deserializers[:key]
- # Ensure everything is set up correctly for the DB backend.
- # @!visibility private
- def self.validate_db_backend
- begin
- require 'activerecord-import'
- rescue LoadError
- raise 'Cannot set producers.backend to :db without activerecord-import! Please add it to your Gemfile.'
- end
- if Deimos.config.producers.required_acks != :all
- raise 'Cannot set producers.backend to :db unless producers.required_acks is set to ":all"!'
+ if transcoder.respond_to?(:key_field) && transcoder.key_field
+ transcoder.backend = Deimos.schema_backend(schema: config.schema,
+ namespace: config.namespace)
+ transcoder.backend.generate_key_schema(transcoder.key_field)
+ end
+ end
end
- end
-
- # Validate that consumers are configured correctly, including their
- # delivery mode.
- # @!visibility private
- def self.validate_consumers
- Phobos.config.listeners.each do |listener|
- handler_class = listener.handler.constantize
- delivery = listener.delivery
- next unless handler_class < Deimos::Consumer
-
- # Validate that each consumer implements the correct method for its type
- if delivery == 'inline_batch'
- if handler_class.instance_method(:consume_batch).owner == Deimos::Consume::BatchConsumption
- raise "BatchConsumer #{listener.handler} does not implement `consume_batch`"
- end
- elsif handler_class.instance_method(:consume).owner == Deimos::Consume::MessageConsumption
- raise "Non-batch Consumer #{listener.handler} does not implement `consume`"
+ # Loads generated classes
+ # @return [void]
+ def load_generated_schema_classes
+ if Deimos.config.schema.generated_class_path.nil?
+ raise 'Cannot use schema classes without schema.generated_class_path. Please provide a directory.'
end
+
+ Dir["./#{Deimos.config.schema.generated_class_path}/**/*.rb"].sort.each { |f| require f }
+ rescue LoadError
+ raise 'Cannot load schema classes. Please regenerate classes with rake deimos:generate_schema_models.'
end
- end
- # @!visibility private
- # @param kafka_config [FigTree::ConfigStruct]
- # rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
- def self.configure_producer_or_consumer(kafka_config)
- klass = kafka_config.class_name.constantize
- klass.class_eval do
- topic(kafka_config.topic) if kafka_config.topic.present? && klass.respond_to?(:topic)
- schema(kafka_config.schema) if kafka_config.schema.present?
- namespace(kafka_config.namespace) if kafka_config.namespace.present?
- key_config(**kafka_config.key_config) if kafka_config.key_config.present?
- schema_class_config(kafka_config.use_schema_classes) if kafka_config.use_schema_classes.present?
- if kafka_config.respond_to?(:bulk_import_id_column) # consumer
- klass.config.merge!(
- bulk_import_id_column: kafka_config.bulk_import_id_column,
- replace_associations: if kafka_config.replace_associations.nil?
- Deimos.config.consumers.replace_associations
- else
- kafka_config.replace_associations
- end,
- bulk_import_id_generator: kafka_config.bulk_import_id_generator ||
- Deimos.config.consumers.bulk_import_id_generator,
- save_associations_first: kafka_config.save_associations_first
- )
+      # Ensure everything is set up correctly for the outbox backend.
+ # @!visibility private
+ def validate_outbox_backend
+ begin
+ require 'activerecord-import'
+ rescue LoadError
+ raise 'Cannot set producers.backend to :outbox without activerecord-import! Please add it to your Gemfile.'
end
end
end
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
define_settings do
-
- # @return [Logger]
- setting :logger, Logger.new(STDOUT)
-
- # @return [Symbol]
- setting :payload_log, :full
-
- # @return [Logger]
- setting :phobos_logger, default_proc: proc { Deimos.config.logger.clone }
+ setting :logger, removed: 'Use "logger" in Karafka setup block.'
+ setting :payload_log, removed: 'Use topic.payload_log in Karafka settings'
+ setting :phobos_logger, removed: 'Separate logger for Phobos is no longer supported'
setting :kafka do
-
- # @return [Logger]
- setting :logger, default_proc: proc { Deimos.config.logger.clone }
-
- # URL of the seed broker.
- # @return [Array]
- setting :seed_brokers, ['localhost:9092']
-
- # Identifier for this application.
- # @return [String]
- setting :client_id, 'phobos'
-
- # The socket timeout for connecting to the broker, in seconds.
- # @return [Integer]
- setting :connect_timeout, 15
-
- # The socket timeout for reading and writing to the broker, in seconds.
- # @return [Integer]
- setting :socket_timeout, 15
+ setting :logger, Logger.new(STDOUT), removed: "Karafka uses Rails logger by default"
+ setting :seed_brokers, ['localhost:9092'], removed: 'Use kafka(bootstrap.servers) in Karafka settings'
+ setting :client_id, 'phobos', removed: 'Use client_id in Karafka setup block.'
+ setting :connect_timeout, 15, removed: 'Use kafka(socket.connection.setup.timeout.ms) in Karafka settings'
+ setting :socket_timeout, 15, removed: 'Use kafka(socket.timeout.ms) in Karafka settings'
setting :ssl do
- # Whether SSL is enabled on the brokers.
- # @return [Boolean]
- setting :enabled
-
- # a PEM encoded CA cert, a file path to the cert, or an Array of certs,
- # to use with an SSL connection.
- # @return [String|Array]
- setting :ca_cert
-
- # a PEM encoded client cert to use with an SSL connection, or a file path
- # to the cert.
- # @return [String]
- setting :client_cert
-
- # a PEM encoded client cert key to use with an SSL connection.
- # @return [String]
- setting :client_cert_key
-
- # Verify certificate hostname if supported (ruby >= 2.4.0)
- setting :verify_hostname, true
-
- # Use CA certs from system. This is useful to have enabled for Confluent Cloud
- # @return [Boolean]
- setting :ca_certs_from_system, false
+ setting :enabled, removed: 'Use kafka(security.protocol=ssl) in Karafka settings'
+ setting :ca_cert, removed: 'Use kafka(ssl.ca.pem) in Karafka settings'
+ setting :client_cert, removed: 'Use kafka(ssl.certificate.pem) in Karafka settings'
+ setting :client_cert_key, removed: 'Use kafka(ssl.key.pem) in Karafka settings'
+ setting :verify_hostname, removed: 'Use kafka(ssl.endpoint.identification.algorithm=https) in Karafka settings'
+ setting :ca_certs_from_system, removed: 'Should not be necessary with librdkafka.'
end
setting :sasl do
- # Whether SASL is enabled on the brokers.
- # @return [Boolean]
- setting :enabled
-
- # A KRB5 principal.
- # @return [String]
- setting :gssapi_principal
-
- # A KRB5 keytab filepath.
- # @return [String]
- setting :gssapi_keytab
-
- # Plain authorization ID. It needs to default to '' in order for it to work.
- # This is because Phobos expects it to be truthy for using plain SASL.
- # @return [String]
- setting :plain_authzid, ''
-
- # Plain username.
- # @return [String]
- setting :plain_username
-
- # Plain password.
- # @return [String]
- setting :plain_password
-
- # SCRAM username.
- # @return [String]
- setting :scram_username
-
- # SCRAM password.
- # @return [String]
- setting :scram_password
-
- # Scram mechanism, either "sha256" or "sha512".
- # @return [String]
- setting :scram_mechanism
-
- # Whether to enforce SSL with SASL.
- # @return [Boolean]
- setting :enforce_ssl
-
- # OAuthBearer Token Provider instance that implements
- # method token. See {Sasl::OAuth#initialize}.
- # @return [Object]
- setting :oauth_token_provider
+ setting :enabled, removed: 'Use kafka(security.protocol=sasl_ssl or sasl_plaintext) in Karafka settings'
+ setting :gssapi_principal, removed: 'Use kafka(sasl.kerberos.principal) in Karafka settings'
+ setting :gssapi_keytab, removed: 'Use kafka(sasl.kerberos.keytab) in Karafka settings'
+ setting :plain_authzid, removed: 'No longer needed with rdkafka'
+ setting :plain_username, removed: 'Use kafka(sasl.username) in Karafka settings'
+ setting :plain_password, removed: 'Use kafka(sasl.password) in Karafka settings'
+ setting :scram_username, removed: 'Use kafka(sasl.username) in Karafka settings'
+ setting :scram_password, removed: 'Use kafka(sasl.password) in Karafka settings'
+ setting :scram_mechanism, removed: 'Use kafka(sasl.mechanisms) in Karafka settings'
+ setting :enforce_ssl, removed: 'Use kafka(security.protocol=sasl_ssl) in Karafka settings'
+ setting :oauth_token_provider, removed: 'See rdkafka configs for details'
end
end
setting :consumers do
-
- # Number of seconds after which, if a client hasn't contacted the Kafka cluster,
- # it will be kicked out of the group.
- # @return [Integer]
- setting :session_timeout, 300
-
- # Interval between offset commits, in seconds.
- # @return [Integer]
- setting :offset_commit_interval, 10
-
- # Number of messages that can be processed before their offsets are committed.
- # If zero, offset commits are not triggered by message processing
- # @return [Integer]
- setting :offset_commit_threshold, 0
-
- # Interval between heartbeats; must be less than the session window.
- # @return [Integer]
- setting :heartbeat_interval, 10
-
- # Minimum and maximum number of milliseconds to back off after a consumer
- # error.
- setting :backoff, (1000..60_000)
-
- # By default, consumer errors will be consumed and logged to
- # the metrics provider.
- # Set this to true to force the error to be raised.
- # @return [Boolean]
- setting :reraise_errors
-
- # @return [Boolean]
- setting :report_lag
-
- # Block taking an exception, payload and metadata and returning
- # true if this should be considered a fatal error and false otherwise.
- # Not needed if reraise_errors is set to true.
- # @return [Block]
- setting(:fatal_error, proc { false })
-
- # The default function to generate a bulk ID for bulk consumers
- # @return [Block]
- setting(:bulk_import_id_generator, proc { SecureRandom.uuid })
-
- # If true, multi-table consumers will blow away associations rather than appending to them.
- # Applies to all consumers unless specified otherwise
- # @return [Boolean]
- setting :replace_associations, true
+ setting :reraise_errors, removed: 'Use topic.reraise_errors in Karafka settings'
+ setting :report_lag, removed: "Use Karafka's built in lag reporting"
+ setting(:fatal_error, removed: "Use topic.fatal_error in Karafka settings")
+ setting(:bulk_import_id_generator, removed: "Use topic.bulk_import_id_generator in Karafka settings")
+      setting :save_associations_first, removed: "Use topic.save_associations_first in Karafka settings"
+ setting :replace_associations, removed: "Use topic.replace_associations in Karafka settings"
end
setting :producers do
- # Number of seconds a broker can wait for replicas to acknowledge
- # a write before responding with a timeout.
- # @return [Integer]
- setting :ack_timeout, 5
-
- # Number of replicas that must acknowledge a write, or `:all`
- # if all in-sync replicas must acknowledge.
- # @return [Integer|Symbol]
- setting :required_acks, 1
-
- # Number of retries that should be attempted before giving up sending
- # messages to the cluster. Does not include the original attempt.
- # @return [Integer]
- setting :max_retries, 2
-
- # Number of seconds to wait between retries.
- # @return [Integer]
- setting :retry_backoff, 1
-
- # Number of messages allowed in the buffer before new writes will
- # raise {BufferOverflow} exceptions.
- # @return [Integer]
- setting :max_buffer_size, 10_000
-
- # Maximum size of the buffer in bytes. Attempting to produce messages
- # when the buffer reaches this size will result in {BufferOverflow} being raised.
- # @return [Integer]
- setting :max_buffer_bytesize, 10_000_000
-
- # Name of the compression codec to use, or nil if no compression should be performed.
- # Valid codecs: `:snappy` and `:gzip`
- # @return [Symbol]
- setting :compression_codec
-
- # Number of messages that needs to be in a message set before it should be compressed.
- # Note that message sets are per-partition rather than per-topic or per-producer.
- # @return [Integer]
- setting :compression_threshold, 1
-
- # Maximum number of messages allowed in the queue. Only used for async_producer.
- # @return [Integer]
- setting :max_queue_size, 10_000
-
- # If greater than zero, the number of buffered messages that will automatically
- # trigger a delivery. Only used for async_producer.
- # @return [Integer]
- setting :delivery_threshold, 0
-
- # if greater than zero, the number of seconds between automatic message
- # deliveries. Only used for async_producer.
- # @return [Integer]
- setting :delivery_interval, 0
-
- # Set this to true to keep the producer connection between publish calls.
- # This can speed up subsequent messages by around 30%, but it does mean
- # that you need to manually call sync_producer_shutdown before exiting,
- # similar to async_producer_shutdown.
- # @return [Boolean]
- setting :persistent_connections, false
-
- # Default namespace for all producers. Can remain nil. Individual
- # producers can override.
- # @return [String]
- setting :schema_namespace
+ setting :ack_timeout, removed: "Not supported in rdkafka"
+ setting :required_acks, 1, removed: "Use kafka(request.required.acks) in Karafka settings"
+ setting :max_retries, removed: "Use kafka(message.send.max.retries) in Karafka settings"
+ setting :retry_backoff, removed: "Use kafka(retry.backoff.ms) in Karafka settings"
+ setting :max_buffer_size, removed: "Not relevant with Karafka. You may want to see the queue.buffering.max.messages setting."
+ setting :max_buffer_bytesize, removed: "Not relevant with Karafka."
+ setting :compression_codec, removed: "Use kafka(compression.codec) in Karafka settings"
+ setting :compression_threshold, removed: "Not supported in Karafka."
+ setting :max_queue_size, removed: "Not relevant to Karafka."
+ setting :delivery_threshold, removed: "Not relevant to Karafka."
+ setting :delivery_interval, removed: "Not relevant to Karafka."
+ setting :persistent_connections, removed: "Karafka connections are always persistent."
+ setting :schema_namespace, removed: "Use topic.namespace in Karafka settings"
# Add a prefix to all topic names. This can be useful if you're using
# the same Kafka broker for different environments that are producing
@@ -344,10 +133,6 @@ def self.configure_producer_or_consumer(kafka_config)
# sync in your consumers or delayed workers.
# @return [Symbol]
setting :backend, :kafka_async
-
- # Maximum publishing batch size. Individual producers can override.
- # @return [Integer]
- setting :max_batch_size, 500
end
setting :schema do
@@ -375,9 +160,9 @@ def self.configure_producer_or_consumer(kafka_config)
# @return [String]
setting :generated_class_path, 'app/lib/schema_classes'
- # Set to true to use the generated schema classes in your application
+ # Set to true to use the generated schema classes in your application.
# @return [Boolean]
- setting :use_schema_classes, false
+ setting :use_schema_classes
# Set to false to generate child schemas as their own files.
# @return [Boolean]
@@ -402,10 +187,10 @@ def self.configure_producer_or_consumer(kafka_config)
# @return [Tracing::Provider]
setting :tracer, default_proc: proc { Tracing::Mock.new }
- setting :db_producer do
+ setting :outbox do
# @return [Logger]
- setting :logger, default_proc: proc { Deimos.config.logger }
+ setting :logger, default_proc: proc { Karafka.logger }
# @return [Symbol|Array] A list of topics to log all messages, or
# :all to log all topics.
@@ -417,94 +202,48 @@ def self.configure_producer_or_consumer(kafka_config)
end
+ setting :db_producer do
+ setting :logger, removed: "Use outbox.logger"
+ setting :log_topics, removed: "Use outbox.log_topics"
+ setting :compact_topics, removed: "Use outbox.compact_topics"
+ end
+
setting_object :producer do
- # Producer class.
- # @return [String]
- setting :class_name
- # Topic to produce to.
- # @return [String]
- setting :topic
- # Schema of the data in the topic.
- # @return [String]
- setting :schema
- # Optional namespace to access the schema.
- # @return [String]
- setting :namespace
- # Key configuration (see docs).
- # @return [Hash]
- setting :key_config
- # Configure the usage of generated schema classes for this producer
- # @return [Boolean]
- setting :use_schema_classes
- # If true, and using the multi-table feature of ActiveRecordConsumers, replace associations
- # instead of appending to them.
- # @return [Boolean]
- setting :replace_associations
- # Maximum publishing batch size for this producer.
- # @return [Integer]
- setting :max_batch_size
+ setting :class_name, removed: "Use topic.producer_class in Karafka settings."
+ setting :topic, removed: "Use Karafka settings."
+ setting :schema, removed: "Use topic.schema(schema:) in Karafka settings."
+ setting :namespace, removed: "Use topic.schema(namespace:) in Karafka settings."
+ setting :key_config, removed: "Use topic.schema(key_config:) in Karafka settings."
+ setting :use_schema_classes, removed: "Use topic.schema(use_schema_classes:) in Karafka settings."
end
setting_object :consumer do
- # Consumer class.
- # @return [String]
- setting :class_name
- # Topic to read from.
- # @return [String]
- setting :topic
- # Schema of the data in the topic.
- # @return [String]
- setting :schema
- # Optional namespace to access the schema.
- # @return [String]
- setting :namespace
- # Key configuration (see docs).
- # @return [Hash]
- setting :key_config
- # Set to true to ignore the consumer in the Phobos config and not actually start up a
- # listener.
- # @return [Boolean]
- setting :disabled, false
- # Configure the usage of generated schema classes for this consumer
- # @return [Boolean]
- setting :use_schema_classes
- # Optional maximum limit for batching database calls to reduce the load on the db.
- # @return [Integer]
- setting :max_db_batch_size
- # Column to use for bulk imports, for multi-table feature.
- # @return [String]
- setting :bulk_import_id_column, :bulk_import_id
- # If true, multi-table consumers will blow away associations rather than appending to them.
- # @return [Boolean]
- setting :replace_associations, nil
-
- # The default function to generate a bulk ID for this consumer
- # Uses the consumers proc defined in the consumers config by default unless
- # specified for individual consumers
- # @return [Block]
- setting :bulk_import_id_generator, nil
-
- # If enabled save associated records prior to saving the main record class
- # This will also set foreign keys for associated records
- # @return [Boolean]
- setting :save_associations_first, false
-
- # These are the phobos "listener" configs. See CONFIGURATION.md for more
- # info.
- setting :group_id
- setting :max_concurrency, 1
- setting :start_from_beginning, true
- setting :max_bytes_per_partition, 500.kilobytes
- setting :min_bytes, 1
- setting :max_wait_time, 5
- setting :force_encoding
- setting :delivery, :batch
- setting :backoff
- setting :session_timeout, 300
- setting :offset_commit_interval, 10
- setting :offset_commit_threshold, 0
- setting :offset_retention_time
- setting :heartbeat_interval, 10
+ setting :class_name, removed: "Use topic.consumer in Karafka settings."
+ setting :topic, removed: "Use Karafka settings."
+ setting :schema, removed: "Use topic.schema(schema:) in Karafka settings."
+ setting :namespace, removed: "Use topic.schema(namespace:) in Karafka settings."
+ setting :key_config, removed: "Use topic.schema(key_config:) in Karafka settings."
+ setting :disabled, removed: "Use topic.active in Karafka settings."
+ setting :use_schema_classes, removed: "Use topic.use_schema_classes in Karafka settings."
+ setting :max_db_batch_size, removed: "Use topic.max_db_batch_size in Karafka settings."
+ setting :bulk_import_id_column, removed: "Use topic.bulk_import_id_column in Karafka settings."
+ setting :replace_associations, removed: "Use topic.replace_associations in Karafka settings."
+ setting :bulk_import_id_generator, removed: "Use topic.bulk_import_id_generator in Karafka settings."
+      setting :save_associations_first, removed: "Use topic.save_associations_first in Karafka settings."
+ setting :group_id, removed: "Use kafka(group.id) in Karafka settings."
+ setting :max_concurrency, removed: "Use Karafka's 'config.concurrency' in the setup block."
+ setting :start_from_beginning, removed: "Use initial_offset in the setup block, or kafka(auto.offset.reset) in topic settings."
+ setting :max_bytes_per_partition, removed: "Use max_messages in the setup block."
+ setting :min_bytes, removed: "Not supported in Karafka."
+ setting :max_wait_time, removed: "Use max_wait_time in the setup block."
+ setting :force_encoding, removed: "Not supported with Karafka."
+      setting :delivery, :batch, removed: "Use each_message true/false in Karafka topic configs."
+      setting :backoff, removed: "Use kafka(retry.backoff.ms) and kafka(retry.backoff.max.ms) in Karafka settings."
+ setting :session_timeout, removed: "Use kafka(session.timeout.ms) in Karafka settings."
+ setting :offset_commit_interval, removed: "Use kafka(auto.commit.interval.ms) in Karafka settings."
+ setting :offset_commit_threshold, removed: "Not supported with Karafka."
+ setting :offset_retention_time, removed: "Not supported with Karafka."
+ setting :heartbeat_interval, removed: "Use kafka(heartbeat.interval.ms) in Karafka settings."
end
setting_object :db_poller do
@@ -548,20 +287,5 @@ def self.configure_producer_or_consumer(kafka_config)
setting :poller_class, nil
end
- deprecate 'kafka_logger', 'kafka.logger'
- deprecate 'reraise_consumer_errors', 'consumers.reraise_errors'
- deprecate 'schema_registry_url', 'schema.registry_url'
- deprecate 'seed_broker', 'kafka.seed_brokers'
- deprecate 'schema_path', 'schema.path'
- deprecate 'producer_schema_namespace', 'producers.schema_namespace'
- deprecate 'producer_topic_prefix', 'producers.topic_prefix'
- deprecate 'disable_producers', 'producers.disabled'
- deprecate 'ssl_enabled', 'kafka.ssl.enabled'
- deprecate 'ssl_ca_cert', 'kafka.ssl.ca_cert'
- deprecate 'ssl_client_cert', 'kafka.ssl.client_cert'
- deprecate 'ssl_client_cert_key', 'kafka.ssl.client_cert_key'
- deprecate 'publish_backend', 'producers.backend'
- deprecate 'report_lag', 'consumers.report_lag'
-
end
end
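
The removed settings above now live in the Karafka setup block or in per-topic kafka(...) overrides, as the `removed:` messages indicate. A minimal sketch of the corresponding karafka.rb setup block, with purely illustrative values (client id, broker address and concurrency are assumptions, not values taken from this change):

    # karafka.rb (sketch)
    class KarafkaApp < Karafka::App
      setup do |config|
        config.client_id = 'my_app'                # was kafka.client_id
        config.logger = Rails.logger               # replaces the Deimos logger settings
        config.concurrency = 5                     # was consumer max_concurrency
        config.kafka = {
          'bootstrap.servers': 'localhost:9092',   # was kafka.seed_brokers
          'socket.timeout.ms': 15_000,             # was kafka.socket_timeout (in seconds)
          'security.protocol': 'plaintext'         # 'ssl' / 'sasl_ssl' replace the ssl/sasl blocks
        }
      end
    end
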
diff --git a/lib/deimos/config/phobos_config.rb b/lib/deimos/config/phobos_config.rb
deleted file mode 100644
index 8e739bf2..00000000
--- a/lib/deimos/config/phobos_config.rb
+++ /dev/null
@@ -1,164 +0,0 @@
-# frozen_string_literal: true
-
-require 'active_support/core_ext/array'
-
-module Deimos
- # Module to handle phobos.yml as well as outputting the configuration to save
- # to Phobos itself.
- module PhobosConfig
- extend ActiveSupport::Concern
-
- # @return [Hash]
- def to_h
- (FIELDS + [:handler]).map { |f|
- val = self.send(f)
- if f == :backoff && val
- [:backoff, _backoff(val)]
- elsif val.present?
- [f, val]
- end
- }.to_h
- end
-
- # @return [void]
- def reset!
- super
- Phobos.configure(self.phobos_config)
- end
-
- # Create a hash representing the config that Phobos expects.
- # @return [Hash]
- def phobos_config
- p_config = {
- logger: Logger.new(STDOUT),
- custom_logger: self.phobos_logger,
- custom_kafka_logger: self.kafka.logger,
- kafka: {
- client_id: self.kafka.client_id,
- connect_timeout: self.kafka.connect_timeout,
- socket_timeout: self.kafka.socket_timeout,
- ssl_verify_hostname: self.kafka.ssl.verify_hostname,
- ssl_ca_certs_from_system: self.kafka.ssl.ca_certs_from_system,
- seed_brokers: Array.wrap(self.kafka.seed_brokers)
- },
- producer: {
- ack_timeout: self.producers.ack_timeout,
- required_acks: self.producers.required_acks,
- max_retries: self.producers.max_retries,
- retry_backoff: self.producers.retry_backoff,
- max_buffer_size: self.producers.max_buffer_size,
- max_buffer_bytesize: self.producers.max_buffer_bytesize,
- compression_codec: self.producers.compression_codec,
- compression_threshold: self.producers.compression_threshold,
- max_queue_size: self.producers.max_queue_size,
- delivery_threshold: self.producers.delivery_threshold,
- delivery_interval: self.producers.delivery_interval,
- persistent_connections: self.producers.persistent_connections
- },
- consumer: {
- session_timeout: self.consumers.session_timeout,
- offset_commit_interval: self.consumers.offset_commit_interval,
- offset_commit_threshold: self.consumers.offset_commit_threshold,
- heartbeat_interval: self.consumers.heartbeat_interval
- },
- backoff: _backoff(self.consumers.backoff.to_a)
- }
-
- p_config[:listeners] = self.consumer_objects.map do |consumer|
- next nil if consumer.disabled
-
- hash = consumer.to_h.reject do |k, _|
- %i(class_name schema namespace key_config backoff disabled replace_associations
- bulk_import_id_column).include?(k)
- end
- hash = hash.map { |k, v| [k, v.is_a?(Symbol) ? v.to_s : v] }.to_h
- hash[:handler] = consumer.class_name
- if consumer.backoff
- hash[:backoff] = _backoff(consumer.backoff.to_a)
- end
- hash
- end
- p_config[:listeners].compact!
-
- if self.kafka.ssl.enabled
- %w(ca_cert client_cert client_cert_key).each do |key|
- next if self.kafka.ssl.send(key).blank?
-
- p_config[:kafka]["ssl_#{key}".to_sym] = ssl_var_contents(self.kafka.ssl.send(key))
- end
- end
-
- if self.kafka.sasl.enabled
- p_config[:kafka][:sasl_over_ssl] = self.kafka.sasl.enforce_ssl
- %w(
- gssapi_principal
- gssapi_keytab
- plain_authzid
- plain_username
- plain_password
- scram_username
- scram_password
- scram_mechanism
- oauth_token_provider
- ).each do |key|
- value = self.kafka.sasl.send(key)
- next if value.blank?
-
- p_config[:kafka]["sasl_#{key}".to_sym] = value
- end
- end
- p_config
- end
-
- # @param key [String]
- # @return [String]
- def ssl_var_contents(key)
- File.exist?(key) ? File.read(key) : key
- end
-
- # Legacy method to parse Phobos config file
- # @!visibility private
- def phobos_config_file=(file)
- pconfig = YAML.load(ERB.new(File.read(File.expand_path(file))).result). # rubocop:disable Security/YAMLLoad
- with_indifferent_access
- self.logger&.warn('phobos.yml is deprecated - use direct configuration instead.')
- pconfig[:kafka].each do |k, v|
- if k.starts_with?('ssl')
- k = k.sub('ssl_', '')
- self.kafka.ssl.send("#{k}=", v)
- elsif k.starts_with?('sasl')
- k = (k == 'sasl_over_ssl') ? 'enforce_ssl' : k.sub('sasl_', '')
- self.kafka.sasl.send("#{k}=", v)
- else
- self.kafka.send("#{k}=", v)
- end
- end
- pconfig[:producer].each do |k, v|
- self.producers.send("#{k}=", v)
- end
- pconfig[:consumer].each do |k, v|
- self.consumers.send("#{k}=", v)
- end
- self.consumers.backoff = pconfig[:backoff][:min_ms]..pconfig[:backoff][:max_ms]
- pconfig[:listeners].each do |listener_hash|
- self.consumer do
- listener_hash.each do |k, v|
- k = 'class_name' if k == 'handler'
- send(k, v)
- end
- end
- end
- end
-
- private
-
- # @param values [Array]
- # @return [Hash]
- def _backoff(values)
- {
- min_ms: values[0],
- max_ms: values[-1]
- }
- end
- end
-end
diff --git a/lib/deimos/consume/batch_consumption.rb b/lib/deimos/consume/batch_consumption.rb
index 3a3db801..ffbfdee9 100644
--- a/lib/deimos/consume/batch_consumption.rb
+++ b/lib/deimos/consume/batch_consumption.rb
@@ -7,151 +7,51 @@ module Consume
# of messages to be handled at once
module BatchConsumption
extend ActiveSupport::Concern
- include Phobos::BatchHandler
- # @param batch [Array]
- # @param metadata [Hash]
- # @return [void]
- def around_consume_batch(batch, metadata)
- payloads = []
- _with_span do
- benchmark = Benchmark.measure do
- if self.class.config[:key_configured]
- metadata[:keys] = batch.map do |message|
- decode_key(message.key)
- end
- end
- metadata[:first_offset] = batch.first&.offset
-
- payloads = batch.map do |message|
- decode_message(message.payload)
- end
- _received_batch(payloads, metadata)
- yield(payloads, metadata)
- end
- _handle_batch_success(benchmark.real, payloads, metadata)
- end
- rescue StandardError => e
- _handle_batch_error(e, payloads, metadata)
- end
-
- # Consume a batch of incoming messages.
- # @param _payloads [Array]
- # @param _metadata [Hash]
- # @return [void]
- def consume_batch(_payloads, _metadata)
- raise NotImplementedError
+ def consume_batch
+ raise MissingImplementationError
end
protected
- # @!visibility private
- def _received_batch(payloads, metadata)
- Deimos.config.logger.info(
- message: 'Got Kafka batch event',
- message_ids: _payload_identifiers(payloads, metadata),
- metadata: metadata.except(:keys)
- )
- Deimos.config.logger.debug(
- message: 'Kafka batch event payloads',
- payloads: payloads
- )
- Deimos.config.metrics&.increment(
- 'handler',
- tags: %W(
- status:batch_received
- topic:#{metadata[:topic]}
- ))
- Deimos.config.metrics&.increment(
- 'handler',
- by: metadata[:batch_size],
- tags: %W(
- status:received
- topic:#{metadata[:topic]}
- ))
- if payloads.present?
- payloads.each { |payload| _report_time_delayed(payload, metadata) }
+ def _consume_batch
+ _with_span do
+ begin
+ benchmark = Benchmark.measure do
+ consume_batch
+ end
+ _handle_batch_success(benchmark.real)
+ rescue StandardError => e
+ _handle_batch_error(e)
+ end
end
end
# @!visibility private
# @param exception [Throwable]
- # @param payloads [Array]
- # @param metadata [Hash]
- def _handle_batch_error(exception, payloads, metadata)
- Deimos.config.metrics&.increment(
- 'handler',
- tags: %W(
- status:batch_error
- topic:#{metadata[:topic]}
- ))
- Deimos.config.logger.warn(
+ def _handle_batch_error(exception)
+ Deimos::Logging.log_warn(
message: 'Error consuming message batch',
handler: self.class.name,
- metadata: metadata.except(:keys),
- message_ids: _payload_identifiers(payloads, metadata),
+ metadata: Deimos::Logging.metadata_log_text(messages.metadata),
+ messages: Deimos::Logging.messages_log_text(self.topic.payload_log, messages),
error_message: exception.message,
error: exception.backtrace
)
- _error(exception, payloads, metadata)
+ _error(exception, messages)
end
# @!visibility private
# @param time_taken [Float]
- # @param payloads [Array]
- # @param metadata [Hash]
- def _handle_batch_success(time_taken, payloads, metadata)
- Deimos.config.metrics&.histogram('handler',
- time_taken,
- tags: %W(
- time:consume_batch
- topic:#{metadata[:topic]}
- ))
- Deimos.config.metrics&.increment(
- 'handler',
- tags: %W(
- status:batch_success
- topic:#{metadata[:topic]}
- ))
- Deimos.config.metrics&.increment(
- 'handler',
- by: metadata[:batch_size],
- tags: %W(
- status:success
- topic:#{metadata[:topic]}
- ))
- Deimos.config.logger.info(
- message: 'Finished processing Kafka batch event',
- message_ids: _payload_identifiers(payloads, metadata),
- time_elapsed: time_taken,
- metadata: metadata.except(:keys)
- )
+ def _handle_batch_success(time_taken)
+ Deimos::Logging.log_info(
+ {
+ message: 'Finished processing Kafka batch event',
+ time_elapsed: time_taken,
+ metadata: Deimos::Logging.metadata_log_text(messages.metadata)
+ }.merge(Deimos::Logging.messages_log_text(self.topic.payload_log, messages)))
end
- # @!visibility private
- # Get payload identifiers (key and message_id if present) for logging.
- # @param payloads [Array]
- # @param metadata [Hash]
- # @return [Array] the identifiers.
- def _payload_identifiers(payloads, metadata)
- message_ids = payloads&.map do |payload|
- if payload.is_a?(Hash) && payload.key?('message_id')
- payload['message_id']
- end
- end
-
- # Payloads may be nil if preprocessing failed
- messages = payloads || metadata[:keys] || []
-
- messages.zip(metadata[:keys] || [], message_ids || []).map do |_, k, m_id|
- ids = {}
-
- ids[:key] = k if k.present?
- ids[:message_id] = m_id if m_id.present?
-
- ids
- end
- end
end
end
end
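
For illustration of the new flow: `consume_batch` no longer receives payloads and metadata; it reads the Karafka `messages` object directly. A minimal sketch of a batch consumer, where the class, topic and `Widget` model are hypothetical:

    class MyBatchConsumer < Deimos::Consumer
      # Called once per poll when the topic is not routed with `each_message true`.
      def consume_batch
        messages.payloads.each do |payload|
          Widget.create!(name: payload['name']) # payloads are already decoded by the topic deserializer
        end
      end
    end
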
diff --git a/lib/deimos/consume/message_consumption.rb b/lib/deimos/consume/message_consumption.rb
index de6f2eb8..e2613358 100644
--- a/lib/deimos/consume/message_consumption.rb
+++ b/lib/deimos/consume/message_consumption.rb
@@ -6,90 +6,63 @@ module Consume
# are invoked for every individual message.
module MessageConsumption
extend ActiveSupport::Concern
- include Phobos::Handler
-
- # @param payload [String]
- # @param metadata [Hash]
- # @return [void]
- def around_consume(payload, metadata)
- decoded_payload = payload.nil? ? nil : payload.dup
- new_metadata = metadata.dup
- benchmark = Benchmark.measure do
- _with_span do
- new_metadata[:key] = decode_key(metadata[:key]) if self.class.config[:key_configured]
- decoded_payload = decode_message(payload)
- _received_message(decoded_payload, new_metadata)
- yield(decoded_payload, new_metadata)
- end
- end
- _handle_success(benchmark.real, decoded_payload, new_metadata)
- rescue StandardError => e
- _handle_error(e, decoded_payload, new_metadata)
- end
# Consume incoming messages.
- # @param _payload [String]
- # @param _metadata [Hash]
+ # @param _message [Karafka::Messages::Message]
# @return [void]
- def consume(_payload, _metadata)
- raise NotImplementedError
+ def consume_message(_message)
+ raise MissingImplementationError
end
private
- def _received_message(payload, metadata)
- Deimos.config.logger.info(
+ def _consume_messages
+ messages.each do |message|
+ begin
+ _with_span do
+ _received_message(message)
+ benchmark = Benchmark.measure do
+ consume_message(message)
+ end
+ _handle_success(message, benchmark.real)
+ rescue StandardError => e
+ _handle_message_error(e, message)
+ end
+ end
+ end
+ end
+
+ def _received_message(message)
+ Deimos::Logging.log_info(
message: 'Got Kafka event',
- payload: payload,
- metadata: metadata
+ payload: message.payload,
+ metadata: Deimos::Logging.metadata_log_text(message.metadata)
)
- Deimos.config.metrics&.increment('handler', tags: %W(
- status:received
- topic:#{metadata[:topic]}
- ))
- _report_time_delayed(payload, metadata)
end
# @param exception [Throwable]
- # @param payload [Hash]
- # @param metadata [Hash]
- def _handle_error(exception, payload, metadata)
- Deimos.config.metrics&.increment(
- 'handler',
- tags: %W(
- status:error
- topic:#{metadata[:topic]}
- )
- )
- Deimos.config.logger.warn(
+ # @param message [Karafka::Messages::Message]
+ def _handle_message_error(exception, message)
+ Deimos::Logging.log_warn(
message: 'Error consuming message',
handler: self.class.name,
- metadata: metadata,
- data: payload,
+ metadata: Deimos::Logging.metadata_log_text(message.metadata),
+ key: message.key,
+ data: message.payload,
error_message: exception.message,
error: exception.backtrace
)
- _error(exception, payload, metadata)
+ _error(exception, Karafka::Messages::Messages.new([message], messages.metadata))
end
- # @param time_taken [Float]
- # @param payload [Hash]
- # @param metadata [Hash]
- def _handle_success(time_taken, payload, metadata)
- Deimos.config.metrics&.histogram('handler', time_taken, tags: %W(
- time:consume
- topic:#{metadata[:topic]}
- ))
- Deimos.config.metrics&.increment('handler', tags: %W(
- status:success
- topic:#{metadata[:topic]}
- ))
- Deimos.config.logger.info(
+ def _handle_success(message, benchmark)
+ mark_as_consumed(message)
+ Deimos::Logging.log_info(
message: 'Finished processing Kafka event',
- payload: payload,
- time_elapsed: time_taken,
- metadata: metadata
+ payload: message.payload,
+ time_elapsed: benchmark,
+ metadata: Deimos::Logging.metadata_log_text(message.metadata)
)
end
end
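
On the per-message side, `consume(payload, metadata)` is replaced by `consume_message(message)`, which receives a `Karafka::Messages::Message`. A minimal sketch with hypothetical class and model names; the topic would be routed with `each_message true`:

    class MyConsumer < Deimos::Consumer
      def consume_message(message)
        payload = message.payload   # decoded payload
        Widget.upsert({ id: message.key, name: payload['name'] }) # hypothetical model
      end
    end
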
diff --git a/lib/deimos/consumer.rb b/lib/deimos/consumer.rb
index f9932bc3..64be46c3 100644
--- a/lib/deimos/consumer.rb
+++ b/lib/deimos/consumer.rb
@@ -7,62 +7,21 @@
# Note: According to the docs, instances of your handler will be created
# for every incoming message/batch. This class should be lightweight.
module Deimos
- # Basic consumer class. Inherit from this class and override either consume
- # or consume_batch, depending on the delivery mode of your listener.
- # `consume` -> use `delivery :message` or `delivery :batch`
- # `consume_batch` -> use `delivery :inline_batch`
- class Consumer
+ # Basic consumer class. Inherit from this class and override either consume_message
+  # or consume_batch, depending on the topic's `each_message` setting.
+ class Consumer < Karafka::BaseConsumer
include Consume::MessageConsumption
include Consume::BatchConsumption
include SharedConfig
- class << self
- # @return [Deimos::SchemaBackends::Base]
- def decoder
- @decoder ||= Deimos.schema_backend(schema: config[:schema],
- namespace: config[:namespace])
- end
-
- # @return [Deimos::SchemaBackends::Base]
- def key_decoder
- @key_decoder ||= Deimos.schema_backend(schema: config[:key_schema],
- namespace: config[:namespace])
- end
- end
-
- # Helper method to decode an encoded key.
- # @param key [String]
- # @return [Object] the decoded key.
- def decode_key(key)
- return nil if key.nil?
-
- config = self.class.config
- unless config[:key_configured]
- raise 'No key config given - if you are not decoding keys, please use '\
- '`key_config plain: true`'
- end
-
- if config[:key_field]
- self.class.decoder.decode_key(key, config[:key_field])
- elsif config[:key_schema]
- self.class.key_decoder.decode(key, schema: config[:key_schema])
- else # no encoding
- key
+ def consume
+ if self.topic.each_message
+ _consume_messages
+ else
+ _consume_batch
end
end
- # Helper method to decode an encoded message.
- # @param payload [Object]
- # @return [Object] the decoded message.
- def decode_message(payload)
- decoded_payload = payload.nil? ? nil : self.class.decoder.decode(payload)
- return decoded_payload unless Utils::SchemaClass.use?(self.class.config.to_h)
-
- Utils::SchemaClass.instance(decoded_payload,
- self.class.config[:schema],
- self.class.config[:namespace])
- end
-
private
def _with_span
@@ -75,42 +34,24 @@ def _with_span
Deimos.config.tracer&.finish(@span)
end
- def _report_time_delayed(payload, metadata)
- return if payload.nil? || payload['timestamp'].blank?
-
- begin
- time_delayed = Time.now.in_time_zone - payload['timestamp'].to_datetime
- rescue ArgumentError
- Deimos.config.logger.info(
- message: "Error parsing timestamp! #{payload['timestamp']}"
- )
- return
- end
- Deimos.config.metrics&.histogram('handler', time_delayed, tags: %W(
- time:time_delayed
- topic:#{metadata[:topic]}
- ))
- end
-
# Overrideable method to determine if a given error should be considered
# "fatal" and always be reraised.
# @param _error [Exception]
- # @param _payload [Hash]
- # @param _metadata [Hash]
+ # @param _messages [Array]
# @return [Boolean]
- def fatal_error?(_error, _payload, _metadata)
+ def fatal_error?(_error, _messages)
false
end
# @param exception [Exception]
- # @param payload [Hash]
- # @param metadata [Hash]
- def _error(exception, payload, metadata)
+ # @param messages [Array]
+ def _error(exception, messages)
Deimos.config.tracer&.set_error(@span, exception)
- raise if Deimos.config.consumers.reraise_errors ||
- Deimos.config.consumers.fatal_error&.call(exception, payload, metadata) ||
- fatal_error?(exception, payload, metadata)
+ raise if self.topic.reraise_errors ||
+ Deimos.config.consumers.fatal_error&.call(exception, messages) ||
+ fatal_error?(exception, messages)
end
+
end
end
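
Error handling now keys off `topic.reraise_errors` plus the two-argument `fatal_error?` hook shown above. A hedged sketch of a consumer that always reraises one class of error, regardless of the topic setting:

    class MyConsumer < Deimos::Consumer
      # Reraise validation failures even when reraise_errors is false for this topic.
      def fatal_error?(error, _messages)
        error.is_a?(ActiveRecord::RecordInvalid)
      end
    end
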
diff --git a/lib/deimos/ext/consumer_route.rb b/lib/deimos/ext/consumer_route.rb
new file mode 100644
index 00000000..17407640
--- /dev/null
+++ b/lib/deimos/ext/consumer_route.rb
@@ -0,0 +1,36 @@
+module Deimos
+ class ConsumerRoute < Karafka::Routing::Features::Base
+ module Topic
+
+ FIELDS = %i(max_db_batch_size
+ bulk_import_id_column
+ replace_associations
+ bulk_import_id_generator
+ each_message
+ reraise_errors
+ fatal_error
+ save_associations_first
+ )
+ Config = Struct.new(*FIELDS, keyword_init: true)
+
+ FIELDS.each do |field|
+ define_method(field) do |*args|
+ @deimos_config ||= Config.new(
+ bulk_import_id_column: :bulk_import_id,
+ replace_associations: true,
+ each_message: false,
+ reraise_errors: Rails.env.test?,
+ bulk_import_id_generator: proc { SecureRandom.uuid },
+ fatal_error: proc { false }
+ )
+ if args.size.positive?
+ @deimos_config.public_send("#{field}=", args[0])
+ end
+ @deimos_config[field]
+ end
+ end
+ end
+ end
+end
+
+Deimos::ConsumerRoute.activate
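
Each of these fields becomes a per-topic routing method. A sketch of how they might appear in a karafka.rb topic block (topic, consumer and column names are assumptions):

    topic 'my-topic' do
      consumer MyConsumer
      each_message true                  # message-by-message instead of batch consumption
      reraise_errors true                # default is true only in the test environment
      bulk_import_id_column :import_id   # default is :bulk_import_id
      replace_associations false         # default is true
    end
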
diff --git a/lib/deimos/ext/producer_middleware.rb b/lib/deimos/ext/producer_middleware.rb
new file mode 100644
index 00000000..62156cf6
--- /dev/null
+++ b/lib/deimos/ext/producer_middleware.rb
@@ -0,0 +1,98 @@
+module Deimos
+
+ module ProducerMiddleware
+ class << self
+
+ def call(message)
+ Karafka.monitor.instrument(
+ 'deimos.encode_message',
+ producer: self,
+ message: message
+ ) do
+ config = Deimos.karafka_config_for(topic: message[:topic])
+ return message if config.nil? || config.schema.nil?
+ return if message[:payload] && !message[:payload].is_a?(Hash) && !message[:payload].is_a?(SchemaClass::Record)
+
+ m = Deimos::Message.new(message[:payload].to_h,
+ headers: message[:headers],
+ partition_key: message[:partition_key])
+ _process_message(m, message, config)
+ message[:payload] = m.encoded_payload
+ message[:label] = {
+ original_payload: m.payload,
+ original_key: m.key
+ }
+ message[:key] = m.encoded_key
+ message[:partition_key] = if m.partition_key
+ m.partition_key.to_s
+ elsif m.key
+ m.key.to_s
+ else
+ nil
+ end
+ message[:topic] = "#{Deimos.config.producers.topic_prefix}#{config.name}"
+
+ validate_key_config(config, message)
+
+ message
+ end
+ end
+
+ def validate_key_config(config, message)
+ if message[:key].nil? && config.deserializers[:key].is_a?(Deimos::Transcoder)
+ raise 'No key given but a key is required! Use `key_config none: true` to avoid using keys.'
+ end
+ end
+
+ # @param message [Deimos::Message]
+ # @param karafka_message [Hash]
+ # @param config [Deimos::ProducerConfig]
+ def _process_message(message, karafka_message, config)
+ encoder = config.deserializers[:payload].backend
+ key_transcoder = config.deserializers[:key]
+ # this violates the Law of Demeter but it has to happen in a very
+ # specific order and requires a bunch of methods on the producer
+ # to work correctly.
+ message.add_fields(encoder.schema_fields.map(&:name))
+ message.key = karafka_message[:key] || _retrieve_key(message.payload, key_transcoder)
+ # need to do this before _coerce_fields because that might result
+ # in an empty payload which is an *error* whereas this is intended.
+ message.payload = nil if message.payload.blank?
+ message.coerce_fields(encoder)
+ message.encoded_key = _encode_key(message.key, config)
+ message.topic = config.name
+ message.encoded_payload = if message.payload.nil?
+ nil
+ else
+ encoder.encode(message.payload,
+ topic: "#{Deimos.config.producers.topic_prefix}#{config.name}-value")
+ end
+ end
+
+ # @param key [Object]
+ # @param config [ProducerConfig]
+ # @return [String|Object]
+ def _encode_key(key, config)
+ return nil if key.nil?
+
+ if config.deserializers[:key].respond_to?(:encode_key)
+ config.deserializers[:key].encode_key(key)
+ elsif key
+ config.deserializers[:payload].encode(key)
+ else
+ key
+ end
+ end
+
+ # @param payload [Hash]
+ # @param key_transcoder [Deimos::Transcoder]
+ # @return [String]
+ def _retrieve_key(payload, key_transcoder)
+ key = payload.delete(:payload_key)
+ return key if key || !key_transcoder.respond_to?(:key_field)
+
+ key_transcoder.key_field ? payload[key_transcoder.key_field] : nil
+ end
+ end
+ end
+end
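
To illustrate the flow above: when a producer publishes to a topic routed with a field-based key config, the middleware encodes the payload with the topic's schema backend, pulls the key out of the payload, and falls back to the key for the partition key. A hedged example, assuming a hypothetical MyProducer routed to 'my-topic' with `key_config field: :id`:

    MyProducer.publish({ 'id' => 123, 'name' => 'widget' })
    # the payload is encoded by the topic's schema backend,
    # the key is encoded from payload['id'] (123),
    # and partition_key defaults to the key's string form ('123')
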
diff --git a/lib/deimos/ext/producer_route.rb b/lib/deimos/ext/producer_route.rb
new file mode 100644
index 00000000..ea69252e
--- /dev/null
+++ b/lib/deimos/ext/producer_route.rb
@@ -0,0 +1,30 @@
+module Deimos
+ class ProducerRoute < Karafka::Routing::Features::Base
+ FIELDS = %i(producer_classes payload_log disabled)
+
+ Config = Struct.new(*FIELDS, keyword_init: true) do
+ def producer_class=(val)
+ self.producer_classes = [val]
+ end
+
+ def producer_class
+ self.producer_classes.first
+ end
+ end
+ module Topic
+ (FIELDS + [:producer_class]).each do |field|
+ define_method(field) do |*args|
+ active(false) if %i(producer_class producer_classes).include?(field)
+ @deimos_producer_config ||= Config.new
+ if args.any?
+ @deimos_producer_config.public_send("#{field}=", args[0])
+ _deimos_setup_transcoders if schema && namespace
+ end
+ @deimos_producer_config.send(field)
+ end
+ end
+ end
+ end
+end
+
+Deimos::ProducerRoute.activate
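
On the routing side these become topic-level producer settings; note that declaring a producer class also marks the topic inactive, since nothing consumes it in this app. A sketch with hypothetical names:

    topic 'my-topic' do
      producer_class MyProducer   # also calls active(false) for this topic
      payload_log :keys           # log only message keys for this topic
      schema 'MySchema'
      namespace 'com.my-company'
      key_config field: :id
    end
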
diff --git a/lib/deimos/ext/routing_defaults.rb b/lib/deimos/ext/routing_defaults.rb
new file mode 100644
index 00000000..8e345519
--- /dev/null
+++ b/lib/deimos/ext/routing_defaults.rb
@@ -0,0 +1,76 @@
+# This monkey patch was provided by Maciej, the maintainer of Karafka. This allows
+# configs to override each other on a more granular basis rather than each `configure` call
+# blowing away all fields. It also supports multiple default blocks.
+#
+# Unfortunately this can't be merged into Karafka as of now because it would be a major breaking
+# change. As a compromise, it has been added to the test coverage of Karafka to ensure that
+# other changes don't break this.
+# https://github.com/karafka/karafka/issues/2344
+class Matcher
+ def initialize
+ @applications = []
+ end
+
+ def replay_on(topic_node)
+ @applications.each do |method, kwargs|
+ if method == :kafka
+ topic_node.kafka = kwargs.is_a?(Array) ? kwargs[0] : kwargs
+ next
+ end
+ if kwargs.is_a?(Hash)
+ ref = topic_node.public_send(method)
+
+ kwargs.each do |arg, val|
+ if ref.respond_to?("#{arg}=")
+ ref.public_send("#{arg}=", val)
+ else
+ if ref.respond_to?(:details)
+ ref.details.merge!(kwargs)
+ elsif ref.is_a?(Hash)
+ ref.merge!(kwargs)
+ else
+            raise 'Unsupported attribute type - handle it the same way as the config case above'
+ end
+ end
+ end
+ end
+
+ if kwargs.is_a?(Array) && kwargs.size == 1
+ if topic_node.respond_to?("#{method}=")
+ topic_node.public_send(:"#{method}=", kwargs.first)
+ else
+ topic_node.public_send(method, *kwargs)
+ end
+ end
+ end
+ end
+
+ def method_missing(m, *args, **kwargs)
+ if args.empty?
+ @applications << [m, kwargs]
+ else
+ @applications << [m, args]
+ end
+ end
+end
+
+DEFAULTS = Matcher.new
+
+module Builder
+ def defaults(&block)
+ DEFAULTS.instance_eval(&block) if block
+ end
+end
+
+module ConsumerGroup
+ def topic=(name, &block)
+ k = Matcher.new
+ t = super(name)
+ k.instance_eval(&block) if block
+ DEFAULTS.replay_on(t)
+ k.replay_on(t)
+ end
+end
+
+Karafka::Routing::Builder.prepend Builder
+Karafka::Routing::ConsumerGroup.prepend ConsumerGroup
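
With this patch applied, a `defaults` block in the routes is replayed onto every topic, and individual topic blocks only override the fields they mention. A hedged sketch (names are illustrative):

    Karafka::App.routes.draw do
      defaults do
        namespace 'com.my-company'
        use_schema_classes true
      end

      topic 'my-topic' do
        consumer MyConsumer
        schema 'MySchema'   # namespace and use_schema_classes come from the defaults block
      end
    end
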
diff --git a/lib/deimos/ext/schema_route.rb b/lib/deimos/ext/schema_route.rb
new file mode 100644
index 00000000..6c58dd9b
--- /dev/null
+++ b/lib/deimos/ext/schema_route.rb
@@ -0,0 +1,70 @@
+require "deimos/transcoder"
+require "deimos/ext/producer_middleware"
+require "deimos/schema_backends/plain"
+
+module Deimos
+ class SchemaRoute < Karafka::Routing::Features::Base
+
+ module Topic
+ {
+ schema: nil,
+ namespace: nil,
+ key_config: {none: true},
+ use_schema_classes: Deimos.config.schema.use_schema_classes
+ }.each do |field, default|
+ define_method(field) do |*args|
+ @_deimos_config ||= {}
+ @_deimos_config[:schema] ||= {}
+ if args.any?
+ @_deimos_config[:schema][field] = args[0]
+ _deimos_setup_transcoders if schema && namespace
+ end
+ @_deimos_config[:schema][field] || default
+ end
+ end
+ def _deimos_setup_transcoders
+ payload = Transcoder.new(
+ schema: schema,
+ namespace: namespace,
+ use_schema_classes: use_schema_classes,
+ topic: name
+ )
+
+ key = nil
+
+ if key_config[:plain]
+ key = Transcoder.new(
+ schema: schema,
+ namespace: namespace,
+ use_schema_classes: use_schema_classes,
+ topic: name
+ )
+ key.backend = Deimos::SchemaBackends::Plain.new(schema: nil, namespace: nil)
+ elsif !key_config[:none]
+ if key_config[:field]
+ key = Transcoder.new(
+ schema: schema,
+ namespace: namespace,
+ use_schema_classes: use_schema_classes,
+ key_field: key_config[:field].to_s,
+ topic: name
+ )
+ elsif key_config[:schema]
+ key = Transcoder.new(
+ schema: key_config[:schema] || schema,
+ namespace: namespace,
+ use_schema_classes: use_schema_classes,
+ topic: self.name
+ )
+ else
+ raise 'No key config given - if you are not encoding keys, please use `key_config plain: true`'
+ end
+ end
+ deserializers.payload = payload
+ deserializers.key = key if key
+ end
+ end
+ end
+end
+
+Deimos::SchemaRoute.activate
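
The key_config variants map directly onto the branches above; a quick reference, with illustrative schema names:

    key_config none: true             # topic has no keys; no key deserializer is registered
    key_config plain: true            # keys pass through the Plain schema backend unencoded
    key_config field: :id             # key is taken from the payload's `id` field
    key_config schema: 'MyKeySchema'  # key is encoded/decoded with its own schema
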
diff --git a/lib/deimos/instrumentation.rb b/lib/deimos/instrumentation.rb
deleted file mode 100644
index a8d00ce0..00000000
--- a/lib/deimos/instrumentation.rb
+++ /dev/null
@@ -1,95 +0,0 @@
-# frozen_string_literal: true
-
-require 'active_support/notifications'
-require 'active_support/concern'
-
-# :nodoc:
-module Deimos
- # Copied from Phobos instrumentation.
- module Instrumentation
- extend ActiveSupport::Concern
-
- # @return [String]
- NAMESPACE = 'Deimos'
-
- # :nodoc:
- module ClassMethods
- # @param event [String]
- # @return [void]
- def subscribe(event)
- ActiveSupport::Notifications.subscribe("#{NAMESPACE}.#{event}") do |*args|
- yield(ActiveSupport::Notifications::Event.new(*args)) if block_given?
- end
- end
-
- # @param subscriber [ActiveSupport::Subscriber]
- # @return [void]
- def unsubscribe(subscriber)
- ActiveSupport::Notifications.unsubscribe(subscriber)
- end
-
- # @param event [String]
- # @param extra [Hash]
- # @return [void]
- def instrument(event, extra={})
- ActiveSupport::Notifications.instrument("#{NAMESPACE}.#{event}", extra) do |extra2|
- yield(extra2) if block_given?
- end
- end
- end
- end
-
- include Instrumentation
-
- # This module listens to events published by RubyKafka.
- module KafkaListener
- # @param exception [Exception]
- def self.handle_exception_with_messages(exception)
- messages = exception.failed_messages
- messages.group_by(&:topic).each do |topic, batch|
- producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
- next if batch.empty? || !producer
-
- decoder = Deimos.schema_backend(schema: producer.config[:schema],
- namespace: producer.config[:namespace])
- payloads = batch.map { |m| decoder.decode(m.value) }
-
- Deimos.config.metrics&.increment(
- 'publish_error',
- tags: %W(topic:#{topic}),
- by: payloads.size
- )
- Deimos.instrument(
- 'produce_error',
- producer: producer,
- topic: topic,
- exception_object: exception,
- payloads: payloads
- )
- end
- end
-
- # Listens for any exceptions that happen during publishing and re-publishes
- # as a Deimos event.
- # @param event [ActiveSupport::Notifications::Event]
- # @return [void]
- def self.send_produce_error(event)
- exception = event.payload[:exception_object]
- return unless exception
-
- if exception.respond_to?(:failed_messages)
- handle_exception_with_messages(exception)
- else
- Deimos.config.metrics&.increment(
- 'publish_error',
- by: event.payload[:message_count] || 1
- )
- end
- end
- end
-
- ActiveSupport::Notifications.subscribe('deliver_messages.producer.kafka') do |*args|
- event = ActiveSupport::Notifications::Event.new(*args)
- KafkaListener.send_produce_error(event)
- end
-end
diff --git a/lib/deimos/kafka_message.rb b/lib/deimos/kafka_message.rb
index 1bdc275b..0a791294 100644
--- a/lib/deimos/kafka_message.rb
+++ b/lib/deimos/kafka_message.rb
@@ -49,8 +49,7 @@ def self.decoded(messages=[])
end
end
- # @return [Hash]
- def phobos_message
+ def karafka_message
{
payload: self.message,
partition_key: self.partition_key,
@@ -58,5 +57,6 @@ def phobos_message
topic: self.topic
}
end
+
end
end
diff --git a/lib/deimos/kafka_source.rb b/lib/deimos/kafka_source.rb
index be2360b3..63ced2ef 100644
--- a/lib/deimos/kafka_source.rb
+++ b/lib/deimos/kafka_source.rb
@@ -31,7 +31,7 @@ def send_kafka_event_on_update
return unless self.class.kafka_config[:update]
producers = self.class.kafka_producers
- fields = producers.flat_map(&:watched_attributes).uniq
+ fields = producers.flat_map { |p| p.watched_attributes(self) }.uniq
fields -= ['updated_at']
# Only send an event if a field we care about was changed.
any_changes = fields.any? do |field|
@@ -71,12 +71,7 @@ def kafka_config
# @return [Array] the producers to run.
def kafka_producers
- if self.respond_to?(:kafka_producer)
- Deimos.config.logger.warn(message: DEPRECATION_WARNING)
- return [self.kafka_producer]
- end
-
- raise NotImplementedError
+ raise MissingImplementationError
end
# This is an internal method, part of the activerecord_import gem. It's
diff --git a/lib/deimos/kafka_topic_info.rb b/lib/deimos/kafka_topic_info.rb
index 7697742f..7da971ad 100644
--- a/lib/deimos/kafka_topic_info.rb
+++ b/lib/deimos/kafka_topic_info.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: true
module Deimos
- # Record that keeps track of which topics are being worked on by DbProducers.
+ # Record that keeps track of which topics are being worked on by OutboxProducers.
class KafkaTopicInfo < ActiveRecord::Base
self.table_name = 'kafka_topic_info'
diff --git a/lib/deimos/logging.rb b/lib/deimos/logging.rb
new file mode 100644
index 00000000..3dcdf89f
--- /dev/null
+++ b/lib/deimos/logging.rb
@@ -0,0 +1,103 @@
+module Deimos
+ module Logging
+ class << self
+
+ def log_add(method, msg)
+ if Karafka.logger.respond_to?(:tagged)
+ Karafka.logger.tagged('Deimos') do |logger|
+ logger.send(method, msg.to_json)
+ end
+ else
+ Karafka.logger.send(method, msg.to_json)
+ end
+ end
+
+ def log_info(*args)
+ log_add(:info, *args)
+ end
+
+ def log_debug(*args)
+ log_add(:debug, *args)
+ end
+
+ def log_error(*args)
+ log_add(:error, *args)
+ end
+
+ def log_warn(*args)
+ log_add(:warn, *args)
+ end
+
+ def metadata_log_text(metadata)
+ metadata.to_h.slice(:timestamp, :offset, :first_offset, :last_offset, :partition, :topic, :size)
+ end
+
+ def _payloads(messages)
+
+ end
+
+ def payload(m)
+ return nil if m.nil?
+
+ if m.respond_to?(:payload)
+ m.payload
+ elsif m[:label]
+        m.dig(:label, :original_payload)
+ else
+ m[:payload]
+ end
+ end
+
+ def key(m)
+ return nil if m.nil?
+
+ if m.respond_to?(:payload) && m.payload
+ m.key || m.payload['message_id']
+ elsif m.respond_to?(:[])
+ if m[:label]
+ m.dig(:label, :original_key)
+ elsif m[:payload].is_a?(String)
+ m[:key] || m[:payload_key]
+ else
+ payload = m[:payload]&.with_indifferent_access
+ m[:key] || m[:payload_key] || payload[:payload_key] || payload[:message_id]
+ end
+ end
+ end
+
+ def messages_log_text(payload_log, messages)
+ log_message = {}
+
+ case payload_log
+ when :keys
+ keys = messages.map do |m|
+ key(m)
+ end
+ log_message.merge!(
+ payload_keys: keys
+ )
+ when :count
+ log_message.merge!(
+ payloads_count: messages.count
+ )
+ when :headers
+ log_message.merge!(
+ payload_headers: messages.map { |m| m.respond_to?(:headers) ? m.headers : m[:headers] }
+ )
+ else
+ log_message.merge!(
+ payloads: messages.map do |m|
+ {
+ payload: payload(m),
+ key: key(m)
+ }
+ end
+ )
+ end
+
+ log_message
+ end
+
+ end
+ end
+end
diff --git a/lib/deimos/message.rb b/lib/deimos/message.rb
index 2e5cf1f8..1fd47c40 100644
--- a/lib/deimos/message.rb
+++ b/lib/deimos/message.rb
@@ -17,17 +17,13 @@ class Message
attr_accessor :encoded_payload
# @return [String]
attr_accessor :topic
- # @return [String]
- attr_accessor :producer_name
# @param payload [Hash]
- # @param producer [Class]
# @param topic [String]
# @param key [String, Integer, Hash]
# @param partition_key [Integer]
- def initialize(payload, producer, topic: nil, key: nil, headers: nil, partition_key: nil)
+ def initialize(payload, topic: nil, key: nil, headers: nil, partition_key: nil)
@payload = payload&.with_indifferent_access
- @producer_name = producer&.name
@topic = topic
@key = key
@headers = headers&.with_indifferent_access
@@ -64,11 +60,7 @@ def encoded_hash
key: @encoded_key,
headers: @headers,
partition_key: @partition_key || @encoded_key,
- payload: @encoded_payload,
- metadata: {
- decoded_payload: @payload,
- producer_name: @producer_name
- }
+ payload: @encoded_payload
}.delete_if { |k, v| k == :headers && v.nil? }
end
@@ -82,7 +74,6 @@ def to_h
payload: @payload,
metadata: {
decoded_payload: @payload,
- producer_name: @producer_name
}
}.delete_if { |k, v| k == :headers && v.nil? }
end
diff --git a/lib/deimos/metrics/datadog.rb b/lib/deimos/metrics/datadog.rb
index 25e7980d..803f5102 100644
--- a/lib/deimos/metrics/datadog.rb
+++ b/lib/deimos/metrics/datadog.rb
@@ -1,12 +1,15 @@
# frozen_string_literal: true
require 'deimos/metrics/provider'
+require 'karafka/instrumentation/vendors/datadog/metrics_listener'
+require 'waterdrop/instrumentation/vendors/datadog/metrics_listener'
module Deimos
module Metrics
# A Metrics wrapper class for Datadog.
class Datadog < Metrics::Provider
- # @param config [Hash]
+
+    # @param config [Hash] a hash of both client and Karafka MetricsListener configs.
# @param logger [Logger]
def initialize(config, logger)
raise 'Metrics config must specify host_ip' if config[:host_ip].nil?
@@ -14,12 +17,48 @@ def initialize(config, logger)
raise 'Metrics config must specify namespace' if config[:namespace].nil?
logger.info("DatadogMetricsProvider configured with: #{config}")
+
@client = ::Datadog::Statsd.new(
config[:host_ip],
config[:host_port],
tags: config[:tags],
namespace: config[:namespace]
)
+ setup_karafka(config)
+ setup_waterdrop(config)
+ end
+
+ def setup_karafka(config={})
+ karafka_listener = ::Karafka::Instrumentation::Vendors::Datadog::MetricsListener.new do |karafka_config|
+ karafka_config.client = @client
+ if config[:karafka_namespace]
+ karafka_config.namespace = config[:karafka_namespace]
+ end
+ if config[:karafka_distribution_mode]
+ karafka_config.distribution_mode = config[:karafka_distribution_mode]
+ end
+ if config[:rd_kafka_metrics]
+ karafka_config.rd_kafka_metrics = config[:rd_kafka_metrics]
+ end
+ end
+ Karafka.monitor.subscribe(karafka_listener)
+ end
+
+ def setup_waterdrop(config)
+ waterdrop_listener = ::WaterDrop::Instrumentation::Vendors::Datadog::MetricsListener.new do |waterdrop_config|
+ waterdrop_config.client = @client
+ if config[:karafka_namespace]
+ waterdrop_config.namespace = config[:karafka_namespace]
+ end
+ if config[:karafka_distribution_mode]
+ waterdrop_config.distribution_mode = config[:karafka_distribution_mode]
+ end
+ if config[:rd_kafka_metrics]
+          waterdrop_config.rd_kafka_metrics = [] # rd_kafka metrics are already handled by the Karafka listener
+ end
+ end
+ Karafka::Setup::Config.setup if Karafka.producer.nil?
+ Karafka.producer.monitor.subscribe(waterdrop_listener)
end
# :nodoc:
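
The constructor now also subscribes the Karafka and WaterDrop Datadog listeners to the same statsd client. A hedged configuration sketch; host, namespace and tags are illustrative, and it assumes the usual Deimos `metrics` setting (defined outside this hunk):

    Deimos.configure do
      metrics(Deimos::Metrics::Datadog.new(
        {
          host_ip: '127.0.0.1',
          host_port: 8125,
          namespace: 'my_app',
          tags: ['env:production'],
          karafka_namespace: 'karafka',           # optional, forwarded to the Karafka listener
          karafka_distribution_mode: :histogram   # optional, :histogram or :distribution
        },
        Rails.logger
      ))
    end
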
diff --git a/lib/deimos/metrics/provider.rb b/lib/deimos/metrics/provider.rb
index 4f527a1e..ae99a149 100644
--- a/lib/deimos/metrics/provider.rb
+++ b/lib/deimos/metrics/provider.rb
@@ -9,7 +9,7 @@ class Provider
# @param options [Hash] Any additional options, e.g. :tags
# @return [void]
def increment(metric_name, options={})
- raise NotImplementedError
+ raise MissingImplementationError
end
# Send an counter increment metric
@@ -18,7 +18,7 @@ def increment(metric_name, options={})
# @param options [Hash] Any additional options, e.g. :tags
# @return [void]
def gauge(metric_name, count, options={})
- raise NotImplementedError
+ raise MissingImplementationError
end
# Send an counter increment metric
@@ -27,7 +27,7 @@ def gauge(metric_name, count, options={})
# @param options [Hash] Any additional options, e.g. :tags
# @return [void]
def histogram(metric_name, count, options={})
- raise NotImplementedError
+ raise MissingImplementationError
end
# Time a yielded block, and send a timer metric
@@ -35,7 +35,7 @@ def histogram(metric_name, count, options={})
# @param options [Hash] Any additional options, e.g. :tags
# @return [void]
def time(metric_name, options={})
- raise NotImplementedError
+ raise MissingImplementationError
end
end
end
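As a sketch of the abstract interface above, a custom provider only needs to implement these four methods; the class and the injected client below are hypothetical and not part of this change.

    # Hypothetical provider; @client is any object exposing increment/gauge/histogram/timing.
    class MyStatsProvider < Deimos::Metrics::Provider
      def initialize(client)
        @client = client
      end

      def increment(metric_name, options={})
        @client.increment(metric_name, options)
      end

      def gauge(metric_name, count, options={})
        @client.gauge(metric_name, count, options)
      end

      def histogram(metric_name, count, options={})
        @client.histogram(metric_name, count, options)
      end

      def time(metric_name, options={})
        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        result = yield
        elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
        @client.timing(metric_name, elapsed_ms, options)
        result
      end
    end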
diff --git a/lib/deimos/monkey_patches/phobos_cli.rb b/lib/deimos/monkey_patches/phobos_cli.rb
deleted file mode 100644
index 41bf4036..00000000
--- a/lib/deimos/monkey_patches/phobos_cli.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-require 'phobos/cli/start'
-
-#@!visibility private
-module Phobos
- # :nodoc:
- module CLI
- # :nodoc:
- class Start
- # :nodoc:
- def validate_listeners!
- Phobos.config.listeners.each do |listener|
- handler = listener.handler
- begin
- handler.constantize
- rescue NameError
- error_exit("Handler '#{handler}' not defined")
- end
-
- delivery = listener.delivery
- if delivery.nil?
- Phobos::CLI.logger.warn do
- Hash(message: "Delivery option should be specified, defaulting to 'batch'"\
- ' - specify this option to silence this message')
- end
- elsif !Listener::DELIVERY_OPTS.include?(delivery)
- error_exit("Invalid delivery option '#{delivery}'. Please specify one of: "\
- "#{Listener::DELIVERY_OPTS.join(', ')}")
- end
- end
- end
- end
- end
-end
diff --git a/lib/deimos/producer.rb b/lib/deimos/producer.rb
index 646fb333..1afe3ee3 100644
--- a/lib/deimos/producer.rb
+++ b/lib/deimos/producer.rb
@@ -2,12 +2,12 @@
require 'deimos/message'
require 'deimos/shared_config'
-require 'phobos/producer'
require 'active_support/notifications'
# :nodoc:
module Deimos
class << self
+
# Run a block without allowing any messages to be produced to Kafka.
# Optionally add a list of producer classes to limit the disabling to those
# classes.
@@ -50,6 +50,8 @@ def _disable_producer_classes(producer_classes)
# @param producer_class [Class]
# @return [Boolean]
def producers_disabled?(producer_class=nil)
+ return true if Deimos.config.producers.disabled
+
Thread.current[:frk_disable_all_producers] ||
Thread.current[:frk_disabled_producers]&.include?(producer_class)
end
@@ -59,28 +61,10 @@ def producers_disabled?(producer_class=nil)
class Producer
include SharedConfig
- class << self
-
- # @return [Hash]
- def config
- @config ||= {
- encode_key: true,
- namespace: Deimos.config.producers.schema_namespace,
- max_batch_size: Deimos.config.producers.max_batch_size
- }
- end
+ # @return [Integer]
+ MAX_BATCH_SIZE = 500
- # Set the topic.
- # @param topic [String]
- # @return [String] the current topic if no argument given.
- def topic(topic=nil)
- if topic
- config[:topic] = topic
- return
- end
- # accessor
- "#{Deimos.config.producers.topic_prefix}#{config[:topic]}"
- end
+ class << self
# Override the default partition key (which is the payload key).
# @param _payload [Hash] the payload being passed into the produce method.
@@ -90,19 +74,28 @@ def partition_key(_payload)
nil
end
- # @param size [Integer] Override the default batch size for publishing.
- # @return [void]
- def max_batch_size(size)
- config[:max_batch_size] = size
- end
-
# Publish the payload to the topic.
# @param payload [Hash, SchemaClass::Record] with an optional payload_key hash key.
# @param topic [String] if specifying the topic
# @param headers [Hash] if specifying headers
# @return [void]
def publish(payload, topic: self.topic, headers: nil)
- publish_list([payload], topic: topic, headers: headers)
+ produce([{payload: payload, topic: topic, headers: headers}])
+ end
+
+ # Produce a list of messages in WaterDrop message hash format.
+ # @param messages [Array<Hash>]
+ # @param backend [Class<Deimos::Backends::Base>]
+ # @return [void]
+ def produce(messages, backend: determine_backend_class)
+ return if Deimos.producers_disabled?(self)
+
+ messages.each do |m|
+ m[:label] = m
+ m[:partition_key] ||= self.partition_key(m[:payload])
+ end
+ messages.in_groups_of(MAX_BATCH_SIZE, false) do |batch|
+ self.produce_batch(backend, batch)
+ end
end
# Publish a list of messages.
@@ -115,31 +108,31 @@ def publish(payload, topic: self.topic, headers: nil)
# @param headers [Hash] if specifying headers
# @return [void]
def publish_list(payloads, sync: nil, force_send: false, topic: self.topic, headers: nil)
- return if Deimos.config.kafka.seed_brokers.blank? ||
- Deimos.config.producers.disabled ||
- Deimos.producers_disabled?(self)
-
- raise 'Topic not specified. Please specify the topic.' if topic.blank?
-
- backend_class = determine_backend_class(sync, force_send)
- Deimos.instrument(
- 'encode_messages',
- producer: self,
- topic: topic,
- payloads: payloads
- ) do
- messages = Array(payloads).map { |p| Deimos::Message.new(p.to_h, self, headers: headers) }
- messages.each { |m| _process_message(m, topic) }
- messages.in_groups_of(self.config[:max_batch_size], false) do |batch|
- self.produce_batch(backend_class, batch)
- end
+ backend = determine_backend_class(sync, force_send)
+
+ messages = Array(payloads).map do |p|
+ {
+ payload: p&.to_h,
+ headers: headers,
+ topic: topic,
+ partition_key: self.partition_key(p)
+ }
end
+ self.produce(messages, backend: backend)
+ end
+
+ # The Karafka topic configuration that registers this producer class, if any.
+ def karafka_config
+ Deimos.karafka_configs.find { |topic| topic.producer_classes.include?(self) }
+ end
+
+ # @return [String, nil] the topic name from the Karafka routing config.
+ def topic
+ karafka_config&.name
end
# @param sync [Boolean]
# @param force_send [Boolean]
# @return [Class]
- def determine_backend_class(sync, force_send)
+ def determine_backend_class(sync=false, force_send=false)
backend = if force_send
:kafka
else
@@ -155,86 +148,12 @@ def determine_backend_class(sync, force_send)
# Send a batch to the backend.
# @param backend [Class]
- # @param batch [Array<Deimos::Message>]
+ # @param batch [Array<Hash>]
# @return [void]
def produce_batch(backend, batch)
backend.publish(producer_class: self, messages: batch)
end
- # @return [Deimos::SchemaBackends::Base]
- def encoder
- @encoder ||= Deimos.schema_backend(schema: config[:schema],
- namespace: config[:namespace])
- end
-
- # @return [Deimos::SchemaBackends::Base]
- def key_encoder
- @key_encoder ||= Deimos.schema_backend(schema: config[:key_schema],
- namespace: config[:namespace])
- end
-
- # Override this in active record producers to add
- # non-schema fields to check for updates
- # @return [Array] fields to check for updates
- def watched_attributes
- self.encoder.schema_fields.map(&:name)
- end
-
- private
-
- # @param message [Message]
- # @param topic [String]
- def _process_message(message, topic)
- # this violates the Law of Demeter but it has to happen in a very
- # specific order and requires a bunch of methods on the producer
- # to work correctly.
- message.add_fields(encoder.schema_fields.map(&:name))
- message.partition_key = self.partition_key(message.payload)
- message.key = _retrieve_key(message.payload)
- # need to do this before _coerce_fields because that might result
- # in an empty payload which is an *error* whereas this is intended.
- message.payload = nil if message.payload.blank?
- message.coerce_fields(encoder)
- message.encoded_key = _encode_key(message.key)
- message.topic = topic
- message.encoded_payload = if message.payload.nil?
- nil
- else
- encoder.encode(message.payload,
- topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-value")
- end
- end
-
- # @param key [Object]
- # @return [String|Object]
- def _encode_key(key)
- if key.nil?
- return nil if config[:no_keys] # no key is fine, otherwise it's a problem
-
- raise 'No key given but a key is required! Use `key_config none: true` to avoid using keys.'
- end
- if config[:encode_key] && config[:key_field].nil? &&
- config[:key_schema].nil?
- raise 'No key config given - if you are not encoding keys, please use `key_config plain: true`'
- end
-
- if config[:key_field]
- encoder.encode_key(config[:key_field], key, topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-key")
- elsif config[:key_schema]
- key_encoder.encode(key, topic: "#{Deimos.config.producers.topic_prefix}#{config[:topic]}-key")
- else
- key
- end
- end
-
- # @param payload [Hash]
- # @return [String]
- def _retrieve_key(payload)
- key = payload.delete(:payload_key)
- return key if key
-
- config[:key_field] ? payload[config[:key_field]] : nil
- end
end
end
end
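To illustrate the reworked API: `publish` wraps a single payload, `produce` takes WaterDrop-style message hashes directly, and the topic now comes from the Karafka routing via `karafka_config`. The producer class and topic below are hypothetical and assume the class has been registered against a topic in the Karafka routes.

    class MyProducer < Deimos::Producer
    end

    # Single payload; topic and partition key are resolved automatically.
    MyProducer.publish({ 'test_id' => 'abc', 'some_int' => 3 })

    # Pre-built message hashes; batches are flushed every MAX_BATCH_SIZE (500) messages.
    MyProducer.produce([
      { payload: { 'test_id' => 'abc', 'some_int' => 3 }, topic: 'my-topic' }
    ])

    # Suppress publishing inside a block, e.g. during a backfill.
    Deimos.disable_producers(MyProducer) do
      MyProducer.publish({ 'test_id' => 'skipped', 'some_int' => 0 })
    end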
diff --git a/lib/deimos/railtie.rb b/lib/deimos/railtie.rb
index 1bfc09e5..2d129c60 100644
--- a/lib/deimos/railtie.rb
+++ b/lib/deimos/railtie.rb
@@ -2,6 +2,12 @@
# Add rake task to Rails.
class Deimos::Railtie < Rails::Railtie
+ config.before_initialize do
+ if ARGV[0] == "deimos:v2"
+ FigTree.keep_removed_configs = true
+ end
+ end
+
rake_tasks do
load 'tasks/deimos.rake'
end
diff --git a/lib/deimos/schema_backends/avro_base.rb b/lib/deimos/schema_backends/avro_base.rb
index 7169429d..0c9704c6 100644
--- a/lib/deimos/schema_backends/avro_base.rb
+++ b/lib/deimos/schema_backends/avro_base.rb
@@ -20,15 +20,19 @@ def initialize(schema:, namespace:)
# @override
def encode_key(key_id, key, topic: nil)
- @key_schema ||= _generate_key_schema(key_id)
+ begin
+ @key_schema ||= @schema_store.find("#{@schema}_key")
+ rescue AvroTurf::SchemaNotFoundError
+ @key_schema = generate_key_schema(key_id)
+ end
field_name = _field_name_from_schema(@key_schema)
- payload = { field_name => key }
+ payload = key.is_a?(Hash) ? key : { field_name => key }
encode(payload, schema: @key_schema['name'], topic: topic)
end
# @override
def decode_key(payload, key_id)
- @key_schema ||= _generate_key_schema(key_id)
+ @key_schema ||= generate_key_schema(key_id)
field_name = _field_name_from_schema(@key_schema)
decode(payload, schema: @key_schema['name'])[field_name]
end
@@ -85,7 +89,7 @@ def load_schema
# @override
def self.mock_backend
- :avro_validation
+ :avro_local
end
# @override
@@ -146,21 +150,8 @@ def self.schema_base_class(schema)
end
end
- private
-
- # @param schema [String]
- # @return [Avro::Schema]
- def avro_schema(schema=nil)
- schema ||= @schema
- @schema_store.find(schema, @namespace)
- end
-
- # Generate a key schema from the given value schema and key ID. This
- # is used when encoding or decoding keys from an existing value schema.
- # @param key_id [Symbol]
- # @return [Hash]
- def _generate_key_schema(key_id)
- key_field = avro_schema.fields.find { |f| f.name == key_id.to_s }
+ def generate_key_schema(field_name)
+ key_field = avro_schema.fields.find { |f| f.name == field_name.to_s }
name = _key_schema_name(@schema)
key_schema = {
'type' => 'record',
@@ -169,13 +160,22 @@ def _generate_key_schema(key_id)
'doc' => "Key for #{@namespace}.#{@schema} - autogenerated by Deimos",
'fields' => [
{
- 'name' => key_id,
+ 'name' => field_name,
'type' => key_field.type.type_sym.to_s
}
]
}
@schema_store.add_schema(key_schema)
- key_schema
+ @key_schema = key_schema
+ end
+
+ private
+
+ # @param schema [String]
+ # @return [Avro::Schema]
+ def avro_schema(schema=nil)
+ schema ||= @schema
+ @schema_store.find(schema, @namespace)
end
# @param value_schema [Hash]
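In practice, the reworked key handling above means `encode_key` first looks for a registered `<schema>_key` schema and only generates one from the named value field if that lookup fails, and it now accepts either a scalar key or an already-keyed hash. A sketch, with placeholder schema and namespace names:

    backend = Deimos.schema_backend(schema: 'MySchema', namespace: 'com.my-namespace')

    # Scalar key: wrapped as { 'test_id' => 'abc' } using the found or generated key schema.
    encoded = backend.encode_key(:test_id, 'abc')

    # Hash key: used as the key payload as-is.
    backend.encode_key(:test_id, { 'test_id' => 'abc' })

    backend.decode_key(encoded, :test_id) # => 'abc'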
diff --git a/lib/deimos/schema_backends/avro_schema_registry.rb b/lib/deimos/schema_backends/avro_schema_registry.rb
index e05d62bd..374e628e 100644
--- a/lib/deimos/schema_backends/avro_schema_registry.rb
+++ b/lib/deimos/schema_backends/avro_schema_registry.rb
@@ -1,7 +1,6 @@
# frozen_string_literal: true
require_relative 'avro_base'
-require_relative 'avro_validation'
require 'avro_turf/messaging'
module Deimos
@@ -29,7 +28,7 @@ def avro_turf_messaging
user: Deimos.config.schema.user,
password: Deimos.config.schema.password,
namespace: @namespace,
- logger: Deimos.config.logger
+ logger: Karafka.logger
)
end
end
diff --git a/lib/deimos/schema_backends/avro_validation.rb b/lib/deimos/schema_backends/avro_validation.rb
index 2842d23f..3f9f3dcb 100644
--- a/lib/deimos/schema_backends/avro_validation.rb
+++ b/lib/deimos/schema_backends/avro_validation.rb
@@ -9,12 +9,12 @@ module SchemaBackends
class AvroValidation < AvroBase
# @override
def decode_payload(payload, schema: nil)
- payload.with_indifferent_access
+ JSON.parse(payload)
end
# @override
def encode_payload(payload, schema: nil, topic: nil)
- payload.with_indifferent_access
+ payload.to_h.with_indifferent_access.to_json
end
end
end
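The validation backend now round-trips JSON strings instead of passing hashes through, so encoded test payloads look like real wire payloads. A small illustration of the low-level hooks (the schema and namespace are placeholders; the full `encode`/`decode` calls additionally validate against the Avro schema on disk):

    backend = Deimos::SchemaBackends::AvroValidation.new(schema: 'MySchema',
                                                         namespace: 'com.my-namespace')

    backend.encode_payload({ test_id: 'abc', some_int: 3 }, schema: nil)
    # => '{"test_id":"abc","some_int":3}'  (a JSON string, not a Hash)

    backend.decode_payload('{"test_id":"abc","some_int":3}', schema: nil)
    # => { 'test_id' => 'abc', 'some_int' => 3 }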
diff --git a/lib/deimos/schema_backends/base.rb b/lib/deimos/schema_backends/base.rb
index 8d9ea848..aa62c656 100644
--- a/lib/deimos/schema_backends/base.rb
+++ b/lib/deimos/schema_backends/base.rb
@@ -79,7 +79,7 @@ def coerce(payload)
# Indicate a class which should act as a mocked version of this backend.
# This class should perform all validations but not actually do any
# encoding.
- # Note that the "mock" version (e.g. avro_validation) should return
+ # Note that the "mock" version should return
# its own symbol when this is called, since it may be called multiple
# times depending on the order of RSpec helpers.
# @return [Symbol]
@@ -90,7 +90,7 @@ def self.mock_backend
# The content type to use when encoding / decoding requests over HTTP via ActionController.
# @return [String]
def self.content_type
- raise NotImplementedError
+ raise MissingImplementationError
end
# Converts your schema to String form for generated YARD docs.
@@ -98,7 +98,7 @@ def self.content_type
# @param schema [Object]
# @return [String] A string representation of the Type
def self.field_type(schema)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Encode a payload. To be defined by subclass.
@@ -107,7 +107,7 @@ def self.field_type(schema)
# @param topic [String]
# @return [String]
def encode_payload(payload, schema:, topic: nil)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Decode a payload. To be defined by subclass.
@@ -115,7 +115,7 @@ def encode_payload(payload, schema:, topic: nil)
# @param schema [String,Symbol]
# @return [Hash]
def decode_payload(payload, schema:)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Validate that a payload matches the schema. To be defined by subclass.
@@ -123,13 +123,13 @@ def decode_payload(payload, schema:)
# @param schema [String,Symbol]
# @return [void]
def validate(payload, schema:)
- raise NotImplementedError
+ raise MissingImplementationError
end
# List of field names belonging to the schema. To be defined by subclass.
# @return [Array]
def schema_fields
- raise NotImplementedError
+ raise MissingImplementationError
end
# Given a value and a field definition (as defined by whatever the
@@ -139,7 +139,7 @@ def schema_fields
# @param value [Object]
# @return [Object]
def coerce_field(field, value)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Given a field definition, return the SQL type that might be used in
@@ -150,7 +150,14 @@ def coerce_field(field, value)
# @param field [SchemaField]
# @return [Symbol]
def sql_type(field)
- raise NotImplementedError
+ raise MissingImplementationError
+ end
+
+ # Generate a key schema from the given value schema and key ID. This
+ # is used when encoding or decoding keys from an existing value schema.
+ # @param field_name [Symbol]
+ def generate_key_schema(field_name)
+ raise MissingImplementationError
end
# Encode a message key. To be defined by subclass.
@@ -159,7 +166,7 @@ def sql_type(field)
# @param topic [String]
# @return [String]
def encode_key(key, key_id, topic: nil)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Decode a message key. To be defined by subclass.
@@ -167,13 +174,13 @@ def encode_key(key, key_id, topic: nil)
# @param key_id [String,Symbol] the field in the message to decode.
# @return [String]
def decode_key(payload, key_id)
- raise NotImplementedError
+ raise MissingImplementationError
end
# Forcefully loads the schema into memory.
# @return [Object] The schema that is of use.
def load_schema
- raise NotImplementedError
+ raise MissingImplementationError
end
end
end
diff --git a/lib/deimos/schema_backends/mock.rb b/lib/deimos/schema_backends/mock.rb
index 0b5003d3..a666bda9 100644
--- a/lib/deimos/schema_backends/mock.rb
+++ b/lib/deimos/schema_backends/mock.rb
@@ -4,6 +4,11 @@ module Deimos
module SchemaBackends
# Mock implementation of a schema backend that does no encoding or validation.
class Mock < Base
+
+ # @override
+ def generate_key_schema(field_name)
+ end
+
# @override
def decode_payload(payload, schema:)
payload.is_a?(String) ? 'payload-decoded' : payload.map { |k, v| [k, "decoded-#{v}"] }
@@ -11,7 +16,7 @@ def decode_payload(payload, schema:)
# @override
def encode_payload(payload, schema:, topic: nil)
- payload.is_a?(String) ? 'payload-encoded' : payload.map { |k, v| [k, "encoded-#{v}"] }
+ payload.is_a?(String) ? 'payload-encoded' : payload.map { |k, v| [k, "encoded-#{v}"] }.to_json
end
# @override
diff --git a/lib/deimos/schema_backends/plain.rb b/lib/deimos/schema_backends/plain.rb
new file mode 100644
index 00000000..df7fafd9
--- /dev/null
+++ b/lib/deimos/schema_backends/plain.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module Deimos
+ module SchemaBackends
+ # Schema backend that passes through as a basic string.
+ class Plain < Base
+
+ # @override
+ def generate_key_schema(field_name)
+ end
+
+ # @override
+ def decode_payload(payload, schema:)
+ payload
+ end
+
+ # @override
+ def encode_payload(payload, schema:, topic: nil)
+ payload.to_s
+ end
+
+ # @override
+ def validate(payload, schema:)
+ end
+
+ # @override
+ def schema_fields
+ []
+ end
+
+ # @override
+ def coerce_field(_field, value)
+ value
+ end
+
+ # @override
+ def encode_key(key_id, key, topic: nil)
+ key
+ end
+
+ # @override
+ def decode_key(payload, key_id)
+ payload[key_id]
+ end
+ end
+ end
+end
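Assuming this backend is registered under a `:plain` key like its siblings in this directory, it simply stringifies payloads on the way out and passes them through on the way in; a hypothetical sketch:

    Deimos.configure { |config| config.schema.backend = :plain }

    backend = Deimos::SchemaBackends::Plain.new(schema: nil, namespace: nil)
    backend.encode_payload(123, schema: nil)         # => "123"
    backend.decode_payload('raw bytes', schema: nil) # => "raw bytes"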
diff --git a/lib/deimos/schema_class/base.rb b/lib/deimos/schema_class/base.rb
index 6cb1c697..acb7336b 100644
--- a/lib/deimos/schema_class/base.rb
+++ b/lib/deimos/schema_class/base.rb
@@ -15,7 +15,7 @@ def initialize(*_args)
# @param _opts [Hash]
# @return [Hash] a hash representation of the payload
def as_json(_opts={})
- raise NotImplementedError
+ raise MissingImplementationError
end
# @param key [String,Symbol]
@@ -36,6 +36,8 @@ def ==(other)
comparison == self.as_json
end
+ alias_method :eql?, :==
+
# @return [String]
def inspect
klass = self.class
@@ -46,7 +48,7 @@ def inspect
# @param value [Object]
# @return [SchemaClass::Base]
def self.initialize_from_value(value)
- raise NotImplementedError
+ raise MissingImplementationError
end
protected
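With `eql?` aliased to `==` above, two schema-class instances built from the same payload now compare equal under both operators; a quick illustration with a hypothetical generated class:

    a = Schemas::MySchema.new(test_id: 'abc', some_int: 3)
    b = Schemas::MySchema.new(test_id: 'abc', some_int: 3)

    a == b    # => true (compares as_json representations)
    a.eql?(b) # => true (now consistent with ==)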
diff --git a/lib/deimos/schema_class/enum.rb b/lib/deimos/schema_class/enum.rb
index 5e0d1b20..9d51b625 100644
--- a/lib/deimos/schema_class/enum.rb
+++ b/lib/deimos/schema_class/enum.rb
@@ -30,7 +30,7 @@ def initialize(value)
# Returns all the valid symbols for this enum.
# @return [Array]
def symbols
- raise NotImplementedError
+ raise MissingImplementationError
end
# @return [String]
diff --git a/lib/deimos/schema_class/record.rb b/lib/deimos/schema_class/record.rb
index 1189f791..bc3c9a4f 100644
--- a/lib/deimos/schema_class/record.rb
+++ b/lib/deimos/schema_class/record.rb
@@ -46,13 +46,13 @@ def with_indifferent_access
# Returns the schema name of the inheriting class.
# @return [String]
def schema
- raise NotImplementedError
+ raise MissingImplementationError
end
# Returns the namespace for the schema of the inheriting class.
# @return [String]
def namespace
- raise NotImplementedError
+ raise MissingImplementationError
end
# Returns the full schema name of the inheriting class.
diff --git a/lib/deimos/test_helpers.rb b/lib/deimos/test_helpers.rb
index 4e41a8c4..af131cdb 100644
--- a/lib/deimos/test_helpers.rb
+++ b/lib/deimos/test_helpers.rb
@@ -4,6 +4,7 @@
require 'active_support/core_ext'
require 'deimos/tracing/mock'
require 'deimos/metrics/mock'
+require 'karafka/testing/rspec/helpers'
module Deimos
# Include this module in your RSpec spec_helper
@@ -11,122 +12,79 @@ module Deimos
# and add methods to use to test encoding/decoding.
module TestHelpers
extend ActiveSupport::Concern
+ def self.included(base)
+ super
+ base.include Karafka::Testing::RSpec::Helpers
+ end
+
+ # @return [Array<Hash>]
+ def sent_messages
+ self.class.sent_messages
+ end
class << self
- # for backwards compatibility
# @return [Array]
def sent_messages
- Deimos::Backends::Test.sent_messages
+ Karafka.producer.client.messages.map do |m|
+ produced_message = m.except(:label).deep_dup
+ Deimos.decode_message(produced_message)
+ produced_message[:payload] = Deimos::TestHelpers.normalize_message(produced_message[:payload])
+ produced_message[:key] = Deimos::TestHelpers.normalize_message(produced_message[:key])
+ produced_message
+ end
end
# Set the config to the right settings for a unit test
# @return [void]
def unit_test!
- Deimos.configure do |deimos_config|
- deimos_config.logger = Logger.new(STDOUT)
- deimos_config.consumers.reraise_errors = true
- deimos_config.kafka.seed_brokers ||= ['test_broker']
- deimos_config.schema.backend = Deimos.schema_backend_class.mock_backend
- deimos_config.producers.backend = :test
- deimos_config.tracer = Deimos::Tracing::Mock.new
- end
- end
-
- # Kafka test config with avro schema registry
- # @return [void]
- def full_integration_test!
- Deimos.configure do |deimos_config|
- deimos_config.producers.backend = :kafka
- deimos_config.schema.backend = :avro_schema_registry
- end
- end
-
- # Set the config to the right settings for a kafka test
- # @return [void]
- def kafka_test!
- Deimos.configure do |deimos_config|
- deimos_config.producers.backend = :kafka
- deimos_config.schema.backend = :avro_validation
- end
- end
- end
-
- included do
-
- RSpec.configure do |config|
- config.prepend_before(:each) do
- client = double('client').as_null_object
- allow(client).to receive(:time) do |*_args, &block|
- block.call
- end
- Deimos::Backends::Test.sent_messages.clear
- end
+ Deimos.config.schema.backend = :avro_validation
+ warn "unit_test! is deprecated and can be replaced by setting Deimos's schema backend to `:avro_validation`. All other test behavior is provided by Karafka."
end
-
- end
-
- # @deprecated
- # @!visibility private
- def stub_producers_and_consumers!
- warn('stub_producers_and_consumers! is no longer necessary and this method will be removed in 3.0')
- end
-
- # @deprecated
- # @!visibility private
- def stub_producer(_klass)
- warn('Stubbing producers is no longer necessary and this method will be removed in 3.0')
- end
-
- # @deprecated
- # @!visibility private
- def stub_consumer(_klass)
- warn('Stubbing consumers is no longer necessary and this method will be removed in 3.0')
- end
-
- # @deprecated
- # @!visibility private
- def stub_batch_consumer(_klass)
- warn('Stubbing batch consumers is no longer necessary and this method will be removed in 3.0')
end
# get the difference of 2 hashes.
- # @param hash1 [Hash]
- # @param hash2 [Hash]
+ # @param hash1 [Hash, nil]
+ # @param hash2 [Hash, nil]
# @!visibility private
def _hash_diff(hash1, hash2)
- if hash1.nil? || !hash1.is_a?(Hash)
- hash2
- elsif hash2.nil? || !hash2.is_a?(Hash)
- hash1
+ h1 = Deimos::TestHelpers.normalize_message(hash1)
+ h2 = Deimos::TestHelpers.normalize_message(hash2)
+ if h1.nil? || !h1.is_a?(Hash)
+ h2
+ elsif h2.nil? || !h2.is_a?(Hash)
+ h1
else
- hash1.dup.
- delete_if { |k, v| hash2[k] == v }.
- merge!(hash2.dup.delete_if { |k, _v| hash1.key?(k) })
+ h1.dup.
+ delete_if { |k, v| h2[k] == v }.
+ merge!(h2.dup.delete_if { |k, _v| h1.key?(k) })
+ end
+ end
+
+ def self.normalize_message(m)
+ return nil if m.nil?
+
+ if m.respond_to?(:to_h)
+ m = m.to_h
+ end
+ if m.respond_to?(:with_indifferent_access)
+ m = m.with_indifferent_access
end
+ m
end
# @!visibility private
def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated=false)
- messages = Deimos::Backends::Test.sent_messages.
- select { |m| m[:topic] == topic }.
- map { |m| m.except(:topic) }
+ messages = Deimos::TestHelpers.sent_messages.select { |m| m[:topic] == topic }
message_string = ''
diff = nil
min_hash_diff = nil
+ message = Deimos::TestHelpers.normalize_message(message)
if messages.any?
- message_string = messages.map(&:inspect).join("\n")
- min_hash_diff = messages.min_by { |m| _hash_diff(m, message).keys.size }
- diff = RSpec::Expectations.differ.
- diff_as_object(message, min_hash_diff[:payload])
+ message_string = messages.map { |m| m[:payload].inspect }.join("\n")
+ min_hash_diff = messages.min_by { |m| _hash_diff(m, message)&.keys&.size }
+ diff = RSpec::Expectations.differ.diff_as_object(message, min_hash_diff[:payload])
end
- description = if message.respond_to?(:description)
- message.description
- elsif message.nil?
- 'nil'
- else
- message
- end
- str = "Expected #{topic} #{'not ' if was_negated}to have sent #{description}"
+ str = "Expected #{topic} #{'not ' if was_negated}to have sent #{message.try(:to_h) || message}"
str += " with key #{key}" if key
str += " with partition key #{partition_key}" if partition_key
str += "\nClosest message received: #{min_hash_diff}" if min_hash_diff
@@ -135,23 +93,20 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated
end
RSpec::Matchers.define :have_sent do |msg, key=nil, partition_key=nil, headers=nil|
- message = if msg.respond_to?(:with_indifferent_access)
- msg.with_indifferent_access
- else
- msg
- end
+ message = Deimos::TestHelpers.normalize_message(msg)
match do |topic|
- Deimos::Backends::Test.sent_messages.any? do |m|
- hash_matcher = RSpec::Matchers::BuiltIn::Match.new(message)
- hash_matcher.send(:match,
- message&.respond_to?(:to_h) ? message.to_h : message,
- m[:payload]&.with_indifferent_access) &&
+ message_key = Deimos::TestHelpers.normalize_message(key)
+ hash_matcher = RSpec::Matchers::BuiltIn::Match.new(message)
+ Deimos::TestHelpers.sent_messages.any? do |m|
+ message.delete(:payload_key) if message.respond_to?(:[]) && message[:payload_key].nil?
+ m[:payload].delete(:payload_key) if m.respond_to?(:[]) && m[:payload]&.respond_to?(:[]) && m[:payload][:payload_key].nil?
+ hash_matcher.send(:match, message, m[:payload]) &&
topic == m[:topic] &&
- (key.present? ? key == m[:key] : true) &&
+ (key.present? ? message_key == m[:key] : true) &&
(partition_key.present? ? partition_key == m[:partition_key] : true) &&
if headers.present?
hash_matcher.send(:match,
- headers&.with_indifferent_access,
+ headers.with_indifferent_access,
m[:headers]&.with_indifferent_access)
else
true
@@ -159,20 +114,11 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated
end
end
- if respond_to?(:failure_message)
- failure_message do |topic|
- _frk_failure_message(topic, message, key, partition_key)
- end
- failure_message_when_negated do |topic|
- _frk_failure_message(topic, message, key, partition_key, true)
- end
- else
- failure_message_for_should do |topic|
- _frk_failure_message(topic, message, key, partition_key)
- end
- failure_message_for_should_not do |topic|
- _frk_failure_message(topic, message, key, partition_key, true)
- end
+ failure_message do |topic|
+ _frk_failure_message(topic, message, key, partition_key)
+ end
+ failure_message_when_negated do |topic|
+ _frk_failure_message(topic, message, key, partition_key, true)
end
end
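For context, a hypothetical spec using the reworked matcher, which now reads messages back from Karafka's test producer rather than from `Deimos::Backends::Test`; the producer class and topic name are placeholders:

    RSpec.describe MyProducer do
      it 'publishes the expected payload' do
        MyProducer.publish({ 'test_id' => 'abc', 'some_int' => 3 })

        expect('my-topic').to have_sent({ 'test_id' => 'abc', 'some_int' => 3 })
        expect('my-topic').not_to have_sent({ 'test_id' => 'other' })
      end
    end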
@@ -180,7 +126,8 @@ def _frk_failure_message(topic, message, key=nil, partition_key=nil, was_negated
# particular messages were sent or not sent after a point in time.
# @return [void]
def clear_kafka_messages!
- Deimos::Backends::Test.sent_messages.clear
+ puts "[Deprecated] clear_kafka_messages! can be replaced with `karafka.produced_messages.clear`"
+ karafka.produced_messages.clear
end
# Test that a given handler will consume a given payload correctly, i.e.
@@ -190,65 +137,19 @@ def clear_kafka_messages!
# @param handler_class_or_topic [Class, String] Class which inherits from
# Deimos::Consumer or the topic as a string
# @param payload [Hash] the payload to consume
- # @param call_original [Boolean] if true, allow the consume handler
- # to continue as normal. Not compatible with a block.
- # @param skip_expectation [Boolean] Set to true to not place any
- # expectations on the consumer. Primarily used internally to Deimos.
# @param key [Object] the key to use.
+ # @param call_original [Boolean,nil] legacy parameter; deprecated and ignored.
# @param partition_key [Object] the partition key to use.
# @return [void]
def test_consume_message(handler_class_or_topic,
payload,
- call_original: false,
key: nil,
- partition_key: nil,
- skip_expectation: false,
- &block)
- raise 'Cannot have both call_original and be given a block!' if call_original && block_given?
-
- payload.stringify_keys! if payload.respond_to?(:stringify_keys!)
- handler_class = if handler_class_or_topic.is_a?(String)
- _get_handler_class_from_topic(handler_class_or_topic)
- else
- handler_class_or_topic
- end
- handler = handler_class.new
- allow(handler_class).to receive(:new).and_return(handler)
- listener = double('listener',
- handler_class: handler_class,
- encoding: nil)
- key ||= _key_from_consumer(handler_class)
- message = double('message',
- 'key' => key,
- 'partition_key' => partition_key,
- 'partition' => 1,
- 'offset' => 1,
- 'headers' => {},
- 'value' => payload)
-
- unless skip_expectation
- _handler_expectation(:consume,
- payload,
- handler,
- call_original,
- &block)
+ call_original: nil,
+ partition_key: nil)
+ unless call_original.nil?
+ puts "test_consume_message(call_original: true) is deprecated and will be removed in the future. You can remove the call_original parameter."
end
- Phobos::Actions::ProcessMessage.new(
- listener: listener,
- message: message,
- listener_metadata: { topic: 'my-topic' }
- ).send(:process_message, payload)
- end
-
- # Check to see that a given message will fail due to validation errors.
- # @param handler_class [Class]
- # @param payload [Hash]
- # @return [void]
- def test_consume_invalid_message(handler_class, payload)
- expect {
- handler_class.decoder.validate(payload,
- schema: handler_class.decoder.schema)
- }.to raise_error(Avro::SchemaValidator::ValidationError)
+ test_consume_batch(handler_class_or_topic, [payload], keys: [key], partition_keys: [partition_key], single: true)
end
# Test that a given handler will consume a given batch payload correctly,
@@ -258,165 +159,49 @@ def test_consume_invalid_message(handler_class, payload)
# @param handler_class_or_topic [Class, String] Class which inherits from
# Deimos::Consumer or the topic as a string
# @param payloads [Array] the payload to consume
- # @param keys [Array]
- # @param partition_keys [Array]
- # @param call_original [Boolean]
- # @param skip_expectation [Boolean]
+ # @param call_original [Boolean,nil] legacy parameter.
+ # @param keys [Array