From a52d29cc222416812983ab56b5795faba42b46f1 Mon Sep 17 00:00:00 2001 From: Milind L Date: Mon, 14 Jul 2025 17:08:34 +0530 Subject: [PATCH 01/37] Prototype code to obtain assignments --- examples/consumer.c | 23 ++ src/rdkafka.c | 43 ++++ src/rdkafka.h | 3 + src/rdkafka_cgrp.c | 316 ++++++++++++++++++++++++++++ src/rdkafka_conf.c | 3 + src/rdkafka_conf.h | 2 + src/rdkafka_int.h | 2 + src/rdkafka_proto.h | 1 + src/rdkafka_protocol.h | 1 + src/rdkafka_request.c | 94 +++++++++ src/rdkafka_request.h | 11 + tests/0154-share_consumer.c | 71 +++++++ tests/CMakeLists.txt | 1 + tests/broker_version_tests.py | 4 +- tests/interactive_broker_version.py | 2 +- tests/test.c | 2 + win32/tests/tests.vcxproj | 1 + 17 files changed, 577 insertions(+), 3 deletions(-) create mode 100644 tests/0154-share_consumer.c diff --git a/examples/consumer.c b/examples/consumer.c index f621a9db35..8f33edd69c 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -149,6 +149,29 @@ int main(int argc, char **argv) { return 1; } + if (rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + fprintf(stderr, "%s\n", errstr); + rd_kafka_conf_destroy(conf); + return 1; + } + + + if (rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + fprintf(stderr, "%s\n", errstr); + rd_kafka_conf_destroy(conf); + return 1; + } + + + if (rd_kafka_conf_set(conf, "debug", "cgrp", errstr, + sizeof(errstr)) != RD_KAFKA_CONF_OK) { + fprintf(stderr, "%s\n", errstr); + rd_kafka_conf_destroy(conf); + return 1; + } + /* * Create consumer instance. 
* diff --git a/src/rdkafka.c b/src/rdkafka.c index c6f89ad469..0904bb77ac 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2839,6 +2839,49 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type, return NULL; } +rd_kafka_t *rd_kafka_share_consumer_new( + rd_kafka_conf_t *conf, char *errstr, size_t errstr_size) { + rd_kafka_t *rk; + char errstr_internal[512]; + rd_kafka_conf_res_t res; + + if (conf == NULL) { + rd_snprintf(errstr, errstr_size, + "rd_kafka_share_consumer_new(): " + "conf argument must not be NULL"); + return NULL; + } + + res = rd_kafka_conf_set(conf, "share.consumer", "true", errstr_internal, + sizeof(errstr_internal)); + if (res != RD_KAFKA_CONF_OK) { + rd_snprintf(errstr, errstr_size, + "rd_kafka_share_consumer_new(): " + "Failed to set share.consumer=true: %s", + errstr_internal); + return NULL; + } + + + res = rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr_internal, + sizeof(errstr_internal)); + if (res != RD_KAFKA_CONF_OK) { + rd_snprintf(errstr, errstr_size, + "rd_kafka_share_consumer_new(): " + "Failed to set group.protocol=consumer: %s", + errstr_internal); + return NULL; + } + + rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, errstr_size); + if (!rk) { + /* If rd_kafka_new() failed it will have set the last error + * and filled out errstr, so we don't need to do that here. */ + return NULL; + } + return rk; +} + /** * Schedules a rebootstrap of the cluster immediately. * diff --git a/src/rdkafka.h b/src/rdkafka.h index 57cf08bad1..e137c01804 100644 --- a/src/rdkafka.h +++ b/src/rdkafka.h @@ -3057,6 +3057,9 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type, size_t errstr_size); +RD_EXPORT +rd_kafka_t *rd_kafka_share_consumer_new(rd_kafka_conf_t *conf, char *errstr, size_t errstr_size); + /** * @brief Destroy Kafka handle. 
* diff --git a/src/rdkafka_cgrp.c b/src/rdkafka_cgrp.c index d6348ba52d..e462e6843a 100644 --- a/src/rdkafka_cgrp.c +++ b/src/rdkafka_cgrp.c @@ -3369,6 +3369,304 @@ void rd_kafka_cgrp_handle_ConsumerGroupHeartbeat(rd_kafka_t *rk, } } +void rd_kafka_cgrp_handle_ShareGroupHeartbeat(rd_kafka_t *rk, + rd_kafka_broker_t *rkb, + rd_kafka_resp_err_t err, + rd_kafka_buf_t *rkbuf, + rd_kafka_buf_t *request, + void *opaque) { + rd_kafka_cgrp_t *rkcg = rk->rk_cgrp; + const int log_decode_errors = LOG_ERR; + int16_t error_code = 0; + int actions = 0; + rd_kafkap_str_t error_str = RD_KAFKAP_STR_INITIALIZER_EMPTY; + rd_kafkap_str_t member_id; + int32_t member_epoch; + int32_t heartbeat_interval_ms; + int8_t are_assignments_present; + + if (err == RD_KAFKA_RESP_ERR__DESTROY) + return; + + rd_dassert(rkcg->rkcg_flags & RD_KAFKA_CGRP_F_HEARTBEAT_IN_TRANSIT); + + if (rd_kafka_cgrp_will_leave(rkcg)) + err = RD_KAFKA_RESP_ERR__OUTDATED; + if (err) + goto err; + + rd_kafka_buf_read_throttle_time(rkbuf); + rd_kafka_buf_read_i16(rkbuf, &error_code); + rd_kafka_buf_read_str(rkbuf, &error_str); + + if (error_code) { + err = error_code; + goto err; + } + + rd_kafka_buf_read_str(rkbuf, &member_id); + if (!RD_KAFKAP_STR_IS_NULL(&member_id)) { + rd_kafka_cgrp_set_member_id(rkcg, member_id.str); + } + + rd_kafka_buf_read_i32(rkbuf, &member_epoch); + rkcg->rkcg_generation_id = member_epoch; + + rd_kafka_buf_read_i32(rkbuf, &heartbeat_interval_ms); + if (heartbeat_interval_ms > 0) { + rkcg->rkcg_heartbeat_intvl_ms = heartbeat_interval_ms; + } + + rd_kafka_buf_read_i8(rkbuf, &are_assignments_present); + + if (are_assignments_present == 1) { + rd_kafka_topic_partition_list_t *assigned_topic_partitions; + const rd_kafka_topic_partition_field_t assignments_fields[] = { + RD_KAFKA_TOPIC_PARTITION_FIELD_PARTITION, + RD_KAFKA_TOPIC_PARTITION_FIELD_END}; + assigned_topic_partitions = rd_kafka_buf_read_topic_partitions( + rkbuf, rd_true, rd_false /* Don't use Topic Name */, 0, + assignments_fields); + + 
rd_kafka_dbg( + rk, CGRP, "HEARTBEAT", + "ShareGroupHeartbeat response received " + "assigned_topic_partitions size %d", + assigned_topic_partitions->cnt); + + if (rd_kafka_is_dbg(rk, CGRP)) { + char assigned_topic_partitions_str[512] = "NULL"; + + if (assigned_topic_partitions) { + rd_kafka_topic_partition_list_str( + assigned_topic_partitions, + assigned_topic_partitions_str, + sizeof(assigned_topic_partitions_str), 0); + } + + rd_kafka_dbg( + rk, CGRP, "HEARTBEAT", + "ShareGroupHeartbeat response received target " + "assignment \"%s\"", + assigned_topic_partitions_str); + } + + if (assigned_topic_partitions) { + RD_IF_FREE(rkcg->rkcg_next_target_assignment, + rd_kafka_topic_partition_list_destroy); + rkcg->rkcg_next_target_assignment = NULL; + if (rd_kafka_cgrp_consumer_is_new_assignment_different( + rkcg, assigned_topic_partitions)) { + rkcg->rkcg_next_target_assignment = + assigned_topic_partitions; + } else { + rd_kafka_topic_partition_list_destroy( + assigned_topic_partitions); + assigned_topic_partitions = NULL; + } + } + } + + if (rkcg->rkcg_join_state == RD_KAFKA_CGRP_JOIN_STATE_STEADY && + (rkcg->rkcg_consumer_flags & RD_KAFKA_CGRP_CONSUMER_F_WAIT_ACK) && + rkcg->rkcg_target_assignment) { + if (rkcg->rkcg_consumer_flags & + RD_KAFKA_CGRP_CONSUMER_F_SENDING_ACK) { + if (rkcg->rkcg_current_assignment) + rd_kafka_topic_partition_list_destroy( + rkcg->rkcg_current_assignment); + rkcg->rkcg_current_assignment = + rd_kafka_topic_partition_list_copy( + rkcg->rkcg_target_assignment); + rd_kafka_topic_partition_list_destroy( + rkcg->rkcg_target_assignment); + rkcg->rkcg_target_assignment = NULL; + rkcg->rkcg_consumer_flags &= + ~RD_KAFKA_CGRP_CONSUMER_F_WAIT_ACK; + + if (rd_kafka_is_dbg(rkcg->rkcg_rk, CGRP)) { + char rkcg_current_assignment_str[512] = "NULL"; + + rd_kafka_topic_partition_list_str( + rkcg->rkcg_current_assignment, + rkcg_current_assignment_str, + sizeof(rkcg_current_assignment_str), 0); + + rd_kafka_dbg(rkcg->rkcg_rk, CGRP, "HEARTBEAT", + "Target 
assignment acked, new " + "current assignment " + " \"%s\"", + rkcg_current_assignment_str); + } + } else if (rkcg->rkcg_flags & RD_KAFKA_CGRP_F_SUBSCRIPTION) { + /* We've finished reconciliation but we weren't + * sending an ack, need to send a new HB with the ack. + */ + rd_kafka_cgrp_consumer_expedite_next_heartbeat( + rkcg, "not subscribed anymore"); + } + } + + + if (rkcg->rkcg_consumer_flags & + RD_KAFKA_CGRP_CONSUMER_F_SERVE_PENDING && + rkcg->rkcg_join_state == RD_KAFKA_CGRP_JOIN_STATE_STEADY) { + /* TODO: Check if this should be done only for the + * steady state? + */ + rd_kafka_assignment_serve(rk); + rkcg->rkcg_consumer_flags &= + ~RD_KAFKA_CGRP_CONSUMER_F_SERVE_PENDING; + } + + if (rkcg->rkcg_next_target_assignment) { + if (rkcg->rkcg_flags & RD_KAFKA_CGRP_F_SUBSCRIPTION) { + rd_kafka_cgrp_consumer_next_target_assignment_request_metadata( + rk, rkb); + } else { + /* Consumer left the group sending an HB request + * while this one was in-flight. */ + rd_kafka_topic_partition_list_destroy( + rkcg->rkcg_next_target_assignment); + rkcg->rkcg_next_target_assignment = NULL; + } + } + + if (rd_kafka_cgrp_consumer_subscription_preconditions_met(rkcg)) + rd_kafka_cgrp_consumer_expedite_next_heartbeat( + rkcg, "send new subscription"); + + rkcg->rkcg_consumer_flags &= + ~RD_KAFKA_CGRP_CONSUMER_F_SENDING_NEW_SUBSCRIPTION & + ~RD_KAFKA_CGRP_CONSUMER_F_SEND_FULL_REQUEST; + rd_kafka_cgrp_maybe_clear_heartbeat_failed_err(rkcg); + rkcg->rkcg_last_heartbeat_err = RD_KAFKA_RESP_ERR_NO_ERROR; + rkcg->rkcg_expedite_heartbeat_retries = 0; + rkcg->rkcg_flags &= ~RD_KAFKA_CGRP_F_HEARTBEAT_IN_TRANSIT; + + return; + +err_parse: + err = rkbuf->rkbuf_err; +err: + rkcg->rkcg_last_heartbeat_err = err; + rkcg->rkcg_flags &= ~RD_KAFKA_CGRP_F_HEARTBEAT_IN_TRANSIT; + switch (err) { + case RD_KAFKA_RESP_ERR__DESTROY: + /* quick cleanup */ + return; + + case RD_KAFKA_RESP_ERR_COORDINATOR_LOAD_IN_PROGRESS: + rd_kafka_dbg( + rkcg->rkcg_rk, CONSUMER, "HEARTBEAT", + "ShareGroupHeartbeat 
failed due to coordinator (%s) " + "loading in progress: %s: " + "retrying", + rkcg->rkcg_curr_coord + ? rd_kafka_broker_name(rkcg->rkcg_curr_coord) + : "none", + rd_kafka_err2str(err)); + actions = RD_KAFKA_ERR_ACTION_RETRY; + break; + + case RD_KAFKA_RESP_ERR_NOT_COORDINATOR_FOR_GROUP: + case RD_KAFKA_RESP_ERR_GROUP_COORDINATOR_NOT_AVAILABLE: + case RD_KAFKA_RESP_ERR__TRANSPORT: + rd_kafka_dbg( + rkcg->rkcg_rk, CONSUMER, "HEARTBEAT", + "ShareGroupHeartbeat failed due to coordinator (%s) " + "no longer available: %s: " + "re-querying for coordinator", + rkcg->rkcg_curr_coord + ? rd_kafka_broker_name(rkcg->rkcg_curr_coord) + : "none", + rd_kafka_err2str(err)); + /* Remain in joined state and keep querying for coordinator */ + actions = RD_KAFKA_ERR_ACTION_REFRESH; + break; + + case RD_KAFKA_RESP_ERR_UNKNOWN_MEMBER_ID: + rd_kafka_dbg(rkcg->rkcg_rk, CONSUMER, "HEARTBEAT", + "ShareGroupHeartbeat failed due to: %s: " + "will rejoin the group", + rd_kafka_err2str(err)); + rkcg->rkcg_consumer_flags |= + RD_KAFKA_CGRP_CONSUMER_F_WAIT_REJOIN; + return; + + case RD_KAFKA_RESP_ERR_INVALID_REQUEST: + case RD_KAFKA_RESP_ERR_GROUP_MAX_SIZE_REACHED: + case RD_KAFKA_RESP_ERR_UNSUPPORTED_VERSION: + case RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE: + case RD_KAFKA_RESP_ERR_GROUP_AUTHORIZATION_FAILED: + actions = RD_KAFKA_ERR_ACTION_FATAL; + break; + + default: + actions = rd_kafka_err_action( + rkb, err, request, + + RD_KAFKA_ERR_ACTION_SPECIAL, + RD_KAFKA_RESP_ERR_TOPIC_AUTHORIZATION_FAILED, + + RD_KAFKA_ERR_ACTION_END); + break; + } + + if (actions & RD_KAFKA_ERR_ACTION_FATAL) { + rd_kafka_set_fatal_error( + rkcg->rkcg_rk, err, + "ShareGroupHeartbeat fatal error: %s", + rd_kafka_err2str(err)); + rd_kafka_cgrp_revoke_all_rejoin_maybe( + rkcg, rd_true, /*assignments lost*/ + rd_true, /*initiating*/ + "Fatal error in ShareGroupHeartbeat API response"); + return; + } + + if (!rkcg->rkcg_heartbeat_intvl_ms) { + /* When an error happens on first HB, it should be always + * retried, unless 
fatal, to avoid entering a tight loop + * and to use exponential backoff. */ + actions |= RD_KAFKA_ERR_ACTION_RETRY; + } + + if (actions & RD_KAFKA_ERR_ACTION_REFRESH) { + /* Re-query for coordinator */ + rkcg->rkcg_consumer_flags |= + RD_KAFKA_CGRP_CONSUMER_F_SEND_FULL_REQUEST; + rd_kafka_cgrp_coord_query(rkcg, rd_kafka_err2str(err)); + /* If coordinator changes, HB will be expedited. */ + } + + if (actions & RD_KAFKA_ERR_ACTION_SPECIAL) { + rd_ts_t min_error_interval = + RD_MAX(rkcg->rkcg_heartbeat_intvl_ms * 1000, + /* default group.consumer.heartbeat.interval.ms */ + 5000000); + if (rkcg->rkcg_last_err != err || + (rd_clock() > + rkcg->rkcg_ts_last_err + min_error_interval)) { + rd_kafka_cgrp_set_last_err(rkcg, err); + rd_kafka_consumer_err( + rkcg->rkcg_q, rd_kafka_broker_id(rkb), err, 0, NULL, + NULL, err, + "ShareGroupHeartbeat failed: %s%s%.*s", + rd_kafka_err2str(err), + RD_KAFKAP_STR_LEN(&error_str) ? ": " : "", + RD_KAFKAP_STR_PR(&error_str)); + } + } + + if (actions & RD_KAFKA_ERR_ACTION_RETRY && + rkcg->rkcg_flags & RD_KAFKA_CGRP_F_SUBSCRIPTION && + !rd_kafka_cgrp_will_leave(rkcg) && + rd_kafka_buf_retry(rkb, request)) { + /* Retry */ + rkcg->rkcg_flags |= RD_KAFKA_CGRP_F_HEARTBEAT_IN_TRANSIT; + } +} /** * @brief Handle Heartbeat response. 
@@ -6150,6 +6448,14 @@ void rd_kafka_cgrp_consumer_group_heartbeat(rd_kafka_cgrp_t *rkcg, } rkcg->rkcg_expedite_heartbeat_retries++; + + if (RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk)) { + rd_kafka_ShareGroupHeartbeatRequest(rkcg->rkcg_coord, rkcg->rkcg_group_id, rkcg->rkcg_member_id, + member_epoch, rkcg_client_rack, rkcg_subscription_topics, RD_KAFKA_REPLYQ(rkcg->rkcg_ops, 0), + rd_kafka_cgrp_handle_ShareGroupHeartbeat, NULL); + return; + } + rd_kafka_ConsumerGroupHeartbeatRequest( rkcg->rkcg_coord, rkcg->rkcg_group_id, rkcg->rkcg_member_id, member_epoch, rkcg_group_instance_id, rkcg_client_rack, @@ -6211,6 +6517,15 @@ void rd_kafka_cgrp_consumer_serve(rd_kafka_cgrp_t *rkcg) { "member fenced - rejoining"); } + /* There should be no fencing, hence no rejoining - these asserts are to test only, we don't actually need them. */ + rd_dassert(!(RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk) && + (rkcg->rkcg_consumer_flags & + RD_KAFKA_CGRP_CONSUMER_F_WAIT_REJOIN))); + rd_dassert(!(RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk) && + (rkcg->rkcg_consumer_flags & + RD_KAFKA_CGRP_CONSUMER_F_WAIT_REJOIN_TO_COMPLETE))); + + switch (rkcg->rkcg_join_state) { case RD_KAFKA_CGRP_JOIN_STATE_INIT: rkcg->rkcg_consumer_flags &= @@ -6300,6 +6615,7 @@ rd_kafka_cgrp_consumer_subscribe(rd_kafka_cgrp_t *rkcg, /* If member is leaving, new subscription * will be applied after the leave * ConsumerGroupHeartbeat */ + /* MILIND: how is new subscription applied after heartbeat, check it. */ if (!rd_kafka_cgrp_will_leave(rkcg)) rd_kafka_cgrp_consumer_apply_next_subscribe(rkcg); } else { diff --git a/src/rdkafka_conf.c b/src/rdkafka_conf.c index 1f8bbf106b..f9b5de7037 100644 --- a/src/rdkafka_conf.c +++ b/src/rdkafka_conf.c @@ -1509,6 +1509,9 @@ static const struct rd_kafka_property rd_kafka_properties[] = { "which indicates where this client is physically located. 
It " "corresponds with the broker config `broker.rack`.", .sdef = ""}, + {_RK_GLOBAL | _RK_HIDDEN, "share.consumer", _RK_C_BOOL, _RK(is_share_consumer), + "tba description", 0, 1, 0}, + /* Global producer properties */ {_RK_GLOBAL | _RK_PRODUCER | _RK_HIGH, "transactional.id", _RK_C_STR, diff --git a/src/rdkafka_conf.h b/src/rdkafka_conf.h index 92e5193eb7..fedfc2ecee 100644 --- a/src/rdkafka_conf.h +++ b/src/rdkafka_conf.h @@ -470,6 +470,8 @@ struct rd_kafka_conf_s { rd_kafkap_str_t *client_rack; + int is_share_consumer; /**< Is this a share consumer? */ + /* * Producer configuration */ diff --git a/src/rdkafka_int.h b/src/rdkafka_int.h index d8370ff599..9e97bf9dca 100644 --- a/src/rdkafka_int.h +++ b/src/rdkafka_int.h @@ -94,6 +94,8 @@ typedef struct rd_kafka_lwtopic_s rd_kafka_lwtopic_t; #define RD_KAFKA_OFFSET_IS_LOGICAL(OFF) ((OFF) < 0) +#define RD_KAFKA_IS_SHARE_CONSUMER(rk) \ + ((rk)->rk_type == RD_KAFKA_CONSUMER && (rk)->rk_conf.is_share_consumer) /** * @struct Represents a fetch position: diff --git a/src/rdkafka_proto.h b/src/rdkafka_proto.h index 02565ecb3b..b5b52ac146 100644 --- a/src/rdkafka_proto.h +++ b/src/rdkafka_proto.h @@ -175,6 +175,7 @@ static RD_UNUSED const char *rd_kafka_ApiKey2str(int16_t ApiKey) { [RD_KAFKAP_ConsumerGroupDescribe] = "ConsumerGroupDescribe", [RD_KAFKAP_GetTelemetrySubscriptions] = "GetTelemetrySubscriptions", [RD_KAFKAP_PushTelemetry] = "PushTelemetry", + [RD_KAFKAP_ShareGroupHeartbeat] = "ShareGroupHeartbeat", }; static RD_TLS char ret[64]; diff --git a/src/rdkafka_protocol.h b/src/rdkafka_protocol.h index 19190e1447..28c246dd9c 100644 --- a/src/rdkafka_protocol.h +++ b/src/rdkafka_protocol.h @@ -120,6 +120,7 @@ #define RD_KAFKAP_GetTelemetrySubscriptions 71 #define RD_KAFKAP_PushTelemetry 72 #define RD_KAFKAP_AssignReplicasToDirs 73 +#define RD_KAFKAP_ShareGroupHeartbeat 76 #define RD_KAFKAP__NUM 74 diff --git a/src/rdkafka_request.c b/src/rdkafka_request.c index 663a07eae3..94c01e5964 100644 --- a/src/rdkafka_request.c 
+++ b/src/rdkafka_request.c @@ -2442,7 +2442,101 @@ void rd_kafka_ConsumerGroupHeartbeatRequest( rd_kafkap_str_destroy(subscribed_topic_regex_to_send); } +void rd_kafka_ShareGroupHeartbeatRequest( + rd_kafka_broker_t *rkb, + const rd_kafkap_str_t *group_id, + const rd_kafkap_str_t *member_id, + int32_t member_epoch, + const rd_kafkap_str_t *rack_id, + const rd_kafka_topic_partition_list_t *subscribed_topics, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque) { + rd_kafka_buf_t *rkbuf; + int16_t ApiVersion = 0; + int features; + size_t rkbuf_size = 0; + + ApiVersion = rd_kafka_broker_ApiVersion_supported( + rkb, RD_KAFKAP_ShareGroupHeartbeat, 1, 1, &features); + + rd_rkb_dbg(rkb, CGRP, "SHAREHEARTBEAT", + "ShareGroupHeartbeat version %d for group \"%s\", member id " + "\"%s\", topic count = %d", + ApiVersion, group_id ? group_id->str : "NULL", + member_id ? member_id->str : "NULL", + subscribed_topics ? subscribed_topics->cnt : -1); + + if (ApiVersion == -1) { + rd_kafka_cgrp_coord_dead(rkb->rkb_rk->rk_cgrp, + RD_KAFKA_RESP_ERR__UNSUPPORTED_FEATURE, + "ShareGroupHeartbeatRequest not " + "supported by broker"); + return; + } + + // debug log all the fields + if (rd_rkb_is_dbg(rkb, CGRP)) { + char subscribed_topics_str[512] = "NULL"; + if (subscribed_topics) { + rd_kafka_topic_partition_list_str( + subscribed_topics, subscribed_topics_str, + sizeof(subscribed_topics_str), 0); + } + rd_rkb_dbg(rkb, CGRP, "SHAREHEARTBEAT", + "ShareGroupHeartbeat of group id \"%s\", " + "member id \"%s\", member epoch %d, rack id \"%s\"" + ", subscribed topics \"%s\"", + group_id ? group_id->str : "NULL", + member_id ? member_id->str : "NULL", member_epoch, + rack_id ? 
rack_id->str : "NULL", + subscribed_topics_str); + } + + if (group_id) + rkbuf_size += RD_KAFKAP_STR_SIZE(group_id); + if (member_id) + rkbuf_size += RD_KAFKAP_STR_SIZE(member_id); + rkbuf_size += 4; /* MemberEpoch */ + if (rack_id) + rkbuf_size += RD_KAFKAP_STR_SIZE(rack_id); + if (subscribed_topics) { + rkbuf_size += + ((subscribed_topics->cnt * (4 + 50)) + 4 /* array size */); + } + + rkbuf = rd_kafka_buf_new_flexver_request( + rkb, RD_KAFKAP_ShareGroupHeartbeat, 1, rkbuf_size, rd_true); + + rd_kafka_buf_write_kstr(rkbuf, group_id); + rd_kafka_buf_write_kstr(rkbuf, member_id); + rd_kafka_buf_write_i32(rkbuf, member_epoch); + rd_kafka_buf_write_kstr(rkbuf, rack_id); + if (subscribed_topics) { + int topics_cnt = subscribed_topics->cnt; + + /* write Topics */ + rd_kafka_buf_write_arraycnt(rkbuf, topics_cnt); + while (--topics_cnt >= 0) { + if (rd_rkb_is_dbg(rkb, CGRP)) + rd_rkb_dbg( + rkb, CGRP, "SHAREHEARTBEAT", + "ShareGroupHeartbeat subscribed " + "topic %s", + subscribed_topics->elems[topics_cnt].topic); + rd_kafka_buf_write_str( + rkbuf, subscribed_topics->elems[topics_cnt].topic, + -1); + } + } else { + rd_kafka_buf_write_arraycnt(rkbuf, -1); + } + + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, features); + + rd_kafka_broker_buf_enq_replyq(rkb, rkbuf, replyq, resp_cb, opaque); +} /** * @brief Construct and send ListGroupsRequest to \p rkb diff --git a/src/rdkafka_request.h b/src/rdkafka_request.h index c508ffdaaf..12b2bcbd5f 100644 --- a/src/rdkafka_request.h +++ b/src/rdkafka_request.h @@ -400,6 +400,17 @@ void rd_kafka_ConsumerGroupHeartbeatRequest( rd_kafka_resp_cb_t *resp_cb, void *opaque); +void rd_kafka_ShareGroupHeartbeatRequest( + rd_kafka_broker_t *rkb, + const rd_kafkap_str_t *group_id, + const rd_kafkap_str_t *member_id, + int32_t member_epoch, + const rd_kafkap_str_t *rack_id, + const rd_kafka_topic_partition_list_t *subscribed_topics, + rd_kafka_replyq_t replyq, + rd_kafka_resp_cb_t *resp_cb, + void *opaque); + rd_kafka_resp_err_t 
rd_kafka_MetadataRequest(rd_kafka_broker_t *rkb, const rd_list_t *topics, rd_list_t *topic_ids, diff --git a/tests/0154-share_consumer.c b/tests/0154-share_consumer.c new file mode 100644 index 0000000000..d906eaf836 --- /dev/null +++ b/tests/0154-share_consumer.c @@ -0,0 +1,71 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2025, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "test.h" + +int main_0154_share_consumer(int argc, char **argv) { + char errstr[512]; + rd_kafka_conf_t *conf; + rd_kafka_t *rk; + rd_kafka_topic_partition_list_t *topics; + char *topic = "test-topic";// test_mk_topic_name("0154-share-consumer", 0); + char *group = "test-group-0"; + + test_create_topic_wait_exists(NULL, topic, 3, -1, 60 * 1000); + rd_sleep(5); + + test_produce_msgs_easy(topic, 0, 0, 2); + + TEST_SAY("Creating share consumer\n"); + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", NULL, 0); + rd_kafka_conf_set(conf, "group.protocol", "consumer", NULL, 0); + rd_kafka_conf_set(conf, "group.id", group, NULL, 0); + rd_kafka_conf_set(conf, "debug", "cgrp,protocol,conf", NULL, 0); + + // rk = rd_kafka_share_consumer_new(conf, errstr, sizeof(errstr)); + rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + if (!rk) { + TEST_FAIL("Failed to create share consumer: %s\n", errstr); + } + + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(rk, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Share consumer created successfully\n"); + + rd_kafka_consumer_poll(rk, 65000); + + TEST_SAY("Destroying consumer\n"); + + /* Clean up */ + rd_kafka_destroy(rk); + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e509092873..b5e6f0bfaf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -143,6 +143,7 @@ set( 0151-purge-brokers.c 0152-rebootstrap.c 0153-memberid.c + 0154-share_consumer.c 8000-idle.cpp 8001-fetch_from_follower_mock_manual.c test.c diff --git a/tests/broker_version_tests.py b/tests/broker_version_tests.py index c451e02471..1a0de874b1 100755 --- a/tests/broker_version_tests.py +++ b/tests/broker_version_tests.py @@ -31,7 +31,7 @@ def test_it(version, deploy=True, conf={}, rdkconf={}, tests=None, """ cluster = LibrdkafkaTestCluster(version,
conf, - num_brokers=int(conf.get('broker_cnt', 3)), + num_brokers=int(conf.get('broker_cnt', 1)), debug=debug, scenario=scenario, kraft=kraft) @@ -175,7 +175,7 @@ def handle_report(report, version, suite): '--brokers', dest='broker_cnt', type=int, - default=3, + default=1, help='Number of Kafka brokers') parser.add_argument('--ssl', dest='ssl', action='store_true', default=False, diff --git a/tests/interactive_broker_version.py b/tests/interactive_broker_version.py index acddc872fd..3f2ffe7aae 100755 --- a/tests/interactive_broker_version.py +++ b/tests/interactive_broker_version.py @@ -25,7 +25,7 @@ def version_as_number(version): def test_version(version, cmd=None, deploy=True, conf={}, debug=False, exec_cnt=1, - root_path='tmp', broker_cnt=3, scenario='default', + root_path='tmp', broker_cnt=1, scenario='default', kraft=False): """ @brief Create, deploy and start a Kafka cluster using Kafka \\p version diff --git a/tests/test.c b/tests/test.c index 86205dd5de..d0f9dafa68 100644 --- a/tests/test.c +++ b/tests/test.c @@ -270,6 +270,7 @@ _TEST_DECL(0150_telemetry_mock); _TEST_DECL(0151_purge_brokers_mock); _TEST_DECL(0152_rebootstrap_local); _TEST_DECL(0153_memberid); +_TEST_DECL(0154_share_consumer); /* Manual tests */ _TEST_DECL(8000_idle); @@ -536,6 +537,7 @@ struct test tests[] = { _TEST(0151_purge_brokers_mock, TEST_F_LOCAL), _TEST(0152_rebootstrap_local, TEST_F_LOCAL), _TEST(0153_memberid, 0, TEST_BRKVER(0, 4, 0, 0)), + _TEST(0154_share_consumer, 0, TEST_BRKVER(0, 4, 0, 0)), /* Manual tests */ diff --git a/win32/tests/tests.vcxproj b/win32/tests/tests.vcxproj index c00bc84418..03d5825219 100644 --- a/win32/tests/tests.vcxproj +++ b/win32/tests/tests.vcxproj @@ -233,6 +233,7 @@ + From aed60439d98f782b83ca405d1af1124ca8bb3bf5 Mon Sep 17 00:00:00 2001 From: Milind L Date: Mon, 6 Oct 2025 04:59:06 +0000 Subject: [PATCH 02/37] Add broker-op for share fetching --- src/rdkafka_op.c | 2 ++ src/rdkafka_op.h | 2 ++ 2 files changed, 4 insertions(+) diff --git 
a/src/rdkafka_op.c b/src/rdkafka_op.c index 5dbbf9c9d4..469128cf9e 100644 --- a/src/rdkafka_op.c +++ b/src/rdkafka_op.c @@ -123,6 +123,7 @@ const char *rd_kafka_op2str(rd_kafka_op_type_t type) { [RD_KAFKA_OP_TERMINATE_TELEMETRY] = "REPLY:RD_KAFKA_OP_TERMINATE_TELEMETRY", [RD_KAFKA_OP_ELECTLEADERS] = "REPLY:ELECTLEADERS", + [RD_KAFKA_OP_SHARE_FETCH] = "REPLY:SHARE_FETCH", }; if (type & RD_KAFKA_OP_REPLY) @@ -287,6 +288,7 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { sizeof(rko->rko_u.telemetry_broker), [RD_KAFKA_OP_TERMINATE_TELEMETRY] = _RD_KAFKA_OP_EMPTY, [RD_KAFKA_OP_ELECTLEADERS] = sizeof(rko->rko_u.admin_request), + [RD_KAFKA_OP_SHARE_FETCH] = _RD_KAFKA_OP_EMPTY, }; size_t tsize = op2size[type & ~RD_KAFKA_OP_FLAGMASK]; diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index e79309aa02..d75a0f23d1 100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -189,6 +189,8 @@ typedef enum { RD_KAFKA_OP_ELECTLEADERS, /**< Admin: * ElectLeaders * u.admin_request */ + RD_KAFKA_OP_SHARE_FETCH, /**< broker op: Issue share fetch request if + applicable. 
*/ RD_KAFKA_OP__END } rd_kafka_op_type_t; From 24acb72692087fc72069bea0b0d4dd54856b5758 Mon Sep 17 00:00:00 2001 From: Milind L Date: Mon, 13 Oct 2025 07:39:27 +0000 Subject: [PATCH 03/37] Add fields to op --- src/rdkafka_op.c | 6 +++++- src/rdkafka_op.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/rdkafka_op.c b/src/rdkafka_op.c index 469128cf9e..db325b11c3 100644 --- a/src/rdkafka_op.c +++ b/src/rdkafka_op.c @@ -288,7 +288,7 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { sizeof(rko->rko_u.telemetry_broker), [RD_KAFKA_OP_TERMINATE_TELEMETRY] = _RD_KAFKA_OP_EMPTY, [RD_KAFKA_OP_ELECTLEADERS] = sizeof(rko->rko_u.admin_request), - [RD_KAFKA_OP_SHARE_FETCH] = _RD_KAFKA_OP_EMPTY, + [RD_KAFKA_OP_SHARE_FETCH] = sizeof(rko->rko_u.share_fetch), }; size_t tsize = op2size[type & ~RD_KAFKA_OP_FLAGMASK]; @@ -509,6 +509,10 @@ void rd_kafka_op_destroy(rd_kafka_op_t *rko) { rd_kafka_broker_destroy); break; + case RD_KAFKA_OP_SHARE_FETCH: + /* TODO KIP-932: Add destruction code. */ + break; + default: break; } diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index d75a0f23d1..58a248aaf1 100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -726,6 +726,16 @@ struct rd_kafka_op_s { void (*cb)(rd_kafka_t *rk, void *rkb); } terminated; + struct { + /** Whether this broker should share-fetch nonzero + * messages. */ + rd_bool_t should_fetch; + + /** Absolute timeout left to complete this share-fetch. 
+ */ + rd_ts_t abs_timeout; + } share_fetch; + } rko_u; }; From 847d04bb6f4b7b90f19547a4db5293824a35e9df Mon Sep 17 00:00:00 2001 From: Milind L Date: Fri, 17 Oct 2025 05:04:25 +0000 Subject: [PATCH 04/37] Add preliminary polling --- examples/consumer.c | 19 ++- src/rdkafka.c | 287 ++++++++++++++++++++++++++++++++++++++++++- src/rdkafka.h | 11 +- src/rdkafka_broker.c | 11 ++ src/rdkafka_broker.h | 6 + src/rdkafka_cgrp.h | 5 + src/rdkafka_int.h | 4 + src/rdkafka_op.c | 13 +- src/rdkafka_op.h | 15 +++ 9 files changed, 359 insertions(+), 12 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 8f33edd69c..04ff0e43c7 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -165,8 +165,8 @@ int main(int argc, char **argv) { } - if (rd_kafka_conf_set(conf, "debug", "cgrp", errstr, - sizeof(errstr)) != RD_KAFKA_CONF_OK) { + if (rd_kafka_conf_set(conf, "debug", "cgrp", errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) { fprintf(stderr, "%s\n", errstr); rd_kafka_conf_destroy(conf); return 1; @@ -237,9 +237,20 @@ int main(int argc, char **argv) { * Start polling for messages. */ while (run) { - rd_kafka_message_t *rkm; + rd_kafka_message_t *rkm = NULL; + + // rkm = rd_kafka_consumer_poll(rk, 100); + rd_kafka_error_t *error; + + // fprintf(stderr, "Calling consume_batch\n"); + error = rd_kafka_share_consume_batch(rk, 1000, NULL, NULL); + if (error) { + fprintf(stderr, "%% Consume error: %s\n", + rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + continue; + } - rkm = rd_kafka_consumer_poll(rk, 100); if (!rkm) continue; /* Timeout: no message within 100ms, * try again. 
This short timeout allows diff --git a/src/rdkafka.c b/src/rdkafka.c index 0904bb77ac..a040da0411 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2839,8 +2839,9 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type, return NULL; } -rd_kafka_t *rd_kafka_share_consumer_new( - rd_kafka_conf_t *conf, char *errstr, size_t errstr_size) { +rd_kafka_t *rd_kafka_share_consumer_new(rd_kafka_conf_t *conf, + char *errstr, + size_t errstr_size) { rd_kafka_t *rk; char errstr_internal[512]; rd_kafka_conf_res_t res; @@ -2853,7 +2854,7 @@ rd_kafka_t *rd_kafka_share_consumer_new( } res = rd_kafka_conf_set(conf, "share.consumer", "true", errstr_internal, - sizeof(errstr_internal)); + sizeof(errstr_internal)); if (res != RD_KAFKA_CONF_OK) { rd_snprintf(errstr, errstr_size, "rd_kafka_share_consumer_new(): " @@ -2863,8 +2864,8 @@ rd_kafka_t *rd_kafka_share_consumer_new( } - res = rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr_internal, - sizeof(errstr_internal)); + res = rd_kafka_conf_set(conf, "group.protocol", "consumer", + errstr_internal, sizeof(errstr_internal)); if (res != RD_KAFKA_CONF_OK) { rd_snprintf(errstr, errstr_size, "rd_kafka_share_consumer_new(): " @@ -2882,6 +2883,271 @@ rd_kafka_t *rd_kafka_share_consumer_new( return rk; } + +/** + * @locality main thread + * @locks none + */ +static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, + rd_kafka_cgrp_t *rkcg) { + rd_kafka_broker_t *selected_rkb = NULL; + rd_kafka_topic_partition_list_t *partitions = + rkcg->rkcg_current_assignment; + int32_t broker_id = -1; + size_t i; + rd_kafka_topic_partition_t *partition; + + + if (!partitions || partitions->cnt == 0) { + rd_kafka_dbg(rk, CGRP, "SHARE", + "No partitions assigned to consumer, " + "cannot select broker for share fetch"); + return NULL; + } + + /* Look through all partitions in order, find the first one which + * has a leader. 
*/ + rd_kafka_dbg(rk, CGRP, "SHARE", + "Selecting broker for share fetch from %d assigned " + "partitions, offset = %d", + partitions->cnt, rkcg->rkcg_share.last_partition_picked); + + for (i = 0; i < partitions->cnt && !selected_rkb; i++) { + rd_kafka_toppar_t *rktp; + rkcg->rkcg_share.last_partition_picked += 1; + if (rkcg->rkcg_share.last_partition_picked >= partitions->cnt) + rkcg->rkcg_share.last_partition_picked = 0; + partition = + &partitions->elems[rkcg->rkcg_share.last_partition_picked]; + + rktp = rd_kafka_toppar_get2(rk, partition->topic, + partition->partition, 0, 1); + + /* Criteria to choose a broker: + * 1. It should be the leader of a partition. + * 2. A share-fetch op must not already be enqueued on it. */ + if (rktp->rktp_leader) { + /* TODO: We're only going to access + * rkb_share_fetch_enqueued from the main thread, except + * when it's being calloc'd and destroyed. Is it safe to + * access it without a lock? */ + rd_kafka_broker_lock(rktp->rktp_leader); + if (!rktp->rktp_leader->rkb_share_fetch_enqueued) { + rd_kafka_broker_keep(rktp->rktp_leader); + selected_rkb = rktp->rktp_leader; + } + rd_kafka_broker_unlock(rktp->rktp_leader); + } + + rd_kafka_toppar_destroy(rktp); + } + + rd_kafka_dbg(rk, CGRP, "SHARE", "Selected broker %s for share fetch", + selected_rkb ? rd_kafka_broker_name(selected_rkb) + : "none"); + return selected_rkb; +} + +rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, + rd_kafka_op_t *rko) { + rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); + rd_kafka_dbg( + rk, TOPIC, "SHAREFETCH", + "Fetch share reply: %s, should_fetch=%d, broker=%s", + rd_kafka_err2str(rko->rko_err), rko->rko_u.share_fetch.should_fetch, + rko->rko_u.share_fetch.target_broker + ? 
rd_kafka_broker_name(rko->rko_u.share_fetch.target_broker) + : "none"); + + rd_kafka_broker_lock(rko->rko_u.share_fetch.target_broker); + rko->rko_u.share_fetch.target_broker->rkb_share_fetch_enqueued = + rd_false; + rd_kafka_broker_unlock(rko->rko_u.share_fetch.target_broker); + + if (rko->rko_error) { + rd_kafka_dbg(rk, CGRP, "SHARE", "Share fetch failed: %s", + rd_kafka_error_string(rko->rko_error)); + /* Retry fetching if there is any amount of time left. */ + // if (rko->rko_u.share_fetch.should_fetch && + // rd_timeout_remains(rko->rko_u.share_fetch.abs_timeout)) { + + // } + } + + return RD_KAFKA_OP_RES_HANDLED; +} +/** + * @locality main thread + */ +rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, + rd_kafka_q_t *rkq, + rd_kafka_op_t *rko) { + rd_kafka_broker_t *rkb, *selected_rkb = NULL; + rd_kafka_error_t *error; + rd_kafka_cgrp_t *rkcg = rd_kafka_cgrp_get(rk); + rd_kafka_op_t *reply_rko; + + /* We should never have enqueued this op if we didn't have the rkcg */ + rd_assert(rkcg); + + if (!(selected_rkb = rd_kafka_share_select_broker(rk, rkcg))) { + error = + rd_kafka_error_new(RD_KAFKA_RESP_ERR__STATE, + "rd_kafka_share_consume_batch(): " + "No broker available for share fetch"); + + reply_rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH_FANOUT | + RD_KAFKA_OP_REPLY); + reply_rko->rko_error = error; + rd_kafka_replyq_enq(&rko->rko_replyq, reply_rko, 0); + return RD_KAFKA_OP_RES_HANDLED; + } + + /* Issue fetch requests to all brokers */ + rd_kafka_dbg(rk, CGRP, "SHARE", + "Selected broker %s for share fetch, issuing " + "fetch requests to all %d brokers", + rd_kafka_broker_name(selected_rkb), + rd_atomic32_get(&rk->rk_broker_cnt)); + + rd_kafka_rdlock(rk); + TAILQ_FOREACH(rkb, &rk->rk_brokers, rkb_link) { + rd_kafka_op_t *rko; + + if (rd_kafka_broker_or_instance_terminating(rkb) || + RD_KAFKA_BROKER_IS_LOGICAL(rkb)) { + rd_kafka_dbg(rk, CGRP, "SHARE", + "Skipping broker %s for share fetch: " + "terminating or logical", + 
rd_kafka_broker_name(rkb)); + continue; + } + + rd_kafka_broker_lock(rkb); + if (rkb->rkb_share_fetch_enqueued) { + rd_kafka_broker_unlock(rkb); + rd_kafka_dbg(rk, CGRP, "SHARE", + "Unable to enqueue op on broker %s " + "because another op is already pending.", + rd_kafka_broker_name(rkb)); + continue; + } + rkb->rkb_share_fetch_enqueued = rd_true; + rd_kafka_broker_unlock(rkb); + + rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); + rko->rko_u.share_fetch.abs_timeout = + rko->rko_u.share_fetch_fanout.abs_timeout; + rko->rko_u.share_fetch.should_fetch = (rkb == selected_rkb); + rd_kafka_broker_keep(rkb); + rko->rko_u.share_fetch.target_broker = rkb; + rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + + rd_kafka_dbg(rk, CGRP, "SHAREFETCH", + "Enqueuing share fetch op on broker %s " + "(%sfetch)", + rd_kafka_broker_name(rkb), + rko->rko_u.share_fetch.should_fetch ? "" : "no-"); + rd_kafka_q_enq(rkb->rkb_ops, rko); + } + rd_kafka_rdunlock(rk); + + RD_IF_FREE(selected_rkb, rd_kafka_broker_destroy); + + return RD_KAFKA_OP_RES_HANDLED; +} + +rd_kafka_error_t * +rd_kafka_share_consume_batch(rd_kafka_t *rk, + int timeout_ms, + rd_kafka_message_t ***rkmessages /* out */, + size_t *rkmessages_size /* out */) { + rd_kafka_cgrp_t *rkcg; + rd_kafka_q_t *rkq; + rd_kafka_op_t *rko; + rd_ts_t now = rd_clock(); + rd_ts_t abs_timeout = rd_timeout_init0(now, timeout_ms); + int cnt = 0; + + if (!RD_KAFKA_IS_SHARE_CONSUMER(rk)) + return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, + "rd_kafka_share_consume_batch(): " + "rk is not a shared consumer"); + + // Steps: + // 0. Check if we're a part of consumer group yet or not + // 1. Check if there's anything still in the fetch queue - and if so, + // don't issue fetch ops + // 2. If not, select broker for share fetch and issue fetch ops to all + // brokers + // 3. 
Wait on consumer group queue for either messages or errors + + if (unlikely(!(rkcg = rd_kafka_cgrp_get(rk)))) + return rd_kafka_error_new(RD_KAFKA_RESP_ERR__STATE, + "rd_kafka_share_consume_batch(): " + "Consumer group not initialized"); + + rd_kafka_app_poll_start(rk, rkcg->rkcg_q, now, timeout_ms); + + /* If we have any pending items on the consumer queue, don't issue new + * requests, rather, deal with them first. */ + if (likely(rd_kafka_q_len(rkcg->rkcg_q) == 0)) { + rd_kafka_dbg(rk, CGRP, "SHARE", + "Issuing share fetch fanout to main thread"); + rd_kafka_op_t *fanout_rko = + rd_kafka_op_new_cb(rk, RD_KAFKA_OP_SHARE_FETCH_FANOUT, + rd_kafka_share_fetch_fanout_op); + fanout_rko->rko_u.share_fetch_fanout.abs_timeout = abs_timeout; + fanout_rko->rko_u.share_fetch_fanout.is_retry = rd_false; + fanout_rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + + rd_kafka_q_enq(rk->rk_ops, fanout_rko); + } + + while ((rko = rd_kafka_q_pop(rkcg->rkcg_q, + rd_timeout_remains_us(abs_timeout), 0))) { + rd_kafka_op_res_t res; + + cnt++; + res = rd_kafka_poll_cb(rk, rkcg->rkcg_q, rko, + RD_KAFKA_Q_CB_RETURN, NULL); + /* Ignore anything that's not PASS or YIELD, as it's handled + * already. */ + if (res == RD_KAFKA_OP_RES_YIELD) { + // TODO: Finish handling yields. + break; + } + + if (res != RD_KAFKA_OP_RES_PASS) + continue; + + switch (rko->rko_type) { + case RD_KAFKA_OP_FETCH: + /* Messages - for now, ignore. 
*/ + rd_kafka_dbg( + rk, CGRP, "SHARE", + "Ignoring msg at offset %d from share fetch", + rd_kafka_message_get(rko)->offset); + rd_kafka_op_destroy(rko); + break; + + default: + rd_kafka_log(rk, LOG_WARNING, "SHARE", + "Ignoring unexpected op %s in " + "rd_kafka_share_consume_batch()", + rd_kafka_op2str(rko->rko_type)); + rd_kafka_op_destroy(rko); + break; + } + } + + rd_kafka_dbg(rk, CGRP, "SHARE", + "Returning from share consume batch after %d ops", cnt); + rd_kafka_app_polled(rk, rkcg->rkcg_q); + + return NULL; +} + /** * Schedules a rebootstrap of the cluster immediately. * @@ -4331,6 +4597,17 @@ rd_kafka_op_res_t rd_kafka_poll_cb(rd_kafka_t *rk, res = rd_kafka_metadata_update_op(rk, rko->rko_u.metadata.mdi); break; + case RD_KAFKA_OP_SHARE_FETCH | RD_KAFKA_OP_REPLY: + res = rd_kafka_share_fetch_reply_op(rk, rko); + break; + + case RD_KAFKA_OP_SHARE_FETCH_FANOUT | RD_KAFKA_OP_REPLY: + rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); + rd_kafka_dbg(rk, TOPIC, "SHAREFETCH", + "Fetch share fanout reply: %s", + rd_kafka_err2str(rko->rko_err)); + break; + default: /* If op has a callback set (e.g., OAUTHBEARER_REFRESH), * call it. */ diff --git a/src/rdkafka.h b/src/rdkafka.h index e137c01804..a214bf2d5c 100644 --- a/src/rdkafka.h +++ b/src/rdkafka.h @@ -3058,7 +3058,16 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type, RD_EXPORT -rd_kafka_t *rd_kafka_share_consumer_new(rd_kafka_conf_t *conf, char *errstr, size_t errstr_size); +rd_kafka_t *rd_kafka_share_consumer_new(rd_kafka_conf_t *conf, + char *errstr, + size_t errstr_size); + +RD_EXPORT +rd_kafka_error_t * +rd_kafka_share_consume_batch(rd_kafka_t *rk, + int timeout_ms, + rd_kafka_message_t ***rkmessages /* out */, + size_t *rkmessages_size /* out */); /** * @brief Destroy Kafka handle. 
diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index eb8e849240..5f7f8d59f4 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3553,6 +3553,17 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { wakeup = rd_true; break; + case RD_KAFKA_OP_SHARE_FETCH: + rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", + "Received SHARE_FETCH op for broker %s with " + "should_fetch = %d", + rd_kafka_broker_name(rkb), + rko->rko_u.share_fetch.should_fetch); + /* This is only temporary handling for testing to avoid crashing + * on assert - the code below will automatically enqueue a + * reply which is not the final behaviour. */ + break; + default: rd_kafka_assert(rkb->rkb_rk, !*"unhandled op type"); break; diff --git a/src/rdkafka_broker.h b/src/rdkafka_broker.h index a649b7445e..b8f7cb6e21 100644 --- a/src/rdkafka_broker.h +++ b/src/rdkafka_broker.h @@ -387,6 +387,12 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ /** > 0 if this broker thread is terminating */ rd_atomic32_t termination_in_progress; + + /** + * Whether a share fetch should_fetch set is enqueued on + * this broker's op queue or not. 
+ */ + rd_bool_t rkb_share_fetch_enqueued; }; #define rd_kafka_broker_keep(rkb) rd_refcnt_add(&(rkb)->rkb_refcnt) diff --git a/src/rdkafka_cgrp.h b/src/rdkafka_cgrp.h index 79a734f5fb..020fbb2df6 100644 --- a/src/rdkafka_cgrp.h +++ b/src/rdkafka_cgrp.h @@ -377,6 +377,11 @@ typedef struct rd_kafka_cgrp_s { /* Timestamp of last rebalance start */ rd_ts_t rkcg_ts_rebalance_start; + struct { + size_t last_partition_picked; /* For round-robin + * partition picking */ + + } rkcg_share; } rd_kafka_cgrp_t; diff --git a/src/rdkafka_int.h b/src/rdkafka_int.h index 9e97bf9dca..ed2a0d6bce 100644 --- a/src/rdkafka_int.h +++ b/src/rdkafka_int.h @@ -1259,4 +1259,8 @@ int rd_kafka_rebootstrap_tmr_stop(rd_kafka_t *rk); void rd_kafka_reset_any_broker_down_reported(rd_kafka_t *rk); +rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, + rd_kafka_q_t *rkq, + rd_kafka_op_t *rko); + #endif /* _RDKAFKA_INT_H_ */ diff --git a/src/rdkafka_op.c b/src/rdkafka_op.c index db325b11c3..f5a053abe2 100644 --- a/src/rdkafka_op.c +++ b/src/rdkafka_op.c @@ -122,8 +122,9 @@ const char *rd_kafka_op2str(rd_kafka_op_type_t type) { "REPLY:RD_KAFKA_OP_SET_TELEMETRY_BROKER", [RD_KAFKA_OP_TERMINATE_TELEMETRY] = "REPLY:RD_KAFKA_OP_TERMINATE_TELEMETRY", - [RD_KAFKA_OP_ELECTLEADERS] = "REPLY:ELECTLEADERS", - [RD_KAFKA_OP_SHARE_FETCH] = "REPLY:SHARE_FETCH", + [RD_KAFKA_OP_ELECTLEADERS] = "REPLY:ELECTLEADERS", + [RD_KAFKA_OP_SHARE_FETCH] = "REPLY:SHARE_FETCH", + [RD_KAFKA_OP_SHARE_FETCH_FANOUT] = "REPLY:SHARE_FETCH_FANOUT", }; if (type & RD_KAFKA_OP_REPLY) @@ -289,6 +290,8 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { [RD_KAFKA_OP_TERMINATE_TELEMETRY] = _RD_KAFKA_OP_EMPTY, [RD_KAFKA_OP_ELECTLEADERS] = sizeof(rko->rko_u.admin_request), [RD_KAFKA_OP_SHARE_FETCH] = sizeof(rko->rko_u.share_fetch), + [RD_KAFKA_OP_SHARE_FETCH_FANOUT] = + sizeof(rko->rko_u.share_fetch_fanout), }; size_t tsize = op2size[type & ~RD_KAFKA_OP_FLAGMASK]; @@ -511,6 +514,12 @@ void 
rd_kafka_op_destroy(rd_kafka_op_t *rko) { case RD_KAFKA_OP_SHARE_FETCH: /* TODO KIP-932: Add destruction code. */ + RD_IF_FREE(rko->rko_u.share_fetch.target_broker, + rd_kafka_broker_destroy); + break; + + case RD_KAFKA_OP_SHARE_FETCH_FANOUT: + /* No dynamic resources to clean up */ break; default: diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index 58a248aaf1..1cf70e3197 100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -191,6 +191,8 @@ typedef enum { * u.admin_request */ RD_KAFKA_OP_SHARE_FETCH, /**< broker op: Issue share fetch request if applicable. */ + RD_KAFKA_OP_SHARE_FETCH_FANOUT, /**< fanout share fetch operation */ + RD_KAFKA_OP__END } rd_kafka_op_type_t; @@ -734,8 +736,21 @@ struct rd_kafka_op_s { /** Absolute timeout left to complete this share-fetch. */ rd_ts_t abs_timeout; + + /** Target broker to which op is sent. */ + rd_kafka_broker_t *target_broker; } share_fetch; + struct { + /** Absolute timeout for share fetch fanout operation. + */ + rd_ts_t abs_timeout; + + /** Is this a retry, or the first attempt of this poll? + * Retries only have should_fetch=true request sent. */ + rd_bool_t is_retry; + } share_fetch_fanout; + } rko_u; }; From 8cfb42832fdeaa475906687cc24dad9cfca2ffe8 Mon Sep 17 00:00:00 2001 From: Milind L Date: Fri, 17 Oct 2025 14:58:50 +0000 Subject: [PATCH 05/37] Start returning messages to user --- examples/consumer.c | 76 ++++++++++++++++++++++----------------------- src/rdkafka.c | 65 +++++++++----------------------------- src/rdkafka.h | 11 ++++++- 3 files changed, 63 insertions(+), 89 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 04ff0e43c7..c1abc50e61 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -236,14 +236,19 @@ int main(int argc, char **argv) { * since a rebalance may happen at any time. * Start polling for messages. 
*/ + rd_kafka_message_t **rkmessages = + (rd_kafka_message_t **)malloc(sizeof(rd_kafka_message_t *) * 100); while (run) { rd_kafka_message_t *rkm = NULL; + size_t rcvd_msgs = 0; + int i; // rkm = rd_kafka_consumer_poll(rk, 100); rd_kafka_error_t *error; // fprintf(stderr, "Calling consume_batch\n"); - error = rd_kafka_share_consume_batch(rk, 1000, NULL, NULL); + error = rd_kafka_share_consume_batch(rk, 1000, rkmessages, + &rcvd_msgs); if (error) { fprintf(stderr, "%% Consume error: %s\n", rd_kafka_error_string(error)); @@ -251,45 +256,40 @@ int main(int argc, char **argv) { continue; } - if (!rkm) - continue; /* Timeout: no message within 100ms, - * try again. This short timeout allows - * checking for `run` at frequent intervals. - */ - - /* consumer_poll() will return either a proper message - * or a consumer error (rkm->err is set). */ - if (rkm->err) { - /* Consumer errors are generally to be considered - * informational as the consumer will automatically - * try to recover from all types of errors. */ - fprintf(stderr, "%% Consumer error: %s\n", - rd_kafka_message_errstr(rkm)); + fprintf(stderr, "%% Received %zu messages\n", rcvd_msgs); + for (i = 0; i < (int)rcvd_msgs; i++) { + rkm = rkmessages[i]; + + if (rkm->err) { + fprintf(stderr, "%% Consumer error: %d: %s\n", + rkm->err, rd_kafka_message_errstr(rkm)); + rd_kafka_message_destroy(rkm); + continue; + } + + /* Proper message. */ + printf("Message on %s [%" PRId32 "] at offset %" PRId64 + " (leader epoch %" PRId32 "):\n", + rd_kafka_topic_name(rkm->rkt), rkm->partition, + rkm->offset, rd_kafka_message_leader_epoch(rkm)); + + /* Print the message key. */ + if (rkm->key && is_printable(rkm->key, rkm->key_len)) + printf(" Key: %.*s\n", (int)rkm->key_len, + (const char *)rkm->key); + else if (rkm->key) + printf(" Key: (%d bytes)\n", (int)rkm->key_len); + + /* Print the message value/payload. 
*/ + if (rkm->payload && + is_printable(rkm->payload, rkm->len)) + printf(" Value: %.*s\n", (int)rkm->len, + (const char *)rkm->payload); + else if (rkm->payload) + printf(" Value: (%d bytes)\n", (int)rkm->len); + rd_kafka_message_destroy(rkm); - continue; } - - /* Proper message. */ - printf("Message on %s [%" PRId32 "] at offset %" PRId64 - " (leader epoch %" PRId32 "):\n", - rd_kafka_topic_name(rkm->rkt), rkm->partition, - rkm->offset, rd_kafka_message_leader_epoch(rkm)); - - /* Print the message key. */ - if (rkm->key && is_printable(rkm->key, rkm->key_len)) - printf(" Key: %.*s\n", (int)rkm->key_len, - (const char *)rkm->key); - else if (rkm->key) - printf(" Key: (%d bytes)\n", (int)rkm->key_len); - - /* Print the message value/payload. */ - if (rkm->payload && is_printable(rkm->payload, rkm->len)) - printf(" Value: %.*s\n", (int)rkm->len, - (const char *)rkm->payload); - else if (rkm->payload) - printf(" Value: (%d bytes)\n", (int)rkm->len); - - rd_kafka_message_destroy(rkm); } diff --git a/src/rdkafka.c b/src/rdkafka.c index a040da0411..0c47379d70 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2893,6 +2893,7 @@ static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, rd_kafka_broker_t *selected_rkb = NULL; rd_kafka_topic_partition_list_t *partitions = rkcg->rkcg_current_assignment; + // rkcg->rkcg_toppars; /* TODO: use rkcg->rkcg_toppars instead. */ int32_t broker_id = -1; size_t i; rd_kafka_topic_partition_t *partition; @@ -2959,10 +2960,8 @@ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, ? 
rd_kafka_broker_name(rko->rko_u.share_fetch.target_broker) : "none"); - rd_kafka_broker_lock(rko->rko_u.share_fetch.target_broker); rko->rko_u.share_fetch.target_broker->rkb_share_fetch_enqueued = rd_false; - rd_kafka_broker_unlock(rko->rko_u.share_fetch.target_broker); if (rko->rko_error) { rd_kafka_dbg(rk, CGRP, "SHARE", "Share fetch failed: %s", @@ -3023,7 +3022,6 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, continue; } - rd_kafka_broker_lock(rkb); if (rkb->rkb_share_fetch_enqueued) { rd_kafka_broker_unlock(rkb); rd_kafka_dbg(rk, CGRP, "SHARE", @@ -3033,7 +3031,6 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, continue; } rkb->rkb_share_fetch_enqueued = rd_true; - rd_kafka_broker_unlock(rkb); rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); rko->rko_u.share_fetch.abs_timeout = @@ -3060,14 +3057,15 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rd_kafka_error_t * rd_kafka_share_consume_batch(rd_kafka_t *rk, int timeout_ms, - rd_kafka_message_t ***rkmessages /* out */, + rd_kafka_message_t **rkmessages /* out */, size_t *rkmessages_size /* out */) { rd_kafka_cgrp_t *rkcg; rd_kafka_q_t *rkq; rd_kafka_op_t *rko; - rd_ts_t now = rd_clock(); - rd_ts_t abs_timeout = rd_timeout_init0(now, timeout_ms); - int cnt = 0; + rd_ts_t now = rd_clock(); + rd_ts_t abs_timeout = rd_timeout_init0(now, timeout_ms); + int cnt = 0; + const size_t max_poll_records = 100; /* TODO: change. */ if (!RD_KAFKA_IS_SHARE_CONSUMER(rk)) return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, @@ -3087,8 +3085,6 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, "rd_kafka_share_consume_batch(): " "Consumer group not initialized"); - rd_kafka_app_poll_start(rk, rkcg->rkcg_q, now, timeout_ms); - /* If we have any pending items on the consumer queue, don't issue new * requests, rather, deal with them first. 
*/ if (likely(rd_kafka_q_len(rkcg->rkcg_q) == 0)) { @@ -3104,46 +3100,15 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, rd_kafka_q_enq(rk->rk_ops, fanout_rko); } - while ((rko = rd_kafka_q_pop(rkcg->rkcg_q, - rd_timeout_remains_us(abs_timeout), 0))) { - rd_kafka_op_res_t res; - - cnt++; - res = rd_kafka_poll_cb(rk, rkcg->rkcg_q, rko, - RD_KAFKA_Q_CB_RETURN, NULL); - /* Ignore anything that's not PASS or YIELD, as it's handled - * already. */ - if (res == RD_KAFKA_OP_RES_YIELD) { - // TODO: Finish handling yields. - break; - } - - if (res != RD_KAFKA_OP_RES_PASS) - continue; - - switch (rko->rko_type) { - case RD_KAFKA_OP_FETCH: - /* Messages - for now, ignore. */ - rd_kafka_dbg( - rk, CGRP, "SHARE", - "Ignoring msg at offset %d from share fetch", - rd_kafka_message_get(rko)->offset); - rd_kafka_op_destroy(rko); - break; - - default: - rd_kafka_log(rk, LOG_WARNING, "SHARE", - "Ignoring unexpected op %s in " - "rd_kafka_share_consume_batch()", - rd_kafka_op2str(rko->rko_type)); - rd_kafka_op_destroy(rko); - break; - } - } - - rd_kafka_dbg(rk, CGRP, "SHARE", - "Returning from share consume batch after %d ops", cnt); - rd_kafka_app_polled(rk, rkcg->rkcg_q); + /* At this point, there's no reason to deviate from what we already do + * for returning multiple messages to the user, as the orchestration + * is handled by the main thread. Later on, we needed, we might need + * a custom loop if we need any changes. 
*/ + *rkmessages_size = rd_kafka_q_serve_rkmessages( + rkcg->rkcg_q, timeout_ms, /* Use this timeout directly as prior + operations aren't blocking, so no need to + re-convert the abs_timeout into a relative one*/ + rkmessages, max_poll_records); return NULL; } diff --git a/src/rdkafka.h b/src/rdkafka.h index a214bf2d5c..42f8bfdb9c 100644 --- a/src/rdkafka.h +++ b/src/rdkafka.h @@ -3062,11 +3062,20 @@ rd_kafka_t *rd_kafka_share_consumer_new(rd_kafka_conf_t *conf, char *errstr, size_t errstr_size); +/** + * @brief Consume a batch of messages from the share consumer instance. + * + * @param rk Share consumer instance. + * @param timeout_ms Maximum time to block waiting for messages. + * @param rkmessages Output array of messages - this must be preallocated with + * at least enough capacity for size max.poll.records. + * @param rkmessages_size Output number of messages returned in rkmessages. + */ RD_EXPORT rd_kafka_error_t * rd_kafka_share_consume_batch(rd_kafka_t *rk, int timeout_ms, - rd_kafka_message_t ***rkmessages /* out */, + rd_kafka_message_t **rkmessages /* out */, size_t *rkmessages_size /* out */); /** From e6b512a9e645c11e4f370db5132c400315e79873 Mon Sep 17 00:00:00 2001 From: Milind L Date: Wed, 22 Oct 2025 04:57:12 +0000 Subject: [PATCH 06/37] Add retry for share fetches --- src/rdkafka.c | 126 +++++++++++++++++++++++++++++-------------- src/rdkafka_broker.c | 11 ++++ src/rdkafka_op.c | 3 +- 3 files changed, 98 insertions(+), 42 deletions(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index 0c47379d70..acb9351dd5 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2950,31 +2950,83 @@ static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, } rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, - rd_kafka_op_t *rko) { + rd_kafka_op_t *rko_orig) { rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); - rd_kafka_dbg( - rk, TOPIC, "SHAREFETCH", - "Fetch share reply: %s, should_fetch=%d, broker=%s", - rd_kafka_err2str(rko->rko_err), 
rko->rko_u.share_fetch.should_fetch, - rko->rko_u.share_fetch.target_broker - ? rd_kafka_broker_name(rko->rko_u.share_fetch.target_broker) - : "none"); - - rko->rko_u.share_fetch.target_broker->rkb_share_fetch_enqueued = + rd_kafka_dbg(rk, CGRP, "SHAREFETCH", + "Fetch share reply: %s, should_fetch=%d, broker=%s", + rd_kafka_err2str(rko_orig->rko_err), + rko_orig->rko_u.share_fetch.should_fetch, + rko_orig->rko_u.share_fetch.target_broker + ? rd_kafka_broker_name( + rko_orig->rko_u.share_fetch.target_broker) + : "none"); + + rko_orig->rko_u.share_fetch.target_broker->rkb_share_fetch_enqueued = rd_false; - if (rko->rko_error) { - rd_kafka_dbg(rk, CGRP, "SHARE", "Share fetch failed: %s", - rd_kafka_error_string(rko->rko_error)); - /* Retry fetching if there is any amount of time left. */ - // if (rko->rko_u.share_fetch.should_fetch && - // rd_timeout_remains(rko->rko_u.share_fetch.abs_timeout)) { - - // } + if (rko_orig->rko_error || rko_orig->rko_err) { + rd_kafka_dbg( + rk, CGRP, "SHARE", + "Share fetch failed: %s, timeout remains: %d, should-fetch " + "= %d", + rko_orig->rko_error + ? rd_kafka_error_name(rko_orig->rko_error) + : rd_kafka_err2name(rko_orig->rko_err), + rd_timeout_remains(rko_orig->rko_u.share_fetch.abs_timeout), + rko_orig->rko_u.share_fetch.should_fetch); + + /* Retry fetching if there is any amount of time left. For other + * errors, let them drop and be retried on the next poll. */ + /* TODO: KIP-932: Are there errors for which we should not + * retry, and rather propagate those errors to user? In that + * case, we must write to the rkcg queue. 
*/ + if (rko_orig->rko_u.share_fetch.should_fetch && + rd_timeout_remains( + rko_orig->rko_u.share_fetch.abs_timeout)) { + + rd_kafka_op_t *rko = + rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); + rd_kafka_broker_t *rkb = + rko_orig->rko_u.share_fetch.target_broker; + rko->rko_u.share_fetch.abs_timeout = + rko_orig->rko_u.share_fetch.abs_timeout; + rko->rko_u.share_fetch.should_fetch = rd_true; + rd_kafka_broker_keep(rkb); + rko->rko_u.share_fetch.target_broker = rkb; + rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + + rd_kafka_dbg( + rk, CGRP, "SHAREFETCH", + "Retrying share fetch op on broker %s after eror" + "(%s fetch)", + rd_kafka_broker_name(rkb), + rko->rko_u.share_fetch.should_fetch ? "should" : "should not"); + + /* TODO: KIP-932: Are there errors where we should back + * this off by running it on a timer?*/ + rd_kafka_q_enq(rkb->rkb_ops, rko); + } } return RD_KAFKA_OP_RES_HANDLED; } + +rd_kafka_op_res_t +rd_kafka_share_fetch_fanout_reply_op(rd_kafka_t *rk, rd_kafka_op_t *rko_orig) { + rd_kafka_message_t *rkm; + rd_kafka_op_t *rko; + if (!rko_orig->rko_err && !rko_orig->rko_error) + return RD_KAFKA_OP_RES_HANDLED; + + /* TODO: KIP-932: Add error handling - either retries, or user-level + * propagation, later. */ + rd_kafka_dbg( + rk, CGRP, "SHARE", "Encountered error in SHARE_FETCH_FANOUT: %s", + rko_orig->rko_err ? 
rd_kafka_err2name(rko_orig->rko_err) + : rd_kafka_error_name(rko_orig->rko_error)); + return RD_KAFKA_OP_RES_HANDLED; +} + /** * @locality main thread */ @@ -3011,7 +3063,7 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rd_kafka_rdlock(rk); TAILQ_FOREACH(rkb, &rk->rk_brokers, rkb_link) { - rd_kafka_op_t *rko; + rd_kafka_op_t *rko_sf; if (rd_kafka_broker_or_instance_terminating(rkb) || RD_KAFKA_BROKER_IS_LOGICAL(rkb)) { @@ -3032,20 +3084,22 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, } rkb->rkb_share_fetch_enqueued = rd_true; - rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); - rko->rko_u.share_fetch.abs_timeout = + rko_sf = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); + rko_sf->rko_u.share_fetch.abs_timeout = rko->rko_u.share_fetch_fanout.abs_timeout; - rko->rko_u.share_fetch.should_fetch = (rkb == selected_rkb); + rko_sf->rko_u.share_fetch.should_fetch = (rkb == selected_rkb); rd_kafka_broker_keep(rkb); - rko->rko_u.share_fetch.target_broker = rkb; - rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + rko_sf->rko_u.share_fetch.target_broker = rkb; + rko_sf->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); rd_kafka_dbg(rk, CGRP, "SHAREFETCH", "Enqueuing share fetch op on broker %s " - "(%sfetch)", + "(%s fetch)", rd_kafka_broker_name(rkb), - rko->rko_u.share_fetch.should_fetch ? "" : "no-"); - rd_kafka_q_enq(rkb->rkb_ops, rko); + rko_sf->rko_u.share_fetch.should_fetch + ? "should" + : "should not"); + rd_kafka_q_enq(rkb->rkb_ops, rko_sf); } rd_kafka_rdunlock(rk); @@ -3072,14 +3126,6 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, "rd_kafka_share_consume_batch(): " "rk is not a shared consumer"); - // Steps: - // 0. Check if we're a part of consumer group yet or not - // 1. Check if there's anything still in the fetch queue - and if so, - // don't issue fetch ops - // 2. If not, select broker for share fetch and issue fetch ops to all - // brokers - // 3. 
Wait on consumer group queue for either messages or errors - if (unlikely(!(rkcg = rd_kafka_cgrp_get(rk)))) return rd_kafka_error_new(RD_KAFKA_RESP_ERR__STATE, "rd_kafka_share_consume_batch(): " @@ -3089,7 +3135,9 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, * requests, rather, deal with them first. */ if (likely(rd_kafka_q_len(rkcg->rkcg_q) == 0)) { rd_kafka_dbg(rk, CGRP, "SHARE", - "Issuing share fetch fanout to main thread"); + "Issuing share fetch fanout to main thread with " + "abs_timeout = %lld and now=%lld", + abs_timeout, now); rd_kafka_op_t *fanout_rko = rd_kafka_op_new_cb(rk, RD_KAFKA_OP_SHARE_FETCH_FANOUT, rd_kafka_share_fetch_fanout_op); @@ -3107,7 +3155,7 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, *rkmessages_size = rd_kafka_q_serve_rkmessages( rkcg->rkcg_q, timeout_ms, /* Use this timeout directly as prior operations aren't blocking, so no need to - re-convert the abs_timeout into a relative one*/ + re-convert the abs_timeout into a relative one.*/ rkmessages, max_poll_records); return NULL; @@ -4568,9 +4616,7 @@ rd_kafka_op_res_t rd_kafka_poll_cb(rd_kafka_t *rk, case RD_KAFKA_OP_SHARE_FETCH_FANOUT | RD_KAFKA_OP_REPLY: rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); - rd_kafka_dbg(rk, TOPIC, "SHAREFETCH", - "Fetch share fanout reply: %s", - rd_kafka_err2str(rko->rko_err)); + rd_kafka_share_fetch_fanout_reply_op(rk, rko); break; default: diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 5f7f8d59f4..e130da7e96 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3562,6 +3562,17 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { /* This is only temporary handling for testing to avoid crashing * on assert - the code below will automatically enqueue a * reply which is not the final behaviour. */ + /* Insert errors randomly for testing, remove this code once + * actual errors can be tested via the mock broker. 
*/ + // if (rd_jitter(0, 10) > 7) { + // rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", + // "Injecting error! %s : %d", + // rd_kafka_broker_name(rkb), + // rko->rko_u.share_fetch.should_fetch); + + // rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); + // rko = NULL; + // } break; default: diff --git a/src/rdkafka_op.c b/src/rdkafka_op.c index f5a053abe2..e15db3f35b 100644 --- a/src/rdkafka_op.c +++ b/src/rdkafka_op.c @@ -513,13 +513,12 @@ void rd_kafka_op_destroy(rd_kafka_op_t *rko) { break; case RD_KAFKA_OP_SHARE_FETCH: - /* TODO KIP-932: Add destruction code. */ RD_IF_FREE(rko->rko_u.share_fetch.target_broker, rd_kafka_broker_destroy); break; case RD_KAFKA_OP_SHARE_FETCH_FANOUT: - /* No dynamic resources to clean up */ + /* No heap-allocated resources to clean up */ break; default: From 98b5dfee1d71f1b76008626314823daafe6404c5 Mon Sep 17 00:00:00 2001 From: Milind L Date: Mon, 27 Oct 2025 05:53:43 +0000 Subject: [PATCH 07/37] Unify error handling and add records conf --- CONFIGURATION.md | 1 + examples/consumer.c | 2 +- src/rdkafka.c | 243 ++++++++++++++++++++++++++++---------------- src/rdkafka_cgrp.h | 7 +- src/rdkafka_conf.c | 6 +- src/rdkafka_conf.h | 5 +- src/rdkafka_int.h | 3 +- src/rdkafka_op.h | 4 - 8 files changed, 175 insertions(+), 96 deletions(-) diff --git a/CONFIGURATION.md b/CONFIGURATION.md index 40b7412efd..ddf795a9e2 100644 --- a/CONFIGURATION.md +++ b/CONFIGURATION.md @@ -154,6 +154,7 @@ offset_commit_cb | C | | enable.partition.eof | C | true, false | false | low | Emit RD_KAFKA_RESP_ERR__PARTITION_EOF event whenever the consumer reaches the end of a partition.
*Type: boolean* check.crcs | C | true, false | false | medium | Verify CRC32 of consumed messages, ensuring no on-the-wire or on-disk corruption to the messages occurred. This check comes at slightly increased CPU usage.
*Type: boolean* client.rack | * | | | low | A rack identifier for this client. This can be any string value which indicates where this client is physically located. It corresponds with the broker config `broker.rack`.
*Type: string* +max.poll.records | C | 1 .. 2147483647 | 500 | low | Maximum number of records returned in a single consume batch call.<br>
*Type: integer* transactional.id | P | | | high | Enables the transactional producer. The transactional.id is used to identify the same transactional producer instance across process restarts. It allows the producer to guarantee that transactions corresponding to earlier instances of the same producer have been finalized prior to starting any new transactions, and that any zombie instances are fenced off. If no transactional.id is provided, then the producer is limited to idempotent delivery (if enable.idempotence is set). Requires broker version >= 0.11.0.
*Type: string* transaction.timeout.ms | P | 1000 .. 2147483647 | 60000 | medium | The maximum amount of time in milliseconds that the transaction coordinator will wait for a transaction status update from the producer before proactively aborting the ongoing transaction. If this value is larger than the `transaction.max.timeout.ms` setting in the broker, the init_transactions() call will fail with ERR_INVALID_TRANSACTION_TIMEOUT. The transaction timeout automatically adjusts `message.timeout.ms` and `socket.timeout.ms`, unless explicitly configured in which case they must not exceed the transaction timeout (`socket.timeout.ms` must be at least 100ms lower than `transaction.timeout.ms`). This is also the default timeout value if no timeout (-1) is supplied to the transactional API methods.
*Type: integer* enable.idempotence | P | true, false | false | high | When set to `true`, the producer will ensure that messages are successfully produced exactly once and in the original produce order. The following configuration properties are adjusted automatically (if not modified by the user) when idempotence is enabled: `max.in.flight.requests.per.connection=5` (must be less than or equal to 5), `retries=INT32_MAX` (must be greater than 0), `acks=all`, `queuing.strategy=fifo`. Producer instantation will fail if user-supplied configuration is incompatible.
*Type: boolean* diff --git a/examples/consumer.c b/examples/consumer.c index c1abc50e61..fea673b539 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -247,7 +247,7 @@ int main(int argc, char **argv) { rd_kafka_error_t *error; // fprintf(stderr, "Calling consume_batch\n"); - error = rd_kafka_share_consume_batch(rk, 1000, rkmessages, + error = rd_kafka_share_consume_batch(rk, 5000, rkmessages, &rcvd_msgs); if (error) { fprintf(stderr, "%% Consume error: %s\n", diff --git a/src/rdkafka.c b/src/rdkafka.c index acb9351dd5..0ac649da4a 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2894,11 +2894,9 @@ static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, rd_kafka_topic_partition_list_t *partitions = rkcg->rkcg_current_assignment; // rkcg->rkcg_toppars; /* TODO: use rkcg->rkcg_toppars instead. */ - int32_t broker_id = -1; size_t i; rd_kafka_topic_partition_t *partition; - if (!partitions || partitions->cnt == 0) { rd_kafka_dbg(rk, CGRP, "SHARE", "No partitions assigned to consumer, " @@ -2910,13 +2908,14 @@ static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, * has a leader. 
*/ rd_kafka_dbg(rk, CGRP, "SHARE", "Selecting broker for share fetch from %d assigned " - "partitions, offset = %d", + "partitions, last picked index = %" PRIusz, partitions->cnt, rkcg->rkcg_share.last_partition_picked); - for (i = 0; i < partitions->cnt && !selected_rkb; i++) { + for (i = 0; i < (size_t)partitions->cnt; i++) { rd_kafka_toppar_t *rktp; rkcg->rkcg_share.last_partition_picked += 1; - if (rkcg->rkcg_share.last_partition_picked >= partitions->cnt) + if (rkcg->rkcg_share.last_partition_picked >= + (size_t)partitions->cnt) rkcg->rkcg_share.last_partition_picked = 0; partition = &partitions->elems[rkcg->rkcg_share.last_partition_picked]; @@ -2941,16 +2940,59 @@ static rd_kafka_broker_t *rd_kafka_share_select_broker(rd_kafka_t *rk, } rd_kafka_toppar_destroy(rktp); - } - rd_kafka_dbg(rk, CGRP, "SHARE", "Selected broker %s for share fetch", - selected_rkb ? rd_kafka_broker_name(selected_rkb) - : "none"); + if (selected_rkb) + break; + } return selected_rkb; } +/** + * @brief Timer callback for reenequeing SHARE_FETCH_FANOUT after a backoff. + * @locality main thread + * @locks none + */ +static void rd_kafka_share_fetch_fanout_renqueue(rd_kafka_timers_t *rkts, + void *arg) { + rd_kafka_op_t *rko = arg; + rd_kafka_t *rk = rkts->rkts_rk; + + rd_kafka_dbg(rk, CGRP, "SHARE", "Re-enqueing SHARE_FETCH_FANOUT"); + rd_kafka_q_enq(rk->rk_ops, rko); +} + +/** + * @brief Enqueue a SHARE_FETCH_FANOUT op on the main queue. + * @param backoff_ms If >0 the op will be enqueued after this many milliseconds. + * Else, it will be immediate. 
+ * @locality any thread + */ +static void rd_kafka_share_fetch_fanout_with_backoff(rd_kafka_t *rk, + rd_ts_t abs_timeout, + int backoff_ms) { + rd_kafka_cgrp_t *rkcg = rd_kafka_cgrp_get(rk); + rd_kafka_op_t *rko = rd_kafka_op_new_cb( + rk, RD_KAFKA_OP_SHARE_FETCH_FANOUT, rd_kafka_share_fetch_fanout_op); + rko->rko_u.share_fetch_fanout.abs_timeout = abs_timeout; + rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + + if (backoff_ms > 0) + rd_kafka_timer_start_oneshot( + &rk->rk_timers, &rkcg->rkcg_share.share_fetch_fanout_tmr, + rd_true, backoff_ms * 1000, + rd_kafka_share_fetch_fanout_renqueue, rko); + else + rd_kafka_q_enq(rk->rk_ops, rko); +} + +/** + * Handles RD_KAFKA_OP_SHARE_FETCH | RD_KAFKA_OP_REPLY. + * @locality main thread + */ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, rd_kafka_op_t *rko_orig) { + rd_kafka_resp_err_t err; + rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); rd_kafka_dbg(rk, CGRP, "SHAREFETCH", "Fetch share reply: %s, should_fetch=%d, broker=%s", @@ -2964,70 +3006,91 @@ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, rko_orig->rko_u.share_fetch.target_broker->rkb_share_fetch_enqueued = rd_false; - if (rko_orig->rko_error || rko_orig->rko_err) { - rd_kafka_dbg( - rk, CGRP, "SHARE", - "Share fetch failed: %s, timeout remains: %d, should-fetch " - "= %d", - rko_orig->rko_error - ? rd_kafka_error_name(rko_orig->rko_error) - : rd_kafka_err2name(rko_orig->rko_err), - rd_timeout_remains(rko_orig->rko_u.share_fetch.abs_timeout), - rko_orig->rko_u.share_fetch.should_fetch); - - /* Retry fetching if there is any amount of time left. For other - * errors, let them drop and be retried on the next poll. */ - /* TODO: KIP-932: Are there errors for which we should not - * retry, and rather propagate those errors to user? In that - * case, we must write to the rkcg queue. 
*/ - if (rko_orig->rko_u.share_fetch.should_fetch && - rd_timeout_remains( - rko_orig->rko_u.share_fetch.abs_timeout)) { - - rd_kafka_op_t *rko = - rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); - rd_kafka_broker_t *rkb = - rko_orig->rko_u.share_fetch.target_broker; - rko->rko_u.share_fetch.abs_timeout = - rko_orig->rko_u.share_fetch.abs_timeout; - rko->rko_u.share_fetch.should_fetch = rd_true; - rd_kafka_broker_keep(rkb); - rko->rko_u.share_fetch.target_broker = rkb; - rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); - - rd_kafka_dbg( - rk, CGRP, "SHAREFETCH", - "Retrying share fetch op on broker %s after eror" - "(%s fetch)", - rd_kafka_broker_name(rkb), - rko->rko_u.share_fetch.should_fetch ? "should" : "should not"); - - /* TODO: KIP-932: Are there errors where we should back - * this off by running it on a timer?*/ - rd_kafka_q_enq(rkb->rkb_ops, rko); - } + if (!rko_orig->rko_err && !rko_orig->rko_error) + return RD_KAFKA_OP_RES_HANDLED; + + /* Retry fetching if there is any amount of time left. For other + * errors, let them drop and be retried on the next poll. */ + /* TODO: KIP-932: Are there errors for which we should not + * retry, and rather propagate those errors to user? In that + * case, we must write to the rkcg queue. */ + if (!rko_orig->rko_u.share_fetch.should_fetch || + !rd_timeout_remains(rko_orig->rko_u.share_fetch.abs_timeout)) + return RD_KAFKA_OP_RES_HANDLED; + + if (rko_orig->rko_error) + err = rd_kafka_error_code(rko_orig->rko_error); + + switch (err) { + /* For some errors, don't resend at all. + * This list of errors is incomplete. */ + case RD_KAFKA_RESP_ERR__TIMED_OUT: + case RD_KAFKA_RESP_ERR__DESTROY: + case RD_KAFKA_RESP_ERR__UNKNOWN_GROUP: + case RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS: /* This should not happen. */ + case RD_KAFKA_RESP_ERR__STATE: + case RD_KAFKA_RESP_ERR__AUTHENTICATION: + case RD_KAFKA_RESP_ERR_GROUP_AUTHORIZATION_FAILED: /* Do we need more + handling for fatal + errors? 
*/ + break; + + /* For other cases, resend to separate broker after a backoff. The + * fanout op will back us off automatically if there is no broker + * available. */ + default: + rd_kafka_share_fetch_fanout_with_backoff( + rk, rko_orig->rko_u.share_fetch.abs_timeout, + 0 /* no backoff */); + break; } return RD_KAFKA_OP_RES_HANDLED; } +/** + * Handles RD_KAFKA_OP_SHARE_FETCH_FANOUT | RD_KAFKA_OP_REPLY. + * @locality main thread + */ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_reply_op(rd_kafka_t *rk, rd_kafka_op_t *rko_orig) { - rd_kafka_message_t *rkm; - rd_kafka_op_t *rko; + rd_kafka_resp_err_t err; + if (!rko_orig->rko_err && !rko_orig->rko_error) return RD_KAFKA_OP_RES_HANDLED; + err = rko_orig->rko_err; + if (rko_orig->rko_error) + err = rd_kafka_error_code(rko_orig->rko_error); + /* TODO: KIP-932: Add error handling - either retries, or user-level * propagation, later. */ rd_kafka_dbg( - rk, CGRP, "SHARE", "Encountered error in SHARE_FETCH_FANOUT: %s", - rko_orig->rko_err ? rd_kafka_err2name(rko_orig->rko_err) - : rd_kafka_error_name(rko_orig->rko_error)); + rk, CGRP, "SHARE", + "Encountered error in SHARE_FETCH_FANOUT: %s, remains: %d", + rd_kafka_err2name(err), + rd_timeout_remains(rko_orig->rko_u.share_fetch_fanout.abs_timeout)); + + switch (err) { + /* Some errors need not be retried. */ + case RD_KAFKA_RESP_ERR__DESTROY: + case RD_KAFKA_RESP_ERR__TIMED_OUT: + break; + + /* Some errors may be retried - with a constant backoff. */ + default: + rd_kafka_share_fetch_fanout_with_backoff( + rk, rko_orig->rko_u.share_fetch_fanout.abs_timeout, + /* TODO: KIP-932: Consider setting this to retry_backoff_ms + or to a constant.*/ + rk->rk_conf.retry_backoff_max_ms); + break; + } return RD_KAFKA_OP_RES_HANDLED; } /** + * Op callback for RD_KAFKA_OP_SHARE_FETCH_FANOUT. 
* @locality main thread */ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, @@ -3037,10 +3100,27 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rd_kafka_error_t *error; rd_kafka_cgrp_t *rkcg = rd_kafka_cgrp_get(rk); rd_kafka_op_t *reply_rko; + rd_ts_t abs_timeout = rko->rko_u.share_fetch_fanout.abs_timeout; /* We should never have enqueued this op if we didn't have the rkcg */ rd_assert(rkcg); + if (rd_timeout_remains(abs_timeout) <= 0) { + rd_kafka_dbg(rk, CGRP, "SHARE", + "Not issuing SHARE_FETCH_FANOUT: timeout expired"); + + error = rd_kafka_error_new(RD_KAFKA_RESP_ERR__TIMED_OUT, + "rd_kafka_share_fetch_fanout_op(): " + "Share fetch fanout op timed out"); + + reply_rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH_FANOUT | + RD_KAFKA_OP_REPLY); + reply_rko->rko_error = error; + reply_rko->rko_u.share_fetch_fanout.abs_timeout = abs_timeout; + rd_kafka_replyq_enq(&rko->rko_replyq, reply_rko, 0); + return RD_KAFKA_OP_RES_HANDLED; + } + if (!(selected_rkb = rd_kafka_share_select_broker(rk, rkcg))) { error = rd_kafka_error_new(RD_KAFKA_RESP_ERR__STATE, @@ -3049,17 +3129,17 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, reply_rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH_FANOUT | RD_KAFKA_OP_REPLY); - reply_rko->rko_error = error; + reply_rko->rko_error = error; + reply_rko->rko_u.share_fetch_fanout.abs_timeout = abs_timeout; rd_kafka_replyq_enq(&rko->rko_replyq, reply_rko, 0); return RD_KAFKA_OP_RES_HANDLED; } /* Issue fetch requests to all brokers */ rd_kafka_dbg(rk, CGRP, "SHARE", - "Selected broker %s for share fetch, issuing " - "fetch requests to all %d brokers", - rd_kafka_broker_name(selected_rkb), - rd_atomic32_get(&rk->rk_broker_cnt)); + "Selected broker %s for fetching messages, issuing fetch " + "requests to necessary brokers", + rd_kafka_broker_name(selected_rkb)); rd_kafka_rdlock(rk); TAILQ_FOREACH(rkb, &rk->rk_brokers, rkb_link) { @@ -3085,8 +3165,7 @@ rd_kafka_op_res_t 
rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rkb->rkb_share_fetch_enqueued = rd_true; rko_sf = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); - rko_sf->rko_u.share_fetch.abs_timeout = - rko->rko_u.share_fetch_fanout.abs_timeout; + rko_sf->rko_u.share_fetch.abs_timeout = abs_timeout; rko_sf->rko_u.share_fetch.should_fetch = (rkb == selected_rkb); rd_kafka_broker_keep(rkb); rko_sf->rko_u.share_fetch.target_broker = rkb; @@ -3108,18 +3187,18 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, return RD_KAFKA_OP_RES_HANDLED; } -rd_kafka_error_t * -rd_kafka_share_consume_batch(rd_kafka_t *rk, - int timeout_ms, - rd_kafka_message_t **rkmessages /* out */, - size_t *rkmessages_size /* out */) { +rd_kafka_error_t *rd_kafka_share_consume_batch( + rd_kafka_t *rk, + int timeout_ms, + /* There is some benefit to making this ***rkmessages and allocating it + within this function, but on the flipside this means that it will always + be allocated on the heap. */ + rd_kafka_message_t **rkmessages /* out */, + size_t *rkmessages_size /* out */) { rd_kafka_cgrp_t *rkcg; - rd_kafka_q_t *rkq; - rd_kafka_op_t *rko; - rd_ts_t now = rd_clock(); - rd_ts_t abs_timeout = rd_timeout_init0(now, timeout_ms); - int cnt = 0; - const size_t max_poll_records = 100; /* TODO: change. 
*/ + rd_ts_t now = rd_clock(); + rd_ts_t abs_timeout = rd_timeout_init0(now, timeout_ms); + size_t max_poll_records = (size_t)rk->rk_conf.share.max_poll_records; if (!RD_KAFKA_IS_SHARE_CONSUMER(rk)) return rd_kafka_error_new(RD_KAFKA_RESP_ERR__INVALID_ARG, @@ -3136,16 +3215,10 @@ rd_kafka_share_consume_batch(rd_kafka_t *rk, if (likely(rd_kafka_q_len(rkcg->rkcg_q) == 0)) { rd_kafka_dbg(rk, CGRP, "SHARE", "Issuing share fetch fanout to main thread with " - "abs_timeout = %lld and now=%lld", - abs_timeout, now); - rd_kafka_op_t *fanout_rko = - rd_kafka_op_new_cb(rk, RD_KAFKA_OP_SHARE_FETCH_FANOUT, - rd_kafka_share_fetch_fanout_op); - fanout_rko->rko_u.share_fetch_fanout.abs_timeout = abs_timeout; - fanout_rko->rko_u.share_fetch_fanout.is_retry = rd_false; - fanout_rko->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); - - rd_kafka_q_enq(rk->rk_ops, fanout_rko); + "abs_timeout = %" PRId64, + abs_timeout); + rd_kafka_share_fetch_fanout_with_backoff(rk, abs_timeout, + 0 /* no backoff */); } /* At this point, there's no reason to deviate from what we already do @@ -4616,7 +4689,7 @@ rd_kafka_op_res_t rd_kafka_poll_cb(rd_kafka_t *rk, case RD_KAFKA_OP_SHARE_FETCH_FANOUT | RD_KAFKA_OP_REPLY: rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); - rd_kafka_share_fetch_fanout_reply_op(rk, rko); + res = rd_kafka_share_fetch_fanout_reply_op(rk, rko); break; default: diff --git a/src/rdkafka_cgrp.h b/src/rdkafka_cgrp.h index 020fbb2df6..8f6afedc7d 100644 --- a/src/rdkafka_cgrp.h +++ b/src/rdkafka_cgrp.h @@ -378,8 +378,11 @@ typedef struct rd_kafka_cgrp_s { rd_ts_t rkcg_ts_rebalance_start; struct { - size_t last_partition_picked; /* For round-robin - * partition picking */ + size_t last_partition_picked; /* For round-robin + * partition picking */ + rd_kafka_timer_t share_fetch_fanout_tmr; /**< Timer for + * share fetch + * fanout */ } rkcg_share; } rd_kafka_cgrp_t; diff --git a/src/rdkafka_conf.c b/src/rdkafka_conf.c index f9b5de7037..fe49950da8 100644 --- a/src/rdkafka_conf.c +++ 
b/src/rdkafka_conf.c @@ -1509,8 +1509,10 @@ static const struct rd_kafka_property rd_kafka_properties[] = { "which indicates where this client is physically located. It " "corresponds with the broker config `broker.rack`.", .sdef = ""}, - {_RK_GLOBAL | _RK_HIDDEN, "share.consumer", _RK_C_BOOL, _RK(is_share_consumer), - "tba description", 0, 1, 0}, + {_RK_GLOBAL | _RK_HIDDEN, "share.consumer", _RK_C_BOOL, + _RK(share.is_share_consumer), "tba description", 0, 1, 0}, + {_RK_GLOBAL | _RK_CONSUMER, "max.poll.records", _RK_C_INT, + _RK(share.max_poll_records), "tba description,", 1, INT_MAX, 500}, /* Global producer properties */ diff --git a/src/rdkafka_conf.h b/src/rdkafka_conf.h index fedfc2ecee..ea9e216efa 100644 --- a/src/rdkafka_conf.h +++ b/src/rdkafka_conf.h @@ -470,7 +470,10 @@ struct rd_kafka_conf_s { rd_kafkap_str_t *client_rack; - int is_share_consumer; /**< Is this a share consumer? */ + struct { + int is_share_consumer; /**< Is this a share consumer? */ + int max_poll_records; /**< Max records returned per poll */ + } share; /* * Producer configuration diff --git a/src/rdkafka_int.h b/src/rdkafka_int.h index ed2a0d6bce..c9fe9da3e1 100644 --- a/src/rdkafka_int.h +++ b/src/rdkafka_int.h @@ -95,7 +95,8 @@ typedef struct rd_kafka_lwtopic_s rd_kafka_lwtopic_t; #define RD_KAFKA_OFFSET_IS_LOGICAL(OFF) ((OFF) < 0) #define RD_KAFKA_IS_SHARE_CONSUMER(rk) \ - ((rk)->rk_type == RD_KAFKA_CONSUMER && (rk)->rk_conf.is_share_consumer) + ((rk)->rk_type == RD_KAFKA_CONSUMER && \ + (rk)->rk_conf.share.is_share_consumer) /** * @struct Represents a fetch position: diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index 1cf70e3197..705f16b770 100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -745,10 +745,6 @@ struct rd_kafka_op_s { /** Absolute timeout for share fetch fanout operation. */ rd_ts_t abs_timeout; - - /** Is this a retry, or the first attempt of this poll? - * Retries only have should_fetch=true request sent. 
*/ - rd_bool_t is_retry; } share_fetch_fanout; } rko_u; From 39ba8411e26615a90de4cda3314f977ebc616b06 Mon Sep 17 00:00:00 2001 From: Milind L Date: Tue, 28 Oct 2025 15:12:03 +0000 Subject: [PATCH 08/37] Allow poll to exit immediately on message --- src/rdkafka.c | 2 +- src/rdkafka_queue.c | 116 ++++++++++++++++++++++++++++++++++++++++++++ src/rdkafka_queue.h | 4 ++ 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index 0ac649da4a..b9659d31b4 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -3225,7 +3225,7 @@ rd_kafka_error_t *rd_kafka_share_consume_batch( * for returning multiple messages to the user, as the orchestration * is handled by the main thread. Later on, we needed, we might need * a custom loop if we need any changes. */ - *rkmessages_size = rd_kafka_q_serve_rkmessages( + *rkmessages_size = rd_kafka_q_serve_share_rkmessages( rkcg->rkcg_q, timeout_ms, /* Use this timeout directly as prior operations aren't blocking, so no need to re-convert the abs_timeout into a relative one.*/ diff --git a/src/rdkafka_queue.c b/src/rdkafka_queue.c index 92eddccae9..1d25d984d3 100644 --- a/src/rdkafka_queue.c +++ b/src/rdkafka_queue.c @@ -869,6 +869,122 @@ int rd_kafka_q_serve_rkmessages(rd_kafka_q_t *rkq, return cnt; } +int rd_kafka_q_serve_share_rkmessages(rd_kafka_q_t *rkq, + int timeout_ms, + rd_kafka_message_t **rkmessages, + size_t rkmessages_size) { + unsigned int cnt = 0; + TAILQ_HEAD(, rd_kafka_op_s) tmpq = TAILQ_HEAD_INITIALIZER(tmpq); + struct rd_kafka_op_tailq ctrl_msg_q = + TAILQ_HEAD_INITIALIZER(ctrl_msg_q); + rd_kafka_op_t *rko, *next; + rd_kafka_t *rk = rkq->rkq_rk; + rd_kafka_q_t *fwdq; + rd_ts_t abs_timeout; + + mtx_lock(&rkq->rkq_lock); + if ((fwdq = rd_kafka_q_fwd_get(rkq, 0))) { + /* Since the q_pop may block we need to release the parent + * queue's lock. 
*/ + mtx_unlock(&rkq->rkq_lock); + cnt = rd_kafka_q_serve_share_rkmessages(fwdq, timeout_ms, rkmessages, + rkmessages_size); + rd_kafka_q_destroy(fwdq); + return cnt; + } + + mtx_unlock(&rkq->rkq_lock); + + abs_timeout = rd_timeout_init(timeout_ms); + + rd_kafka_app_poll_start(rk, rkq, 0, timeout_ms); + + rd_kafka_yield_thread = 0; + while (cnt < rkmessages_size) { + rd_kafka_op_res_t res; + + mtx_lock(&rkq->rkq_lock); + + while (!(rko = TAILQ_FIRST(&rkq->rkq_q)) && + !rd_kafka_q_check_yield(rkq) && + /* Only do a timed wait if no messages are ready, if we + have gotten even one message, just return with it. */ + cnt == 0 && + cnd_timedwait_abs(&rkq->rkq_cond, &rkq->rkq_lock, + abs_timeout) == thrd_success) + ; + + rd_kafka_q_mark_served(rkq); + + if (!rko) { + mtx_unlock(&rkq->rkq_lock); + break; /* Timed out */ + } + + rd_kafka_q_deq0(rkq, rko); + + mtx_unlock(&rkq->rkq_lock); + + if (rd_kafka_op_version_outdated(rko, 0)) { + /* Outdated op, put on discard queue */ + TAILQ_INSERT_TAIL(&tmpq, rko, rko_link); + continue; + } + + /* Serve non-FETCH callbacks */ + res = + rd_kafka_poll_cb(rk, rkq, rko, RD_KAFKA_Q_CB_RETURN, NULL); + if (res == RD_KAFKA_OP_RES_KEEP || + res == RD_KAFKA_OP_RES_HANDLED) { + /* Callback served, rko is destroyed (if HANDLED). */ + continue; + } else if (unlikely(res == RD_KAFKA_OP_RES_YIELD || + rd_kafka_yield_thread)) { + /* Yield. */ + break; + } + rd_dassert(res == RD_KAFKA_OP_RES_PASS); + + /* If this is a control messages, don't return message to + * application. Add it to a tmp queue from where we can store + * the offset and destroy the op */ + if (unlikely(rd_kafka_op_is_ctrl_msg(rko))) { + TAILQ_INSERT_TAIL(&ctrl_msg_q, rko, rko_link); + continue; + } + + /* Get rkmessage from rko and append to array. */ + rkmessages[cnt++] = rd_kafka_message_get(rko); + } + + /* NOTE: KIP-932: + * For a share consumer, we are not using version barriers, and ideally, + * tmpq should be empty. 
However, the discard code is retained as + * non-share-consumer might still be around. This assert exists to spot + * any issues as they arise during testing.*/ + rd_dassert(TAILQ_EMPTY(&tmpq)); + + /* Discard non-desired and already handled ops */ + next = TAILQ_FIRST(&tmpq); + while (next) { + rko = next; + next = TAILQ_NEXT(next, rko_link); + rd_kafka_op_destroy(rko); + } + + /* Discard ctrl msgs */ + next = TAILQ_FIRST(&ctrl_msg_q); + while (next) { + rko = next; + next = TAILQ_NEXT(next, rko_link); + rd_kafka_op_destroy(rko); + } + + rd_kafka_app_polled(rk, rkq); + + return cnt; +} + void rd_kafka_queue_destroy(rd_kafka_queue_t *rkqu) { diff --git a/src/rdkafka_queue.h b/src/rdkafka_queue.h index ff1a465770..cfed32413a 100644 --- a/src/rdkafka_queue.h +++ b/src/rdkafka_queue.h @@ -878,6 +878,10 @@ int rd_kafka_q_serve_rkmessages(rd_kafka_q_t *rkq, int timeout_ms, rd_kafka_message_t **rkmessages, size_t rkmessages_size); +int rd_kafka_q_serve_share_rkmessages(rd_kafka_q_t *rkq, + int timeout_ms, + rd_kafka_message_t **rkmessages, + size_t rkmessages_size); rd_kafka_resp_err_t rd_kafka_q_wait_result(rd_kafka_q_t *rkq, int timeout_ms); int rd_kafka_q_apply(rd_kafka_q_t *rkq, From 24ada5299e336d34ce45c65747ba5a7431de2b7b Mon Sep 17 00:00:00 2001 From: Milind L Date: Tue, 28 Oct 2025 15:16:25 +0000 Subject: [PATCH 09/37] Fix invalid reads and compiler warnings --- examples/consumer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index fea673b539..3a4ad4f324 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -236,8 +236,7 @@ int main(int argc, char **argv) { * since a rebalance may happen at any time. * Start polling for messages. 
*/ - rd_kafka_message_t **rkmessages = - (rd_kafka_message_t **)malloc(sizeof(rd_kafka_message_t *) * 100); + rd_kafka_message_t *rkmessages[500]; while (run) { rd_kafka_message_t *rkm = NULL; size_t rcvd_msgs = 0; From 279edb9dff5f73e4495f37e04a722e318cc81df6 Mon Sep 17 00:00:00 2001 From: Milind L Date: Tue, 28 Oct 2025 15:22:27 +0000 Subject: [PATCH 10/37] Add missing backoff for share fetch retries --- src/rdkafka.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index b9659d31b4..6d0886a70b 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -3035,13 +3035,11 @@ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, errors? */ break; - /* For other cases, resend to separate broker after a backoff. The - * fanout op will back us off automatically if there is no broker - * available. */ + /* For other cases, resend to separate broker after a backoff. */ default: rd_kafka_share_fetch_fanout_with_backoff( rk, rko_orig->rko_u.share_fetch.abs_timeout, - 0 /* no backoff */); + rk->rk_conf.retry_backoff_max_ms); break; } From d5d50ad9a61b06c76d5c820891a0704a8e4384ae Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 23 Oct 2025 06:32:37 +0530 Subject: [PATCH 11/37] Added Share fetch request and response parsing --- src/rdkafka_broker.c | 149 ++++++++++-- src/rdkafka_broker.h | 20 ++ src/rdkafka_fetcher.c | 492 ++++++++++++++++++++++++++++++++++++++++ src/rdkafka_partition.c | 3 + src/rdkafka_partition.h | 11 + src/rdkafka_protocol.h | 6 +- 6 files changed, 657 insertions(+), 24 deletions(-) diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index e130da7e96..94b1f9abfa 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3467,6 +3467,64 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { rd_kafka_brokers_broadcast_state_change(rkb->rkb_rk); break; + case RD_KAFKA_OP_SHARE_FETCH: + rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", + 
"Received SHARE_FETCH op for broker %s with " + "should_fetch = %d", + rd_kafka_broker_name(rkb), + rko->rko_u.share_fetch.should_fetch); + /* This is only temporary handling for testing to avoid crashing + * on assert - the code below will automatically enqueue a + * reply which is not the final behaviour. */ + /* Insert errors randomly for testing, remove this code once + * actual errors can be tested via the mock broker. */ + // if (rd_jitter(0, 10) > 7) { + // rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", + // "Injecting error! %s : %d", + // rd_kafka_broker_name(rkb), + // rko->rko_u.share_fetch.should_fetch); + + // rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); + // rko = NULL; + // } + + if (rd_kafka_broker_or_instance_terminating(rkb)) { + rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + "Ignoring SHARE_FETCH op: " + "instance or broker is terminating"); + rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__DESTROY); + break; + } + + if(rkb->rkb_fetching) { + rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + "Ignoring SHARE_FETCH op: " + "already fetching"); + rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS); + break; + } + + if (!rko->rko_u.share_fetch.should_fetch) { + rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + "Ignoring SHARE_FETCH op: " + "should_fetch is false"); + rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__NOOP); + break; + } + + if(rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { + rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + "Connection not up: Sending connect in progress as reply"); + rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); + break; + } + + rkb->rkb_fetching = rd_true; + + rko = NULL; /* the rko is reused for the reply */ + + break; + case RD_KAFKA_OP_TERMINATE: /* nop: just a wake-up. 
*/ rd_rkb_dbg(rkb, BROKER, "TERM", @@ -3553,28 +3611,6 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { wakeup = rd_true; break; - case RD_KAFKA_OP_SHARE_FETCH: - rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", - "Received SHARE_FETCH op for broker %s with " - "should_fetch = %d", - rd_kafka_broker_name(rkb), - rko->rko_u.share_fetch.should_fetch); - /* This is only temporary handling for testing to avoid crashing - * on assert - the code below will automatically enqueue a - * reply which is not the final behaviour. */ - /* Insert errors randomly for testing, remove this code once - * actual errors can be tested via the mock broker. */ - // if (rd_jitter(0, 10) > 7) { - // rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", - // "Injecting error! %s : %d", - // rd_kafka_broker_name(rkb), - // rko->rko_u.share_fetch.should_fetch); - - // rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); - // rko = NULL; - // } - break; - default: rd_kafka_assert(rkb->rkb_rk, !*"unhandled op type"); break; @@ -4294,6 +4330,72 @@ static void rd_kafka_broker_producer_serve(rd_kafka_broker_t *rkb, } +/** + * Consumer serving + */ +static void rd_kafka_broker_share_consumer_serve(rd_kafka_broker_t *rkb, + rd_ts_t abs_timeout) { + unsigned int initial_state = rkb->rkb_state; + rd_ts_t now; + + rd_kafka_assert(rkb->rkb_rk, thrd_is_current(rkb->rkb_thread)); + + rd_kafka_broker_lock(rkb); + + while (!rd_kafka_broker_terminating(rkb) && + rkb->rkb_state == initial_state && + abs_timeout > (now = rd_clock())) { + rd_ts_t min_backoff; + + rd_kafka_broker_unlock(rkb); + + /* Serve toppars */ + min_backoff = rd_kafka_broker_consumer_toppars_serve(rkb); + if (rkb->rkb_ts_fetch_backoff > now && + rkb->rkb_ts_fetch_backoff < min_backoff) + min_backoff = rkb->rkb_ts_fetch_backoff; + + if (rkb->rkb_toppar_cnt > 0 && + rkb->rkb_share_fetch_session.epoch >= 0 && + rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { + /* There are partitions to fetch but the + * connection is not up. 
*/ + rkb->rkb_persistconn.internal++; + } + + /* Send Fetch request message for all underflowed toppars + * if the connection is up and there are no outstanding + * fetch requests for this connection. */ + if (!rkb->rkb_fetching && + rkb->rkb_state == RD_KAFKA_BROKER_STATE_UP) { + if (min_backoff < now) { + rd_kafka_broker_fetch_toppars(rkb, now); + min_backoff = abs_timeout; + } else if (min_backoff < RD_TS_MAX) + rd_rkb_dbg(rkb, FETCH, "FETCH", + "Fetch backoff for %" PRId64 "ms", + (min_backoff - now) / 1000); + } else { + /* Nothing needs to be done, next wakeup + * is from ops, state change, IO, or this timeout */ + min_backoff = abs_timeout; + } + + /* Check and move retry buffers */ + if (unlikely(rd_atomic32_get(&rkb->rkb_retrybufs.rkbq_cnt) > 0)) + rd_kafka_broker_retry_bufs_move(rkb, &min_backoff); + + if (min_backoff > abs_timeout) + min_backoff = abs_timeout; + + if (rd_kafka_broker_ops_io_serve(rkb, min_backoff)) + return; /* Wakeup */ + + rd_kafka_broker_lock(rkb); + } + + rd_kafka_broker_unlock(rkb); +} /** * Consumer serving @@ -4504,6 +4606,8 @@ static void rd_kafka_broker_serve(rd_kafka_broker_t *rkb, int timeout_ms) { if (rkb->rkb_rk->rk_type == RD_KAFKA_PRODUCER) rd_kafka_broker_producer_serve(rkb, abs_timeout); + else if (RD_KAFKA_IS_SHARE_CONSUMER(rkb->rkb_rk)) + rd_kafka_broker_share_consumer_serve(rkb, abs_timeout); else if (rkb->rkb_rk->rk_type == RD_KAFKA_CONSUMER) rd_kafka_broker_consumer_serve(rkb, abs_timeout); @@ -4942,6 +5046,7 @@ rd_kafka_broker_t *rd_kafka_broker_add(rd_kafka_t *rk, rkb->rkb_port = port; rkb->rkb_origname = rd_strdup(name); rkb->rkb_c.connections_max_idle_ms = -1; + rkb->rkb_share_fetch_session.epoch = 0; mtx_init(&rkb->rkb_lock, mtx_plain); mtx_init(&rkb->rkb_logname_lock, mtx_plain); diff --git a/src/rdkafka_broker.h b/src/rdkafka_broker.h index b8f7cb6e21..bad949de07 100644 --- a/src/rdkafka_broker.h +++ b/src/rdkafka_broker.h @@ -106,6 +106,26 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ /* Toppars 
handled by this broker */ TAILQ_HEAD(, rd_kafka_toppar_s) rkb_toppars; + + struct { + // TAILQ_HEAD(, rd_kafka_toppar_s) toppars; /* List of toppars + // in the current + // fetch session. + // Any new added toppar in rkb_toppars will be added here after successful share fetch request. + // Any removed toppar from rkb_toppars will be removed here after successful share fetch request. + // rkb_fetch_session.forgotten_toppars is calculated by rkb_fetch_session.toppars - rkb_toppars */ + + // TAILQ_HEAD(, rd_kafka_toppar_s) next_forgotten_toppars; /* List of toppars + // * that are removed from rkb_toppars but not yet removed from fetch session. + // * Will be sent in next fetch request. + // * Cleared when fetch session is reset or when fetch request is successful. */ + // TAILQ_HEAD(, rd_kafka_toppar_s) forgetting_toppars; /* List of toppars + // * that are removed from rkb_toppars and sent in fetch request but not yet removed from fetch session. + // * Cleared when fetch session is reset or when fetch request is successful. */ + int32_t epoch; /* Current fetch session + * epoch, or -1 if no session */ + } rkb_share_fetch_session; + int rkb_toppar_cnt; /* Active toppars that are eligible for: diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index e275ee5a88..4bddf9cb66 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -885,6 +885,288 @@ rd_kafka_fetch_reply_handle(rd_kafka_broker_t *rkb, } +static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( + rd_kafka_broker_t *rkb, + const rd_kafkap_str_t *topic, + rd_kafka_topic_t *rkt /*possibly NULL*/, + rd_kafka_buf_t *rkbuf, + rd_kafka_buf_t *request) { + + /* TODO: KIP-932: Check rd_kafka_fetch_reply_handle_partition + * and modify as needed for ShareFetch. 
+ */ + int32_t PartitionId; + int16_t PartitionFetchErrorCode; + rd_kafkap_str_t PartitionFetchErrorStr = RD_KAFKAP_STR_INITIALIZER_EMPTY; + int16_t AcknowledgementErrorCode; + rd_kafkap_str_t AcknowledgementErrorStr = RD_KAFKAP_STR_INITIALIZER_EMPTY; + rd_kafkap_CurrentLeader_t CurrentLeader; + int32_t MessageSetSize; + rd_kafka_toppar_t *rktp = NULL; + struct rd_kafka_toppar_ver tver; + rd_slice_t save_slice; + const int log_decode_errors = LOG_ERR; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; + int32_t AcquiredRecordsArrayCnt; + int64_t FirstOffset; + int64_t LastOffset; + int16_t DeliveryCount; + + + rd_kafka_buf_read_i32(rkbuf, &PartitionId); // Partition + rd_kafka_buf_read_i32(rkbuf, &PartitionFetchErrorCode); // PartitionFetchError + rd_kafka_buf_read_str(rkbuf, &PartitionFetchErrorStr); // ErrorString + rd_kafka_buf_read_i16(rkbuf, &AcknowledgementErrorCode); // AcknowledgementError + rd_kafka_buf_read_str(rkbuf, &AcknowledgementErrorStr); // AcknowledgementErrorString + rd_kafka_buf_read_CurrentLeader(rkbuf, &CurrentLeader); // CurrentLeader + + /* Compact Records Array */ + rd_kafka_buf_read_arraycnt(rkbuf, &MessageSetSize, -1); + + if (unlikely(MessageSetSize < 0)) + rd_kafka_buf_parse_fail( + rkbuf, + "%.*s [%" PRId32 "]: invalid MessageSetSize %" PRId32, + RD_KAFKAP_STR_PR(topic), PartitionId, MessageSetSize); + + /* Look up topic+partition */ + if (likely(rkt != NULL)) { + rd_kafka_topic_rdlock(rkt); + rktp = rd_kafka_toppar_get(rkt, PartitionId, + 0 /*no ua-on-miss*/); + rd_kafka_topic_rdunlock(rkt); + } + + if (unlikely(!rkt || !rktp)) { + rd_rkb_dbg(rkb, TOPIC, "UNKTOPIC", + "Received Fetch response (error %hu) for unknown " + "topic %.*s [%" PRId32 "]: ignoring", + PartitionFetchErrorCode, RD_KAFKAP_STR_PR(topic), + PartitionId); + rd_kafka_buf_skip(rkbuf, MessageSetSize); + goto done; + } + + tver.rktp = rktp; + tver.version = rktp->rktp_fetch_version; + + + /* No error, clear any previous fetch error. 
*/ + rktp->rktp_last_error = RD_KAFKA_RESP_ERR_NO_ERROR; + + if (unlikely(MessageSetSize <= 0)) + goto done; + + /** + * Parse MessageSet + */ + if (!rd_slice_narrow_relative(&rkbuf->rkbuf_reader, &save_slice, + (size_t) MessageSetSize)) + rd_kafka_buf_check_len(rkbuf, MessageSetSize); + + /* Parse messages */ + err = rd_kafka_msgset_parse(rkbuf, request, rktp, NULL, &tver); + + + rd_slice_widen(&rkbuf->rkbuf_reader, &save_slice); + /* Continue with next partition regardless of + * parse errors (which are partition-specific) */ + + rd_kafka_buf_read_arraycnt(rkbuf, &AcquiredRecordsArrayCnt, -1); // AcquiredRecordsArrayCnt + while(AcquiredRecordsArrayCnt-- > 0) { + rd_kafka_buf_read_i64(rkbuf, &FirstOffset); // FirstOffset + rd_kafka_buf_read_i64(rkbuf, &LastOffset); // LastOffset + rd_kafka_buf_read_i16(rkbuf, &DeliveryCount); // DeliveryCount + rd_kafka_buf_skip_tags(rkbuf); // AcquiredRecords tags + rd_rkb_dbg(rkb, MSG, "SHAREFETCH", + "%.*s [%" PRId32 "]: Acquired Records from offset %" PRId64 + " to %" PRId64 ", DeliveryCount %" PRId16, + RD_KAFKAP_STR_PR(topic), PartitionId, + FirstOffset, LastOffset, DeliveryCount); + rktp->rktp_share_acknowledge.first_offset = FirstOffset; + rktp->rktp_share_acknowledge.last_offset = LastOffset; + rktp->rktp_share_acknowledge.delivery_count = DeliveryCount; + } + + rd_kafka_buf_skip_tags(rkbuf); // Partition tags + + goto done; + +err_parse: + if (rktp) + rd_kafka_toppar_destroy(rktp); /*from get()*/ + return rkbuf->rkbuf_err; + +done: + if (likely(rktp != NULL)) + rd_kafka_toppar_destroy(rktp); /*from get()*/ + + return RD_KAFKA_RESP_ERR_NO_ERROR; + + } + + +/** + * Parses and handles a ShareFetch reply. + * Returns 0 on success or an error code on failure. + * + * TODO KIP-932: Change return type to proper error with message. See `rd_kafka_error_t *`. 
+ */ +static rd_kafka_resp_err_t +rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, + rd_kafka_buf_t *rkbuf, + rd_kafka_buf_t *request) { + int32_t TopicArrayCnt; + int i; + const int log_decode_errors = LOG_ERR; + rd_kafka_topic_t *rkt = NULL; + int16_t ErrorCode = RD_KAFKA_RESP_ERR_NO_ERROR; + rd_kafkap_str_t ErrorStr = RD_KAFKAP_STR_INITIALIZER_EMPTY; + int32_t AcquisitionLockTimeoutMs = 0; + rd_kafkap_NodeEndpoints_t NodeEndpoints; + + rd_kafka_buf_read_throttle_time(rkbuf); + + rd_kafka_buf_read_i16(rkbuf, &ErrorCode); + rd_kafka_buf_read_str(rkbuf, &ErrorStr); + + if(ErrorCode) { + rd_rkb_log(rkb, LOG_ERR, "SHAREFETCH", + "ShareFetch response error %d: %.*s", + ErrorCode, + RD_KAFKAP_STR_PR(&ErrorStr)); + rd_kafkap_str_destroy(&ErrorStr); + return ErrorCode; + } + + rd_kafka_buf_read_i32(rkbuf, &AcquisitionLockTimeoutMs); + + rd_kafka_buf_read_arraycnt(rkbuf, &TopicArrayCnt, RD_KAFKAP_TOPICS_MAX); + /* TODO KIP-932: Check if required. + Verify that TopicArrayCnt seems to be in line with remaining size */ + // rd_kafka_buf_check_len(rkbuf, + // TopicArrayCnt * (3 /*topic min size*/ + + // 4 /*PartitionArrayCnt*/ + 4 + + // 2 + 8 + 4 /*inner header*/)); + + for (i = 0; i < TopicArrayCnt; i++) { + rd_kafkap_str_t topic = RD_ZERO_INIT; + rd_kafka_Uuid_t topic_id = RD_KAFKA_UUID_ZERO; + int32_t PartitionArrayCnt; + int j; + + rd_kafka_buf_read_uuid(rkbuf, &topic_id); + rkt = rd_kafka_topic_find_by_topic_id(rkb->rkb_rk, + topic_id); + if (rkt) + topic = *rkt->rkt_topic; + + rd_kafka_buf_read_arraycnt(rkbuf, &PartitionArrayCnt, + RD_KAFKAP_PARTITIONS_MAX); + + for (j = 0; j < PartitionArrayCnt; j++) { + if (rd_kafka_share_fetch_reply_handle_partition( + rkb, &topic, rkt, rkbuf, request)) + goto err_parse; + } + + if (rkt) { + rd_kafka_topic_destroy0(rkt); + rkt = NULL; + } + /* Topic Tags */ + rd_kafka_buf_skip_tags(rkbuf); + } + + rd_kafka_buf_read_NodeEndpoints(rkbuf, &NodeEndpoints); + + /* Top level tags */ + rd_kafka_buf_skip_tags(rkbuf); + + + if 
(rd_kafka_buf_read_remain(rkbuf) != 0) { + rd_kafka_buf_parse_fail(rkbuf, + "Remaining data after message set " + "parse: %" PRIusz " bytes", + rd_kafka_buf_read_remain(rkbuf)); + RD_NOTREACHED(); + } + +done: + if (rkt) + rd_kafka_topic_destroy0(rkt); + return RD_KAFKA_RESP_ERR_NO_ERROR; + +err_parse: + if (rkt) + rd_kafka_topic_destroy0(rkt); + rd_rkb_dbg(rkb, MSG, "BADMSG", + "Bad message (Fetch v%d): " + "is broker.version.fallback incorrectly set?", + (int)request->rkbuf_reqhdr.ApiVersion); + return rkbuf->rkbuf_err; +} + + +/** + * @broker ShareFetchResponse handling. + * + * @locality broker thread (or any thread if err == __DESTROY). + */ +static void rd_kafka_broker_share_fetch_reply(rd_kafka_t *rk, + rd_kafka_broker_t *rkb, + rd_kafka_resp_err_t err, + rd_kafka_buf_t *reply, + rd_kafka_buf_t *request, + void *opaque) { + + rd_kafka_op_t *rko_orig = opaque; + + if (err == RD_KAFKA_RESP_ERR__DESTROY) + rd_kafka_op_reply(rko_orig, err); + return; /* Terminating */ + + rd_kafka_assert(rkb->rkb_rk, rkb->rkb_fetching > 0); + rkb->rkb_fetching = 0; + + /* Parse and handle the messages (unless the request errored) */ + if (!err && reply) + err = rd_kafka_share_fetch_reply_handle(rkb, reply, request); + + if (unlikely(err)) { + char tmp[128]; + + rd_rkb_dbg(rkb, MSG, "FETCH", "Fetch reply: %s", + rd_kafka_err2str(err)); + switch (err) { + case RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_OR_PART: + case RD_KAFKA_RESP_ERR_LEADER_NOT_AVAILABLE: + case RD_KAFKA_RESP_ERR_NOT_LEADER_FOR_PARTITION: + case RD_KAFKA_RESP_ERR_BROKER_NOT_AVAILABLE: + case RD_KAFKA_RESP_ERR_REPLICA_NOT_AVAILABLE: + case RD_KAFKA_RESP_ERR_UNKNOWN_TOPIC_ID: + /* Request metadata information update */ + rd_snprintf(tmp, sizeof(tmp), "FetchRequest failed: %s", + rd_kafka_err2str(err)); + rd_kafka_metadata_refresh_known_topics( + rkb->rkb_rk, NULL, rd_true /*force*/, tmp); + /* FALLTHRU */ + + case RD_KAFKA_RESP_ERR__TRANSPORT: + case RD_KAFKA_RESP_ERR_REQUEST_TIMED_OUT: + case 
RD_KAFKA_RESP_ERR__MSG_TIMED_OUT: + /* The fetch is already intervalled from + * consumer_serve() so dont retry. */ + break; + + default: + break; + } + + rd_kafka_broker_fetch_backoff(rkb, err); + /* FALLTHRU */ + } +} /** * @broker FetchResponse handling. @@ -959,6 +1241,216 @@ static rd_bool_t can_use_topic_ids(rd_kafka_broker_t *rkb) { return rd_true; } + +int rd_kafka_ShareFetchRequest( + rd_kafka_broker_t *rkb, + const rd_kafkap_str_t *group_id, + const rd_kafkap_str_t *member_id, + int32_t share_session_epoch, + int32_t wait_max_ms, + int32_t min_bytes, + int32_t max_bytes, + int32_t max_records, + int32_t batch_size, +// rd_kafka_toppar_t *toppars_to_send, +// int32_t toppars_to_send_cnt, + rd_kafka_toppar_t *forgotten_toppars, + int32_t forgotten_toppars_cnt, + rd_kafka_op_t *rko_orig, + rd_ts_t now) { + rd_kafka_toppar_t *rktp; + rd_kafka_buf_t *rkbuf; + int cnt = 0; + size_t of_TopicArrayCnt = 0; + int TopicArrayCnt = 0; + size_t of_PartitionArrayCnt = 0; + int PartitionArrayCnt = 0; + rd_kafka_topic_t *rkt_last = NULL; + int16_t ApiVersion = 0; + size_t rkbuf_size = 0; + + /* + * Only sending 1 aknowledgement for each partition. StartOffset + LastOffset + AcknowledgementType (ACCEPT for now). + * TODO KIP-932: Change this to accommodate explicit acknowledgements. 
+ */ + size_t acknowledgement_size = 8 + 8 + 1; + + /* Calculate buffer size */ + if (group_id) + rkbuf_size += RD_KAFKAP_STR_SIZE(group_id); + if (member_id) + rkbuf_size += RD_KAFKAP_STR_SIZE(member_id); + /* ShareSessionEpoch + WaitMaxMs + MinBytes + MaxBytes + MaxRecords + BatchSize + TopicArrayCnt*/ + rkbuf_size += 4 + 4 + 4 + 4 + 4 + 4 + 4; + /* N x (topic id + partition id + acknowledgement) */ + rkbuf_size += (rkb->rkb_toppar_cnt * (32 + 4 + acknowledgement_size)); + if( forgotten_toppars_cnt > 0) { + /* M x (topic id + partition id) */ + rkbuf_size += (forgotten_toppars_cnt * (32 + 4)); + } + + ApiVersion = rd_kafka_broker_ApiVersion_supported(rkb, RD_KAFKAP_ShareFetch, + 1, 1, NULL); + + rkbuf = rd_kafka_buf_new_flexver_request(rkb, RD_KAFKAP_ShareFetch, 1, + rkbuf_size, + rd_true); + + if (rkb->rkb_features & RD_KAFKA_FEATURE_MSGVER2) + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, + RD_KAFKA_FEATURE_MSGVER2); + else if (rkb->rkb_features & RD_KAFKA_FEATURE_MSGVER1) + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, + RD_KAFKA_FEATURE_MSGVER1); + else if (rkb->rkb_features & RD_KAFKA_FEATURE_THROTTLETIME) + rd_kafka_buf_ApiVersion_set(rkbuf, ApiVersion, + RD_KAFKA_FEATURE_THROTTLETIME); + + /* GroupId */ + rd_kafka_buf_write_kstr(rkbuf, group_id); + + /* MemberId */ + rd_kafka_buf_write_kstr(rkbuf, member_id); + + /* ShareSessionEpoch */ + rd_kafka_buf_write_i32(rkbuf, share_session_epoch); + + /* WaitMaxMs */ + rd_kafka_buf_write_i32(rkbuf, wait_max_ms); + + /* MinBytes */ + rd_kafka_buf_write_i32(rkbuf, min_bytes); + + /* MaxBytes */ + rd_kafka_buf_write_i32(rkbuf, max_bytes); + + /* MaxRecords */ + rd_kafka_buf_write_i32(rkbuf, max_records); + + /* BatchSize */ + rd_kafka_buf_write_i32(rkbuf, batch_size); + + /* Write zero TopicArrayCnt but store pointer for later update */ + of_TopicArrayCnt = rd_kafka_buf_write_arraycnt_pos(rkbuf); + if (rkb->rkb_toppar_cnt > 0) { + TAILQ_FOREACH(rktp, &rkb->rkb_toppars, rktp_rkblink) { + + if (rkt_last != 
rktp->rktp_rkt) { + if (rkt_last != NULL) { + /* Update PartitionArrayCnt */ + rd_kafka_buf_finalize_arraycnt( + rkbuf, of_PartitionArrayCnt, + PartitionArrayCnt); + /* Topic tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + } + + /* Topic ID */ + rd_kafka_buf_write_uuid( + rkbuf, &rktp->rktp_rkt->rkt_topic_id); + + TopicArrayCnt++; + rkt_last = rktp->rktp_rkt; + /* Partition count */ + of_PartitionArrayCnt = + rd_kafka_buf_write_arraycnt_pos(rkbuf); + PartitionArrayCnt = 0; + } + + PartitionArrayCnt++; + + /* Partition */ + rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); + + /* AcknowledgementBatches */ + /* For now we only support ACCEPT */ + rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + /* FirstOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); + /* LastOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); + /* AcknowledgementType */ + rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ + /* Acknowledgement tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + + /* Partition tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", + "Share Fetch topic %.*s [%" PRId32 "]", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); + + cnt++; + } + } + + if (rkt_last != NULL) { + /* Update last topic's PartitionArrayCnt */ + rd_kafka_buf_finalize_arraycnt(rkbuf, of_PartitionArrayCnt, + PartitionArrayCnt); + /* Topic tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + } + + /* Update TopicArrayCnt */ + rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); + + /* ForgottenToppars */ + rd_kafka_buf_write_arraycnt(rkbuf, 0); + + if (forgotten_toppars_cnt > 0) { + /* TODO KIP-932: Implement forgotten toppars handling */ + } + + /* Consider Fetch requests blocking if fetch.wait.max.ms >= 1s */ + if (rkb->rkb_rk->rk_conf.fetch_wait_max_ms >= 1000) + rkbuf->rkbuf_flags |= 
RD_KAFKA_OP_F_BLOCKING; + + /* Use configured timeout */ + rd_kafka_buf_set_timeout(rkbuf, + rkb->rkb_rk->rk_conf.socket_timeout_ms + + rkb->rkb_rk->rk_conf.fetch_wait_max_ms, + now); + + rkb->rkb_fetching = 1; + rd_kafka_broker_buf_enq1(rkb, rkbuf, rd_kafka_broker_share_fetch_reply, rko_orig); + + return cnt; +} + +void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { + + rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; + + /* TODO KIP-932: Check if needed while closing the consumer.*/ + rd_assert(rkb->rkb_rk->rk_cgrp); + + if(!rkcg->rkcg_member_id) { + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", + "Share Fetch requested without member_id"); + rd_kafka_op_reply(rko_orig, RD_KAFKA_RESP_ERR__INVALID_ARG); + return; + } + + rd_kafka_ShareFetchRequest( + rkb, + rkcg->rkcg_group_id, /* group_id */ + rkcg->rkcg_member_id, /* member_id */ + rkb->rkb_share_fetch_session.epoch, /* share_session_epoch */ + rkb->rkb_rk->rk_conf.fetch_wait_max_ms, + rkb->rkb_rk->rk_conf.fetch_min_bytes, + rkb->rkb_rk->rk_conf.fetch_max_bytes, + 500, + 500, + NULL, /* forgotten toppars */ + 0, /* forgotten toppars cnt */ + rko_orig, /* rko */ + now); +} + /** * @brief Build and send a Fetch request message for all underflowed toppars * for a specific broker. 
diff --git a/src/rdkafka_partition.c b/src/rdkafka_partition.c index ce4f01b467..f03f261c93 100644 --- a/src/rdkafka_partition.c +++ b/src/rdkafka_partition.c @@ -294,6 +294,9 @@ rd_kafka_toppar_t *rd_kafka_toppar_new0(rd_kafka_topic_t *rkt, rkt->rkt_topic->str, rktp->rktp_partition, rktp, &rktp->rktp_refcnt, func, line); + rktp->rktp_share_acknowledge.first_offset = RD_KAFKA_OFFSET_INVALID; + rktp->rktp_share_acknowledge.last_offset = RD_KAFKA_OFFSET_INVALID; + return rd_kafka_toppar_keep(rktp); } diff --git a/src/rdkafka_partition.h b/src/rdkafka_partition.h index 97a704f03b..0665c69c4e 100644 --- a/src/rdkafka_partition.h +++ b/src/rdkafka_partition.h @@ -478,6 +478,17 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ rd_atomic64_t rx_ver_drops; /**< Consumer: outdated message * drops. */ } rktp_c; + + /* + * TODO KIP-932: Change this according to need. Currently very basic. Not even handling GAP. + * Sends ACCEPT blindly with implicit acknowledgement. + */ + + struct { + int64_t first_offset; + int64_t last_offset; + int16_t delivery_count; + } rktp_share_acknowledge; }; /** diff --git a/src/rdkafka_protocol.h b/src/rdkafka_protocol.h index 28c246dd9c..6064743561 100644 --- a/src/rdkafka_protocol.h +++ b/src/rdkafka_protocol.h @@ -121,8 +121,10 @@ #define RD_KAFKAP_PushTelemetry 72 #define RD_KAFKAP_AssignReplicasToDirs 73 #define RD_KAFKAP_ShareGroupHeartbeat 76 - -#define RD_KAFKAP__NUM 74 +#define RD_KAFKAP_ShareGroupDescribe 77 +#define RD_KAFKAP_ShareFetch 78 +#define RD_KAFKAP_ShareAcknowledge 79 +#define RD_KAFKAP__NUM 80 #define RD_KAFKAP_RPC_VERSION_MAX INT16_MAX From 5f6e43feed1769d9abf4eea92b293509d291ccf1 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:57:10 +0530 Subject: [PATCH 12/37] Something working with lots of Segfault --- examples/consumer.c | 2 +- src/rdkafka_broker.c | 42 ++++------------- src/rdkafka_fetcher.c | 103 ++++++++++++++++++++++++++++++++++-------- 
src/rdkafka_fetcher.h | 2 + src/rdkafka_proto.h | 1 + src/rdkafka_request.c | 2 + 6 files changed, 98 insertions(+), 54 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 3a4ad4f324..a60e1d0c1d 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -165,7 +165,7 @@ int main(int argc, char **argv) { } - if (rd_kafka_conf_set(conf, "debug", "cgrp", errstr, sizeof(errstr)) != + if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) { fprintf(stderr, "%s\n", errstr); rd_kafka_conf_destroy(conf); diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 94b1f9abfa..46f97f790e 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3512,15 +3512,15 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { break; } - if(rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { - rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", - "Connection not up: Sending connect in progress as reply"); - rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); - break; - } + // if(rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { + // rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + // "Connection not up: Sending connect in progress as reply"); + // rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__STATE); + // break; + // } + + rd_kafka_broker_share_fetch(rkb, rko, rd_clock()); - rkb->rkb_fetching = rd_true; - rko = NULL; /* the rko is reused for the reply */ break; @@ -4345,16 +4345,10 @@ static void rd_kafka_broker_share_consumer_serve(rd_kafka_broker_t *rkb, while (!rd_kafka_broker_terminating(rkb) && rkb->rkb_state == initial_state && abs_timeout > (now = rd_clock())) { - rd_ts_t min_backoff; + rd_ts_t min_backoff = abs_timeout; rd_kafka_broker_unlock(rkb); - /* Serve toppars */ - min_backoff = rd_kafka_broker_consumer_toppars_serve(rkb); - if (rkb->rkb_ts_fetch_backoff > now && - rkb->rkb_ts_fetch_backoff < min_backoff) - min_backoff = rkb->rkb_ts_fetch_backoff; - if (rkb->rkb_toppar_cnt > 0 && 
rkb->rkb_share_fetch_session.epoch >= 0 && rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { @@ -4363,24 +4357,6 @@ static void rd_kafka_broker_share_consumer_serve(rd_kafka_broker_t *rkb, rkb->rkb_persistconn.internal++; } - /* Send Fetch request message for all underflowed toppars - * if the connection is up and there are no outstanding - * fetch requests for this connection. */ - if (!rkb->rkb_fetching && - rkb->rkb_state == RD_KAFKA_BROKER_STATE_UP) { - if (min_backoff < now) { - rd_kafka_broker_fetch_toppars(rkb, now); - min_backoff = abs_timeout; - } else if (min_backoff < RD_TS_MAX) - rd_rkb_dbg(rkb, FETCH, "FETCH", - "Fetch backoff for %" PRId64 "ms", - (min_backoff - now) / 1000); - } else { - /* Nothing needs to be done, next wakeup - * is from ops, state change, IO, or this timeout */ - min_backoff = abs_timeout; - } - /* Check and move retry buffers */ if (unlikely(rd_atomic32_get(&rkb->rkb_retrybufs.rkbq_cnt) > 0)) rd_kafka_broker_retry_bufs_move(rkb, &min_backoff); diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 4bddf9cb66..705c82c128 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -914,7 +914,7 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( rd_kafka_buf_read_i32(rkbuf, &PartitionId); // Partition - rd_kafka_buf_read_i32(rkbuf, &PartitionFetchErrorCode); // PartitionFetchError + rd_kafka_buf_read_i16(rkbuf, &PartitionFetchErrorCode); // PartitionFetchError rd_kafka_buf_read_str(rkbuf, &PartitionFetchErrorStr); // ErrorString rd_kafka_buf_read_i16(rkbuf, &AcknowledgementErrorCode); // AcknowledgementError rd_kafka_buf_read_str(rkbuf, &AcknowledgementErrorStr); // AcknowledgementErrorString @@ -1001,7 +1001,7 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( if (likely(rktp != NULL)) rd_kafka_toppar_destroy(rktp); /*from get()*/ - return RD_KAFKA_RESP_ERR_NO_ERROR; + return err; } @@ -1024,6 +1024,8 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, 
rd_kafkap_str_t ErrorStr = RD_KAFKAP_STR_INITIALIZER_EMPTY; int32_t AcquisitionLockTimeoutMs = 0; rd_kafkap_NodeEndpoints_t NodeEndpoints; + NodeEndpoints.NodeEndpoints = NULL; + NodeEndpoints.NodeEndpointCnt = 0; rd_kafka_buf_read_throttle_time(rkbuf); @@ -1032,7 +1034,7 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, if(ErrorCode) { rd_rkb_log(rkb, LOG_ERR, "SHAREFETCH", - "ShareFetch response error %d: %.*s", + "ShareFetch response error %d: '%.*s'", ErrorCode, RD_KAFKAP_STR_PR(&ErrorStr)); rd_kafkap_str_destroy(&ErrorStr); @@ -1092,7 +1094,7 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, RD_NOTREACHED(); } -done: +// done: if (rkt) rd_kafka_topic_destroy0(rkt); return RD_KAFKA_RESP_ERR_NO_ERROR; @@ -1122,17 +1124,23 @@ static void rd_kafka_broker_share_fetch_reply(rd_kafka_t *rk, rd_kafka_op_t *rko_orig = opaque; - if (err == RD_KAFKA_RESP_ERR__DESTROY) + if (err == RD_KAFKA_RESP_ERR__DESTROY) { rd_kafka_op_reply(rko_orig, err); return; /* Terminating */ + } rd_kafka_assert(rkb->rkb_rk, rkb->rkb_fetching > 0); - rkb->rkb_fetching = 0; /* Parse and handle the messages (unless the request errored) */ if (!err && reply) err = rd_kafka_share_fetch_reply_handle(rkb, reply, request); + + if (rko_orig) + rd_kafka_op_reply(rko_orig, err); + + rkb->rkb_fetching = 0; + if (unlikely(err)) { char tmp[128]; @@ -1242,7 +1250,7 @@ static rd_bool_t can_use_topic_ids(rd_kafka_broker_t *rkb) { } -int rd_kafka_ShareFetchRequest( +void rd_kafka_ShareFetchRequest( rd_kafka_broker_t *rkb, const rd_kafkap_str_t *group_id, const rd_kafkap_str_t *member_id, @@ -1268,6 +1276,9 @@ int rd_kafka_ShareFetchRequest( rd_kafka_topic_t *rkt_last = NULL; int16_t ApiVersion = 0; size_t rkbuf_size = 0; + rd_bool_t has_acknowledgements = rd_false; + rd_bool_t has_forgotten_toppars = forgotten_toppars_cnt > 0 ? rd_true : rd_false; + rd_bool_t is_fetching_messages = max_records > 0 ? rd_true : rd_false; /* * Only sending 1 aknowledgement for each partition. 
StartOffset + LastOffset + AcknowledgementType (ACCEPT for now). @@ -1312,6 +1323,9 @@ int rd_kafka_ShareFetchRequest( /* MemberId */ rd_kafka_buf_write_kstr(rkbuf, member_id); + printf(" --------------------------------------- rd_kafka_ShareFetchRequest: member_id=%.*s\n", + RD_KAFKAP_STR_PR(member_id)); + /* ShareSessionEpoch */ rd_kafka_buf_write_i32(rkbuf, share_session_epoch); @@ -1335,6 +1349,12 @@ int rd_kafka_ShareFetchRequest( if (rkb->rkb_toppar_cnt > 0) { TAILQ_FOREACH(rktp, &rkb->rkb_toppars, rktp_rkblink) { + rd_kafka_toppar_lock(rktp); + + if(!(rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_CGRP)) { + rd_kafka_toppar_unlock(rktp); + continue; + } if (rkt_last != rktp->rktp_rkt) { if (rkt_last != NULL) { /* Update PartitionArrayCnt */ @@ -1357,23 +1377,36 @@ int rd_kafka_ShareFetchRequest( PartitionArrayCnt = 0; } + rd_kafka_toppar_unlock(rktp); + PartitionArrayCnt++; /* Partition */ rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); + printf(" ------------------------------------------------------------------ AcknowledgementBatches for topic %.*s [%" PRId32 "] : first_offset=%" PRId64 ", last_offset=%" PRId64 "\n", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition, + rktp->rktp_share_acknowledge.first_offset, + rktp->rktp_share_acknowledge.last_offset); /* AcknowledgementBatches */ - /* For now we only support ACCEPT */ - rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ - /* FirstOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); - /* LastOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); - /* AcknowledgementType */ - rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ - rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ - /* Acknowledgement tags */ - rd_kafka_buf_write_tags_empty(rkbuf); + if (rktp->rktp_share_acknowledge.first_offset >= 0) { + /* For now we only support ACCEPT */ + rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + /* FirstOffset */ + 
rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); + /* LastOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); + /* AcknowledgementType */ + rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ + /* Acknowledgement tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + has_acknowledgements = rd_true; + } else { + /* No acknowledgements */ + rd_kafka_buf_write_arraycnt(rkbuf, 0); + } /* Partition tags */ rd_kafka_buf_write_tags_empty(rkbuf); @@ -1387,6 +1420,10 @@ int rd_kafka_ShareFetchRequest( } } + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", + "Share Fetch Request with %d toppars on %d topics", + cnt, TopicArrayCnt); + if (rkt_last != NULL) { /* Update last topic's PartitionArrayCnt */ rd_kafka_buf_finalize_arraycnt(rkbuf, of_PartitionArrayCnt, @@ -1395,6 +1432,21 @@ int rd_kafka_ShareFetchRequest( rd_kafka_buf_write_tags_empty(rkbuf); } + if(has_acknowledgements || has_forgotten_toppars || is_fetching_messages) { + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", + "Share Fetch Request sent with%s%s%s", + has_acknowledgements ? " acknowledgements," : "", + has_forgotten_toppars ? " forgotten toppars," : "", + is_fetching_messages ? 
" fetching messages" : ""); + } else { + rd_kafka_buf_destroy(rkbuf); + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", + "Share Fetch Request not sent since there are no " + "acknowledgements, forgotten toppars or messages to fetch"); + rd_kafka_op_reply(rko_orig, RD_KAFKA_RESP_ERR__NOOP); + return; + } + /* Update TopicArrayCnt */ rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); @@ -1416,14 +1468,21 @@ int rd_kafka_ShareFetchRequest( now); rkb->rkb_fetching = 1; + rd_kafka_dbg(rkb->rkb_rk, MSG, "FETCH", + "Issuing ShareFetch request (max wait %dms, min %d bytes, " + "max %d bytes, max %d records) with %d toppars to broker %s " + "(id %" PRId32 ")", + wait_max_ms, min_bytes, max_bytes, max_records, cnt, + rkb->rkb_name, rkb->rkb_nodeid); rd_kafka_broker_buf_enq1(rkb, rkbuf, rd_kafka_broker_share_fetch_reply, rko_orig); - return cnt; + return; } void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; + int32_t max_records = 0; /* TODO KIP-932: Check if needed while closing the consumer.*/ rd_assert(rkb->rkb_rk->rk_cgrp); @@ -1435,6 +1494,10 @@ void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig return; } + if(rko_orig->rko_u.share_fetch.should_fetch) { + max_records = 500; + } + rd_kafka_ShareFetchRequest( rkb, rkcg->rkcg_group_id, /* group_id */ @@ -1443,7 +1506,7 @@ void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig rkb->rkb_rk->rk_conf.fetch_wait_max_ms, rkb->rkb_rk->rk_conf.fetch_min_bytes, rkb->rkb_rk->rk_conf.fetch_max_bytes, - 500, + max_records, 500, NULL, /* forgotten toppars */ 0, /* forgotten toppars cnt */ diff --git a/src/rdkafka_fetcher.h b/src/rdkafka_fetcher.h index e304f1369f..c3b6b65c34 100644 --- a/src/rdkafka_fetcher.h +++ b/src/rdkafka_fetcher.h @@ -40,5 +40,7 @@ rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, rd_kafka_broker_t *rkb, int force_remove); +void 
rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now); + #endif /* _RDKAFKA_FETCHER_H_ */ diff --git a/src/rdkafka_proto.h b/src/rdkafka_proto.h index b5b52ac146..8d8362675c 100644 --- a/src/rdkafka_proto.h +++ b/src/rdkafka_proto.h @@ -176,6 +176,7 @@ static RD_UNUSED const char *rd_kafka_ApiKey2str(int16_t ApiKey) { [RD_KAFKAP_GetTelemetrySubscriptions] = "GetTelemetrySubscriptions", [RD_KAFKAP_PushTelemetry] = "PushTelemetry", [RD_KAFKAP_ShareGroupHeartbeat] = "ShareGroupHeartbeat", + [RD_KAFKAP_ShareFetch] = "ShareFetch", }; static RD_TLS char ret[64]; diff --git a/src/rdkafka_request.c b/src/rdkafka_request.c index 94c01e5964..2886ab7625 100644 --- a/src/rdkafka_request.c +++ b/src/rdkafka_request.c @@ -562,6 +562,8 @@ int rd_kafka_buf_read_NodeEndpoints(rd_kafka_buf_t *rkbuf, int32_t i; rd_kafka_buf_read_arraycnt(rkbuf, &NodeEndpoints->NodeEndpointCnt, RD_KAFKAP_BROKERS_MAX); + printf(" --------------------------------------- rd_kafka_buf_read_NodeEndpoints: NodeEndpointCnt=%d\n", + NodeEndpoints->NodeEndpointCnt); rd_dassert(!NodeEndpoints->NodeEndpoints); NodeEndpoints->NodeEndpoints = rd_calloc(NodeEndpoints->NodeEndpointCnt, From a96e267f687cb1b5e15d4948cae5b56512674c22 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 23 Oct 2025 19:40:30 +0530 Subject: [PATCH 13/37] Fixed Acknowledgement and acknowledgement type array count --- src/rdkafka_broker.c | 24 ++++++++++-------------- src/rdkafka_fetcher.c | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 46f97f790e..e5c672968b 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3493,24 +3493,22 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { "Ignoring SHARE_FETCH op: " "instance or broker is terminating"); rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__DESTROY); - break; - } - - if(rkb->rkb_fetching) { + } else 
if(rkb->rkb_fetching) { rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", "Ignoring SHARE_FETCH op: " "already fetching"); rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS); - break; + } else if(rko->rko_u.share_fetch.should_fetch) { + rd_kafka_broker_share_fetch(rkb, rko, rd_clock()); } - if (!rko->rko_u.share_fetch.should_fetch) { - rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", - "Ignoring SHARE_FETCH op: " - "should_fetch is false"); - rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__NOOP); - break; - } + // if (!rko->rko_u.share_fetch.should_fetch) { + // rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + // "Ignoring SHARE_FETCH op: " + // "should_fetch is false"); + // rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__NOOP); + // break; + // } // if(rkb->rkb_state != RD_KAFKA_BROKER_STATE_UP) { // rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", @@ -3519,8 +3517,6 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { // break; // } - rd_kafka_broker_share_fetch(rkb, rko, rd_clock()); - rko = NULL; /* the rko is reused for the reply */ break; diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 705c82c128..585b6ab4ce 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1392,13 +1392,13 @@ void rd_kafka_ShareFetchRequest( /* AcknowledgementBatches */ if (rktp->rktp_share_acknowledge.first_offset >= 0) { /* For now we only support ACCEPT */ - rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ /* FirstOffset */ rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); /* LastOffset */ rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); /* AcknowledgementType */ - rd_kafka_buf_write_i32(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ /* Acknowledgement tags */ rd_kafka_buf_write_tags_empty(rkbuf); From 7cbcb9dc1d9d3dbc7b00bb41d671154292fd90dc 
Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 23 Oct 2025 19:54:58 +0530 Subject: [PATCH 14/37] Fixed incrementing epoch. Fixed Uuid url encoding issue temporarily. Should be fixed on the broker side. --- src/rdkafka.c | 59 +++++++++++++++++++++++++++---------------- src/rdkafka_fetcher.c | 1 + 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index 6d0886a70b..ef84e1e560 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -5861,29 +5861,44 @@ char *rd_kafka_Uuid_str(const rd_kafka_Uuid_t *uuid) { } const char *rd_kafka_Uuid_base64str(const rd_kafka_Uuid_t *uuid) { - if (*uuid->base64str) - return uuid->base64str; - - rd_chariov_t in_base64; - char *out_base64_str; - char *uuid_bytes; - uint64_t input_uuid[2]; - - input_uuid[0] = htobe64(uuid->most_significant_bits); - input_uuid[1] = htobe64(uuid->least_significant_bits); - uuid_bytes = (char *)input_uuid; - in_base64.ptr = uuid_bytes; - in_base64.size = sizeof(uuid->most_significant_bits) + - sizeof(uuid->least_significant_bits); - - out_base64_str = rd_base64_encode_str(&in_base64); - if (!out_base64_str) - return NULL; - - rd_strlcpy((char *)uuid->base64str, out_base64_str, - 23 /* Removing extra ('=') padding */); - rd_free(out_base64_str); + if (*uuid->base64str) return uuid->base64str; + + rd_chariov_t in_base64; + char *out_base64_str; + char *uuid_bytes; + uint64_t input_uuid[2]; + + input_uuid[0] = htobe64(uuid->most_significant_bits); + input_uuid[1] = htobe64(uuid->least_significant_bits); + uuid_bytes = (char *)input_uuid; + in_base64.ptr = uuid_bytes; + in_base64.size = sizeof(uuid->most_significant_bits) + + sizeof(uuid->least_significant_bits); + + // Standard Base64 encode + out_base64_str = rd_base64_encode_str(&in_base64); + if (!out_base64_str) + return NULL; + + // Convert to URL-safe Base64 + for (char *p = out_base64_str; *p; p++) { + if (*p == '+') + *p = '-'; + else if (*p == '/') + *p = '_'; + } 
+ + // Strip '=' padding (Kafka’s Base64 UUIDs are 22 chars) + size_t len = strlen(out_base64_str); + while (len > 0 && out_base64_str[len - 1] == '=') { + out_base64_str[--len] = '\0'; + } + + rd_strlcpy((char *)uuid->base64str, out_base64_str, sizeof(uuid->base64str)); + rd_free(out_base64_str); + + return uuid->base64str; } unsigned int rd_kafka_Uuid_hash(const rd_kafka_Uuid_t *uuid) { diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 585b6ab4ce..5e0d61b4f8 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1123,6 +1123,7 @@ static void rd_kafka_broker_share_fetch_reply(rd_kafka_t *rk, void *opaque) { rd_kafka_op_t *rko_orig = opaque; + rkb->rkb_share_fetch_session.epoch++; if (err == RD_KAFKA_RESP_ERR__DESTROY) { rd_kafka_op_reply(rko_orig, err); From 49a628b7f450e4edfea5e245ff605cb6ce856650 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 28 Oct 2025 16:06:45 +0530 Subject: [PATCH 15/37] Added timing around poll, fixed a memory leak --- examples/consumer.c | 71 ++++++++++++++++++++++--------------- src/rdkafka_fetcher.c | 4 +-- src/rdkafka_msgset_reader.c | 2 ++ 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index a60e1d0c1d..70ac70a099 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -33,10 +33,15 @@ * (https://github.com/confluentinc/librdkafka) */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199309L +#endif + #include #include #include #include +#include /* Typical include path would be , but this program @@ -165,12 +170,12 @@ int main(int argc, char **argv) { } - if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != - RD_KAFKA_CONF_OK) { - fprintf(stderr, "%s\n", errstr); - rd_kafka_conf_destroy(conf); - return 1; - } + // if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != + // RD_KAFKA_CONF_OK) { + // fprintf(stderr, "%s\n", errstr); + // 
rd_kafka_conf_destroy(conf); + // return 1; + // } /* * Create consumer instance. @@ -246,8 +251,16 @@ int main(int argc, char **argv) { rd_kafka_error_t *error; // fprintf(stderr, "Calling consume_batch\n"); - error = rd_kafka_share_consume_batch(rk, 5000, rkmessages, - &rcvd_msgs); + struct timespec __t0, __t1; + if (clock_gettime(CLOCK_MONOTONIC, &__t0) != 0) + perror("clock_gettime"); + error = rd_kafka_share_consume_batch(rk, 500, rkmessages, &rcvd_msgs); + if (clock_gettime(CLOCK_MONOTONIC, &__t1) != 0) + perror("clock_gettime"); + double __elapsed_ms = + (__t1.tv_sec - __t0.tv_sec) * 1000.0 + (__t1.tv_nsec - __t0.tv_nsec) / 1e6; + fprintf(stdout, "%% rd_kafka_share_consume_batch() took %.3f ms\n", __elapsed_ms); + if (error) { fprintf(stderr, "%% Consume error: %s\n", rd_kafka_error_string(error)); @@ -266,26 +279,28 @@ int main(int argc, char **argv) { continue; } - /* Proper message. */ - printf("Message on %s [%" PRId32 "] at offset %" PRId64 - " (leader epoch %" PRId32 "):\n", - rd_kafka_topic_name(rkm->rkt), rkm->partition, - rkm->offset, rd_kafka_message_leader_epoch(rkm)); - - /* Print the message key. */ - if (rkm->key && is_printable(rkm->key, rkm->key_len)) - printf(" Key: %.*s\n", (int)rkm->key_len, - (const char *)rkm->key); - else if (rkm->key) - printf(" Key: (%d bytes)\n", (int)rkm->key_len); - - /* Print the message value/payload. */ - if (rkm->payload && - is_printable(rkm->payload, rkm->len)) - printf(" Value: %.*s\n", (int)rkm->len, - (const char *)rkm->payload); - else if (rkm->payload) - printf(" Value: (%d bytes)\n", (int)rkm->len); + if((int)rcvd_msgs < 100) { + /* Proper message. */ + printf("Message on %s [%" PRId32 "] at offset %" PRId64 + " (leader epoch %" PRId32 "):\n", + rd_kafka_topic_name(rkm->rkt), rkm->partition, + rkm->offset, rd_kafka_message_leader_epoch(rkm)); + + /* Print the message key. 
*/ + if (rkm->key && is_printable(rkm->key, rkm->key_len)) + printf(" Key: %.*s\n", (int)rkm->key_len, + (const char *)rkm->key); + else if (rkm->key) + printf(" Key: (%d bytes)\n", (int)rkm->key_len); + + /* Print the message value/payload. */ + if (rkm->payload && + is_printable(rkm->payload, rkm->len)) + printf(" Value: %.*s\n", (int)rkm->len, + (const char *)rkm->payload); + else if (rkm->payload) + printf(" Value: (%d bytes)\n", (int)rkm->len); + } rd_kafka_message_destroy(rkm); } diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 5e0d61b4f8..9a1e77fae0 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1095,8 +1095,8 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, } // done: - if (rkt) - rd_kafka_topic_destroy0(rkt); + RD_IF_FREE(NodeEndpoints.NodeEndpoints, rd_free); + RD_IF_FREE(rkt, rd_kafka_topic_destroy0); return RD_KAFKA_RESP_ERR_NO_ERROR; err_parse: diff --git a/src/rdkafka_msgset_reader.c b/src/rdkafka_msgset_reader.c index 451dd35442..da7bdbe108 100644 --- a/src/rdkafka_msgset_reader.c +++ b/src/rdkafka_msgset_reader.c @@ -1422,6 +1422,8 @@ rd_kafka_msgset_reader_run(rd_kafka_msgset_reader_t *msetr) { err = RD_KAFKA_RESP_ERR_NO_ERROR; } + printf(" +++++++++++++++++++ Received %d messages\n", msetr->msetr_msgcnt); + rd_rkb_dbg(msetr->msetr_rkb, MSG | RD_KAFKA_DBG_FETCH, "CONSUME", "Enqueue %i %smessage(s) (%" PRId64 " bytes, %d ops) on %s [%" PRId32 From 089d6ed66b8679cb2f9111dfa9accca8dfb0f541 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 28 Oct 2025 16:49:04 +0530 Subject: [PATCH 16/37] Added multiple broker handing of should not fetch case --- src/rdkafka_broker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index e5c672968b..2733cbdb11 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3498,10 +3498,10 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t 
*rko) { "Ignoring SHARE_FETCH op: " "already fetching"); rd_kafka_op_reply(rko, RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS); - } else if(rko->rko_u.share_fetch.should_fetch) { - rd_kafka_broker_share_fetch(rkb, rko, rd_clock()); } + rd_kafka_broker_share_fetch(rkb, rko, rd_clock()); + // if (!rko->rko_u.share_fetch.should_fetch) { // rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", // "Ignoring SHARE_FETCH op: " From 5b4d9c50df7fdefdda20048c4b8122cba72c58e2 Mon Sep 17 00:00:00 2001 From: Pratyush Ranjan <156985928+PratRanj07@users.noreply.github.com> Date: Thu, 20 Nov 2025 19:43:58 +0530 Subject: [PATCH 17/37] Call sharegroupheartbeat when leaving group (#5247) --- src/rdkafka_cgrp.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/rdkafka_cgrp.c b/src/rdkafka_cgrp.c index e462e6843a..91711d7eac 100644 --- a/src/rdkafka_cgrp.c +++ b/src/rdkafka_cgrp.c @@ -1086,7 +1086,91 @@ rd_kafka_cgrp_handle_ConsumerGroupHeartbeat_leave(rd_kafka_t *rk, goto err; } +static void rd_kafka_cgrp_handle_ShareGroupHeartbeat_leave( + rd_kafka_t *rk, + rd_kafka_broker_t *rkb, + rd_kafka_resp_err_t err, + rd_kafka_buf_t *rkbuf, + rd_kafka_buf_t *request, + void *opaque) { + rd_kafka_cgrp_t *rkcg = opaque; + const int log_decode_errors = LOG_ERR; + int16_t ErrorCode = 0; + + if (err) { + ErrorCode = err; + goto err; + } + + rd_kafka_buf_read_throttle_time(rkbuf); + + rd_kafka_buf_read_i16(rkbuf, &ErrorCode); +err: + if (ErrorCode) + rd_kafka_dbg( + rkb->rkb_rk, CGRP, "LEAVEGROUP", + "ShareGroupHeartbeat response error in state %s: %s", + rd_kafka_cgrp_state_names[rkcg->rkcg_state], + rd_kafka_err2str(ErrorCode)); + else + rd_kafka_dbg( + rkb->rkb_rk, CGRP, "LEAVEGROUP", + "ShareGroupHeartbeat response received in state %s", + rd_kafka_cgrp_state_names[rkcg->rkcg_state]); + rd_kafka_cgrp_consumer_reset(rkcg); + if (ErrorCode != RD_KAFKA_RESP_ERR__DESTROY) { + rd_assert(thrd_is_current(rk->rk_thread)); + rkcg->rkcg_flags &= 
~RD_KAFKA_CGRP_F_WAIT_LEAVE; + rd_kafka_cgrp_try_terminate(rkcg); + } + return; +err_parse: + ErrorCode = rkbuf->rkbuf_err; + goto err; +} + +static void rd_kafka_cgrp_share_consumer_leave(rd_kafka_cgrp_t *rkcg) { + int32_t member_epoch = -1; + + if (rkcg->rkcg_flags & RD_KAFKA_CGRP_F_WAIT_LEAVE) { + rd_kafka_dbg(rkcg->rkcg_rk, CGRP, "LEAVE", + "Group \"%.*s\": leave (in state %s): " + "ShareGroupHeartbeat already in-transit", + RD_KAFKAP_STR_PR(rkcg->rkcg_group_id), + rd_kafka_cgrp_state_names[rkcg->rkcg_state]); + return; + } + + rd_kafka_dbg(rkcg->rkcg_rk, CGRP, "LEAVE", + "Group \"%.*s\": leave (in state %s)", + RD_KAFKAP_STR_PR(rkcg->rkcg_group_id), + rd_kafka_cgrp_state_names[rkcg->rkcg_state]); + + rkcg->rkcg_flags |= RD_KAFKA_CGRP_F_WAIT_LEAVE; + + if (rkcg->rkcg_state == RD_KAFKA_CGRP_STATE_UP) { + rd_rkb_dbg(rkcg->rkcg_curr_coord, CONSUMER, "LEAVE", + "Share consumer: leaving group"); + rd_kafka_ShareGroupHeartbeatRequest( + rkcg->rkcg_coord, rkcg->rkcg_group_id, rkcg->rkcg_member_id, + member_epoch, + NULL /* no rack */, + NULL /* no subscription topics */, + RD_KAFKA_REPLYQ(rkcg->rkcg_ops, 0), + rd_kafka_cgrp_handle_ShareGroupHeartbeat_leave, rkcg); + } else { + rd_kafka_cgrp_handle_ShareGroupHeartbeat_leave( + rkcg->rkcg_rk, rkcg->rkcg_coord, + RD_KAFKA_RESP_ERR__WAIT_COORD, NULL, NULL, rkcg); + } +} + static void rd_kafka_cgrp_consumer_leave(rd_kafka_cgrp_t *rkcg) { + if (RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk)) { + rd_kafka_cgrp_share_consumer_leave(rkcg); + return; + } + int32_t member_epoch = -1; if (rkcg->rkcg_flags & RD_KAFKA_CGRP_F_WAIT_LEAVE) { From dbee935a345d4b90233d91029cbc0a54c88642f8 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 13 Nov 2025 20:40:09 +0530 Subject: [PATCH 18/37] Add session partition information --- src/rdkafka_broker.c | 8 ++++++++ src/rdkafka_broker.h | 32 ++++++++++++++++++-------------- src/rdkafka_partition.h | 4 ++++ 3 files changed, 30 insertions(+), 14 
deletions(-) diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 2733cbdb11..0f3731672d 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -4892,6 +4892,10 @@ void rd_kafka_broker_destroy_final(rd_kafka_broker_t *rkb) { rd_assert(TAILQ_EMPTY(&rkb->rkb_waitresps.rkbq_bufs)); rd_assert(TAILQ_EMPTY(&rkb->rkb_retrybufs.rkbq_bufs)); rd_assert(TAILQ_EMPTY(&rkb->rkb_toppars)); + rd_assert(TAILQ_EMPTY(&rkb->rkb_share_fetch_session.toppars_in_session)); + rd_assert(TAILQ_EMPTY(&rkb->rkb_share_fetch_session.toppars_to_forget)); + rd_assert(!rkb->rkb_share_fetch_session.adding_toppars); + rd_assert(!rkb->rkb_share_fetch_session.forgetting_toppars); if (rkb->rkb_source != RD_KAFKA_INTERNAL && (rkb->rkb_rk->rk_conf.security_protocol == @@ -5024,6 +5028,10 @@ rd_kafka_broker_t *rd_kafka_broker_add(rd_kafka_t *rk, mtx_init(&rkb->rkb_logname_lock, mtx_plain); rkb->rkb_logname = rd_strdup(rkb->rkb_name); TAILQ_INIT(&rkb->rkb_toppars); + TAILQ_INIT(&rkb->rkb_share_fetch_session.toppars_in_session); + TAILQ_INIT(&rkb->rkb_share_fetch_session.toppars_to_forget); + rkb->rkb_share_fetch_session.forgetting_toppars = NULL; + rkb->rkb_share_fetch_session.adding_toppars = NULL; CIRCLEQ_INIT(&rkb->rkb_active_toppars); TAILQ_INIT(&rkb->rkb_monitors); rd_kafka_bufq_init(&rkb->rkb_outbufs); diff --git a/src/rdkafka_broker.h b/src/rdkafka_broker.h index bad949de07..6110be424a 100644 --- a/src/rdkafka_broker.h +++ b/src/rdkafka_broker.h @@ -108,20 +108,24 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ TAILQ_HEAD(, rd_kafka_toppar_s) rkb_toppars; struct { - // TAILQ_HEAD(, rd_kafka_toppar_s) toppars; /* List of toppars - // in the current - // fetch session. - // Any new added toppar in rkb_toppars will be added here after successful share fetch request. - // Any removed toppar from rkb_toppars will be removed here after successful share fetch request. 
- // rkb_fetch_session.forgotten_toppars is calculated by rkb_fetch_session.toppars - rkb_toppars */ - - // TAILQ_HEAD(, rd_kafka_toppar_s) next_forgotten_toppars; /* List of toppars - // * that are removed from rkb_toppars but not yet removed from fetch session. - // * Will be sent in next fetch request. - // * Cleared when fetch session is reset or when fetch request is successful. */ - // TAILQ_HEAD(, rd_kafka_toppar_s) forgetting_toppars; /* List of toppars - // * that are removed from rkb_toppars and sent in fetch request but not yet removed from fetch session. - // * Cleared when fetch session is reset or when fetch request is successful. */ + TAILQ_HEAD(, rd_kafka_toppar_s) toppars_in_session; /* List of toppars + in the current + fetch session. + Any new added toppar in rkb_toppars will be added here after successful share fetch request. + Any removed toppar from rkb_toppars will be removed from here after successful share fetch request. + rkb_share_fetch_session.forgotten_toppars is calculated by rkb_share_fetch_session.toppars - rkb_toppars */ + rd_list_t *adding_toppars; /* List of toppars + * that are added to rkb_toppars but not yet added to fetch session. + * Will be sent in next fetch request. + * Cleared when fetch session is reset or when fetch request is successful. */ + + TAILQ_HEAD(, rd_kafka_toppar_s) toppars_to_forget; /* List of toppars + * that are removed from rkb_toppars but not yet removed from fetch session. + * Will be sent in next fetch request. + * Cleared when fetch session is reset or when fetch request is successful. */ + rd_list_t *forgetting_toppars; /* List of toppars + * that are removed from rkb_toppars and sent in fetch request but not yet removed from fetch session. + * Cleared when fetch session is reset or when fetch request is successful. 
*/ int32_t epoch; /* Current fetch session * epoch, or -1 if no session */ } rkb_share_fetch_session; diff --git a/src/rdkafka_partition.h b/src/rdkafka_partition.h index 0665c69c4e..cd8ddbeadf 100644 --- a/src/rdkafka_partition.h +++ b/src/rdkafka_partition.h @@ -145,6 +145,10 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ rktp_txnlink; /**< rd_kafka_t.rk_eos. * txn_pend_rktps * or txn_rktps */ + TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkb_session_link; /* rkb_share_fetch_session + * toppars_in_session link */ + TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkb_session_forgot_link; /* rkb_share_fetch_session + * toppars_to_forget link */ rd_kafka_topic_t *rktp_rkt; /**< This toppar's topic object */ int32_t rktp_partition; // LOCK: toppar_lock() + topic_wrlock() From 3a644ec95b4154cb7dafc3121e0d26faf2724b66 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Mon, 17 Nov 2025 11:14:58 +0530 Subject: [PATCH 19/37] Added Share partition level operations --- src/rdkafka_op.c | 8 ++++++++ src/rdkafka_op.h | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/rdkafka_op.c b/src/rdkafka_op.c index e15db3f35b..0cce441768 100644 --- a/src/rdkafka_op.c +++ b/src/rdkafka_op.c @@ -125,6 +125,10 @@ const char *rd_kafka_op2str(rd_kafka_op_type_t type) { [RD_KAFKA_OP_ELECTLEADERS] = "REPLY:ELECTLEADERS", [RD_KAFKA_OP_SHARE_FETCH] = "REPLY:SHARE_FETCH", [RD_KAFKA_OP_SHARE_FETCH_FANOUT] = "REPLY:SHARE_FETCH_FANOUT", + [RD_KAFKA_OP_SHARE_SESSION_PARTITION_ADD] = + "REPLY:SHARE_SESSION_PARTITION_ADD", + [RD_KAFKA_OP_SHARE_SESSION_PARTITION_REMOVE] = + "REPLY:SHARE_SESSION_PARTITION_REMOVE", }; if (type & RD_KAFKA_OP_REPLY) @@ -292,6 +296,10 @@ rd_kafka_op_t *rd_kafka_op_new0(const char *source, rd_kafka_op_type_t type) { [RD_KAFKA_OP_SHARE_FETCH] = sizeof(rko->rko_u.share_fetch), [RD_KAFKA_OP_SHARE_FETCH_FANOUT] = sizeof(rko->rko_u.share_fetch_fanout), + [RD_KAFKA_OP_SHARE_SESSION_PARTITION_ADD] = + _RD_KAFKA_OP_EMPTY, + 
[RD_KAFKA_OP_SHARE_SESSION_PARTITION_REMOVE] = + _RD_KAFKA_OP_EMPTY, }; size_t tsize = op2size[type & ~RD_KAFKA_OP_FLAGMASK]; diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index 705f16b770..9e6fd70d2e 100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -192,7 +192,10 @@ typedef enum { RD_KAFKA_OP_SHARE_FETCH, /**< broker op: Issue share fetch request if applicable. */ RD_KAFKA_OP_SHARE_FETCH_FANOUT, /**< fanout share fetch operation */ - + RD_KAFKA_OP_SHARE_SESSION_PARTITION_ADD, /**< share session: + * add partition */ + RD_KAFKA_OP_SHARE_SESSION_PARTITION_REMOVE, /**< share session: + * remove partition */ RD_KAFKA_OP__END } rd_kafka_op_type_t; From 879534617627d241faa43680649a73f5de9eda3a Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 18 Nov 2025 00:16:33 +0530 Subject: [PATCH 20/37] Working with session partition management --- src/rdkafka.c | 8 +- src/rdkafka_broker.c | 134 ++++++++++++- src/rdkafka_broker.h | 22 ++- src/rdkafka_cgrp.c | 27 +++ src/rdkafka_fetcher.c | 429 ++++++++++++++++++++++++++++++++-------- src/rdkafka_partition.c | 40 ++++ src/rdkafka_partition.h | 20 +- 7 files changed, 576 insertions(+), 104 deletions(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index ef84e1e560..e8d7259838 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -3209,7 +3209,13 @@ rd_kafka_error_t *rd_kafka_share_consume_batch( "Consumer group not initialized"); /* If we have any pending items on the consumer queue, don't issue new - * requests, rather, deal with them first. */ + * requests, rather, deal with them first. + * + * TODO KIP-932: + * Above statement might be incorrect as we have to send all the pending + * acknowledgements irrespective of whether there are messages to be + * consumed or not. 
+ */ if (likely(rd_kafka_q_len(rkcg->rkcg_q) == 0)) { rd_kafka_dbg(rk, CGRP, "SHARE", "Issuing share fetch fanout to main thread with " diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 0f3731672d..763b48de5b 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3178,6 +3178,86 @@ static rd_kafka_resp_err_t rd_kafka_broker_destroy_error(rd_kafka_t *rk) { : RD_KAFKA_RESP_ERR__DESTROY_BROKER; } + +/** + * @brief Add description. + * + * @locality broker thread + * @locks toppar lock + * @locks broker lock + */ +static void rd_kafka_broker_share_session_add_remove_toppar(rd_list_t **toppars_add_list, + rd_list_t **toppars_remove_list, + rd_kafka_toppar_t *rktp) { + if (!*toppars_add_list) { + /** + * TODO KIP-932: Use a better destroy method. Right now + * manually destroying. + */ + *toppars_add_list = rd_list_new(1, rd_kafka_toppar_destroy_free); + } + + rd_list_add(*toppars_add_list, rd_kafka_toppar_keep(rktp)); + + /* Remove from removing toppars if present there. */ + if (*toppars_remove_list) { + rd_list_remove(*toppars_remove_list, rktp); + rd_kafka_toppar_destroy(rktp); + if(rd_list_empty(*toppars_remove_list)) { + rd_list_destroy(*toppars_remove_list); + *toppars_remove_list = NULL; + } + } +} + +/** + * @brief Add description. + * + * In some scenarios, we don't have leader information present while assignment is done. In which case, + * when the leader is known later, we need to add the toppar to the broker's share fetch session. Being called from two places: + * 1) when a toppar is being added to the assignment in cgrp. + * 2) when a toppar is being added to the leader + * + * @locality broker thread + * @locks toppar lock + * @locks broker lock + */ +static void rd_kafka_broker_share_session_toppar_add(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp) { + /** + * TODO KIP-932: + * * Check if rktp is present in current session already or not? + * * Check if rktp is already present in toppars_to_add? 
+ */ + if (RD_KAFKA_IS_SHARE_CONSUMER(rktp->rktp_rkt->rkt_rk)) { + rd_kafka_broker_share_session_add_remove_toppar( + &rkb->rkb_share_fetch_session.toppars_to_add, &rkb->rkb_share_fetch_session.toppars_to_forget, rktp); + } +} + +/** + * @brief Add description. + * + * In some scenarios, we have to move the toppar out of the broker's share fetch session like leader migration to another broker. + * Being called from two places: + * 1) when a toppar is being removed from the assignment in cgrp. + * 2) when a toppar is being removed from the leader. + * + * @locality broker thread + * @locks toppar lock + * @locks broker lock + */ +static void rd_kafka_broker_share_session_toppar_remove(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp) { + /** + * TODO KIP-932: + * * Check if rktp is present in current session already or not? No need to add if it is not present? + * * Check if rktp is already present in toppars_to_forget? + */ + if (RD_KAFKA_IS_SHARE_CONSUMER(rktp->rktp_rkt->rkt_rk)) { + rd_kafka_broker_share_session_add_remove_toppar( + &rkb->rkb_share_fetch_session.toppars_to_forget, &rkb->rkb_share_fetch_session.toppars_to_add, rktp); + } +} + /** * @brief Serve a broker op (an op posted by another thread to be handled by * this broker's thread). 
@@ -3323,6 +3403,9 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { rd_kafka_broker_lock(rkb); TAILQ_INSERT_TAIL(&rkb->rkb_toppars, rktp, rktp_rkblink); rkb->rkb_toppar_cnt++; + if(rd_kafka_toppar_is_on_cgrp(rktp, rd_false)) { + rd_kafka_broker_share_session_toppar_add(rkb, rktp); + } rd_kafka_broker_unlock(rkb); rktp->rktp_broker = rkb; rd_assert(!rktp->rktp_msgq_wakeup_q); @@ -3421,6 +3504,7 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { rd_kafka_broker_lock(rkb); TAILQ_REMOVE(&rkb->rkb_toppars, rktp, rktp_rkblink); rkb->rkb_toppar_cnt--; + rd_kafka_broker_share_session_toppar_remove(rkb, rktp); rd_kafka_broker_unlock(rkb); rd_kafka_broker_destroy(rktp->rktp_broker); if (rktp->rktp_msgq_wakeup_q) { @@ -3606,6 +3690,28 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { wakeup = rd_true; break; + + case RD_KAFKA_OP_SHARE_SESSION_PARTITION_ADD: + rd_rkb_dbg(rkb, CGRP, "SHARESESSION", + "Received SHARE_SESSION_PARTITION_ADD op for " + "topic %s [%" PRId32 "]", + rko->rko_rktp->rktp_rkt->rkt_topic->str, + rko->rko_rktp->rktp_partition); + + rd_kafka_broker_share_session_toppar_add( + rkb, rko->rko_rktp); + break; + + case RD_KAFKA_OP_SHARE_SESSION_PARTITION_REMOVE: + rd_rkb_dbg(rkb, CGRP, "SHARESESSION", + "Received SHARE_SESSION_PARTITION_REMOVE op for " + "topic %s [%" PRId32 "]", + rko->rko_rktp->rktp_rkt->rkt_topic->str, + rko->rko_rktp->rktp_partition); + + rd_kafka_broker_share_session_toppar_remove( + rkb, rko->rko_rktp); + break; default: rd_kafka_assert(rkb->rkb_rk, !*"unhandled op type"); @@ -4326,8 +4432,23 @@ static void rd_kafka_broker_producer_serve(rd_kafka_broker_t *rkb, } +// void rd_kafka_broker_update_share_fetch_session(rd_kafka_broker_t *rkb) { +// rd_kafka_toppar_t *rktp, *rktp_tmp; +// rd_bool_t needs_update = rd_false; + +// TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link) { +// 
rd_kafka_toppar_is_valid_to_send_for_share_fetch(rktp); +// } + +// if (needs_update) +// rd_kafka_toppar_share_fetch_session_update(rkb); +// } + + /** * Consumer serving + * + * TODO KIP-932: Fix timeouts. */ static void rd_kafka_broker_share_consumer_serve(rd_kafka_broker_t *rkb, rd_ts_t abs_timeout) { @@ -4360,6 +4481,8 @@ static void rd_kafka_broker_share_consumer_serve(rd_kafka_broker_t *rkb, if (min_backoff > abs_timeout) min_backoff = abs_timeout; + // rd_kafka_broker_update_share_fetch_session(rkb); + if (rd_kafka_broker_ops_io_serve(rkb, min_backoff)) return; /* Wakeup */ @@ -4893,9 +5016,8 @@ void rd_kafka_broker_destroy_final(rd_kafka_broker_t *rkb) { rd_assert(TAILQ_EMPTY(&rkb->rkb_retrybufs.rkbq_bufs)); rd_assert(TAILQ_EMPTY(&rkb->rkb_toppars)); rd_assert(TAILQ_EMPTY(&rkb->rkb_share_fetch_session.toppars_in_session)); - rd_assert(TAILQ_EMPTY(&rkb->rkb_share_fetch_session.toppars_to_forget)); - rd_assert(!rkb->rkb_share_fetch_session.adding_toppars); - rd_assert(!rkb->rkb_share_fetch_session.forgetting_toppars); + rd_assert(!rkb->rkb_share_fetch_session.toppars_to_add); + rd_assert(!rkb->rkb_share_fetch_session.toppars_to_forget); if (rkb->rkb_source != RD_KAFKA_INTERNAL && (rkb->rkb_rk->rk_conf.security_protocol == @@ -5029,9 +5151,9 @@ rd_kafka_broker_t *rd_kafka_broker_add(rd_kafka_t *rk, rkb->rkb_logname = rd_strdup(rkb->rkb_name); TAILQ_INIT(&rkb->rkb_toppars); TAILQ_INIT(&rkb->rkb_share_fetch_session.toppars_in_session); - TAILQ_INIT(&rkb->rkb_share_fetch_session.toppars_to_forget); - rkb->rkb_share_fetch_session.forgetting_toppars = NULL; - rkb->rkb_share_fetch_session.adding_toppars = NULL; + rkb->rkb_share_fetch_session.toppars_in_session_cnt = 0; + rkb->rkb_share_fetch_session.toppars_to_forget = NULL; + rkb->rkb_share_fetch_session.toppars_to_add = NULL; CIRCLEQ_INIT(&rkb->rkb_active_toppars); TAILQ_INIT(&rkb->rkb_monitors); rd_kafka_bufq_init(&rkb->rkb_outbufs); diff --git a/src/rdkafka_broker.h b/src/rdkafka_broker.h index 
6110be424a..d6df843635 100644 --- a/src/rdkafka_broker.h +++ b/src/rdkafka_broker.h @@ -114,20 +114,24 @@ struct rd_kafka_broker_s { /* rd_kafka_broker_t */ Any new added toppar in rkb_toppars will be added here after successful share fetch request. Any removed toppar from rkb_toppars will be removed from here after successful share fetch request. rkb_share_fetch_session.forgotten_toppars is calculated by rkb_share_fetch_session.toppars - rkb_toppars */ - rd_list_t *adding_toppars; /* List of toppars - * that are added to rkb_toppars but not yet added to fetch session. + int toppars_in_session_cnt; + rd_list_t *toppars_to_add; /* TODO KIP-932: Move this from `rd_list_t` to `TAILQ_HEAD(, rd_kafka_toppar_s)` for performance improvements. + * List of toppars that are added to rkb_toppars but not yet added to fetch session. * Will be sent in next fetch request. * Cleared when fetch session is reset or when fetch request is successful. */ - - TAILQ_HEAD(, rd_kafka_toppar_s) toppars_to_forget; /* List of toppars - * that are removed from rkb_toppars but not yet removed from fetch session. - * Will be sent in next fetch request. - * Cleared when fetch session is reset or when fetch request is successful. */ - rd_list_t *forgetting_toppars; /* List of toppars + rd_list_t *adding_toppars; + rd_list_t *toppars_to_forget; /* TODO KIP-932: Move this from `rd_list_t` to `TAILQ_HEAD(, rd_kafka_toppar_s)` for performance improvements. + * List of toppars * that are removed from rkb_toppars and sent in fetch request but not yet removed from fetch session. * Cleared when fetch session is reset or when fetch request is successful. */ + rd_list_t *forgetting_toppars; int32_t epoch; /* Current fetch session - * epoch, or -1 if no session */ + * epoch, or -1 if leaving the session + * TODO KIP-932: Handle 0 and -1 properly. + * * Can we move from -1 to 0? + * * Maybe in some error case? + * * Is there a way in which we close a previous session and start a new one? 
+ */ } rkb_share_fetch_session; int rkb_toppar_cnt; diff --git a/src/rdkafka_cgrp.c b/src/rdkafka_cgrp.c index 91711d7eac..7a4732e0a3 100644 --- a/src/rdkafka_cgrp.c +++ b/src/rdkafka_cgrp.c @@ -4061,6 +4061,19 @@ static void rd_kafka_cgrp_partition_add(rd_kafka_cgrp_t *rkcg, rd_kafka_toppar_lock(rktp); rd_assert(!(rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_CGRP)); rktp->rktp_flags |= RD_KAFKA_TOPPAR_F_ON_CGRP; + if(RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk) && rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_RKB) { + rd_kafka_op_t *rko; + rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_SESSION_PARTITION_ADD); + rko->rko_rktp = rd_kafka_toppar_keep(rktp); /* refcnt from _add op */ + rd_kafka_dbg(rkcg->rkcg_rk, CGRP, "SHARESESSPARTCGRPADD", + "Group \"%s\": enqueue partition add for %s [%" PRId32 "] " + "on broker %s", + rkcg->rkcg_group_id->str, + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition, + rd_kafka_broker_name(rktp->rktp_broker)); + rd_kafka_q_enq(rktp->rktp_broker->rkb_ops, rko); + } rd_kafka_toppar_unlock(rktp); rd_kafka_toppar_keep(rktp); @@ -4083,6 +4096,20 @@ static void rd_kafka_cgrp_partition_del(rd_kafka_cgrp_t *rkcg, rd_assert(rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_CGRP); rktp->rktp_flags &= ~RD_KAFKA_TOPPAR_F_ON_CGRP; + if(RD_KAFKA_IS_SHARE_CONSUMER(rkcg->rkcg_rk) && rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_RKB) { + rd_kafka_op_t *rko; + rko = rd_kafka_op_new(RD_KAFKA_OP_SHARE_SESSION_PARTITION_REMOVE); + rko->rko_rktp = rd_kafka_toppar_keep(rktp); /* refcnt from _add op */ + rd_kafka_dbg(rkcg->rkcg_rk, CGRP, "SHARESESSPARTCGRPDEL", + "Group \"%s\": enqueue partition remove for %s [%" PRId32 "] " + "on broker %s", + rkcg->rkcg_group_id->str, + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition, + rd_kafka_broker_name(rktp->rktp_broker)); + rd_kafka_q_enq(rktp->rktp_broker->rkb_ops, rko); + } + rd_kafka_toppar_purge_internal_fetch_queue_maybe(rktp); rd_kafka_toppar_unlock(rktp); diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 
9a1e77fae0..f00fdabdd5 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -916,6 +916,8 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( rd_kafka_buf_read_i32(rkbuf, &PartitionId); // Partition rd_kafka_buf_read_i16(rkbuf, &PartitionFetchErrorCode); // PartitionFetchError rd_kafka_buf_read_str(rkbuf, &PartitionFetchErrorStr); // ErrorString + /* TODO KIP-932: We should reset (to INVALID) previous acknowledgement information in the reply + or maybe while sending the request itself? */ rd_kafka_buf_read_i16(rkbuf, &AcknowledgementErrorCode); // AcknowledgementError rd_kafka_buf_read_str(rkbuf, &AcknowledgementErrorStr); // AcknowledgementErrorString rd_kafka_buf_read_CurrentLeader(rkbuf, &CurrentLeader); // CurrentLeader @@ -964,7 +966,13 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( (size_t) MessageSetSize)) rd_kafka_buf_check_len(rkbuf, MessageSetSize); - /* Parse messages */ + /* Parse messages + TODO KIP-932: This part might raise issue as we are adding + messages to the consumer queue partition by + partition. The fetch returns messages as soon + as they are available, so messages for different + partitions might not be sent at once to the user. + */ err = rd_kafka_msgset_parse(rkbuf, request, rktp, NULL, &tver); @@ -1110,6 +1118,170 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, } +/** + * TODO KIP-932: Implement. 
+ */ +// static void rd_kafak_broker_session_reset(rd_kafka_broker_t *rkb) { +// } +static void rd_kafka_broker_session_update_epoch(rd_kafka_broker_t *rkb) { + if (rkb->rkb_share_fetch_session.epoch == -1) { + rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + "Not updating next epoch for -1 as it should be -1 again."); + return; + } + if (rkb->rkb_share_fetch_session.epoch == INT32_MAX) + rkb->rkb_share_fetch_session.epoch = 1; + else + rkb->rkb_share_fetch_session.epoch++; +} + +static void rd_kafka_broker_session_add_partition_to_toppars_in_session(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp) { + rd_kafka_toppar_t *session_rktp; + TAILQ_FOREACH(session_rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link) { + if(rktp == session_rktp) { + rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + "%s [%" PRId32 + "]: already in ShareFetch session", + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition); + return; + } + } + rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + "%s [%" PRId32 + "]: adding to ShareFetch session", + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition); + TAILQ_INSERT_TAIL(&rkb->rkb_share_fetch_session.toppars_in_session, rd_kafka_toppar_keep(rktp), rktp_rkb_session_link); + rkb->rkb_share_fetch_session.toppars_in_session_cnt++; +} + +static void rd_kafka_broker_session_remove_partition_from_toppars_in_session(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp) { + rd_kafka_toppar_t *session_rktp, *tmp_rktp; + TAILQ_FOREACH_SAFE(session_rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link, tmp_rktp) { + if(rktp == session_rktp) { + TAILQ_REMOVE(&rkb->rkb_share_fetch_session.toppars_in_session, session_rktp, rktp_rkb_session_link); + rd_kafka_toppar_destroy(session_rktp); // from session list + rkb->rkb_share_fetch_session.toppars_in_session_cnt--; + rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + "%s [%" PRId32 + "]: removed from ShareFetch session", + rktp->rktp_rkt->rkt_topic->str, + 
rktp->rktp_partition); + return; + } + } + rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + "%s [%" PRId32 + "]: not found in ShareFetch session", + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition); +} + +static void rd_kafka_broker_session_update_toppars_in_session(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp, rd_bool_t add) { + if(add) + rd_kafka_broker_session_add_partition_to_toppars_in_session(rkb, rktp); + else + rd_kafka_broker_session_remove_partition_from_toppars_in_session(rkb, rktp); + +} + + +// static void rd_kafka_broker_session_update_added_partitions(rd_kafka_broker_t *rkb) { +// size_t i; +// rd_kafka_toppar_t *rktp, *removed_rktp; +// rd_list_t *toppars_to_add = rkb->rkb_share_fetch_session.toppars_to_add; +// rd_list_t *added_toppars = rkb->rkb_share_fetch_session.adding_toppars; + +// if(added_toppars == NULL || rd_list_cnt(added_toppars) == 0) +// return; + +// RD_LIST_FOREACH(rktp, added_toppars, i) { +// rd_kafka_broker_session_update_toppars_in_session(rkb, rktp, rd_true /* add */); +// if(toppars_to_add) { +// removed_rktp = rd_list_remove(toppars_to_add, rktp); +// if(removed_rktp) +// rd_kafka_toppar_destroy(removed_rktp); // from partitions list +// } +// } +// rd_list_destroy(added_toppars); +// rkb->rkb_share_fetch_session.adding_toppars = NULL; +// } + +// static void rd_kafka_broker_session_update_removed_partitions(rd_kafka_broker_t *rkb) { +// size_t i; +// rd_kafka_toppar_t *rktp, *removed_rktp; +// rd_list_t *toppars_to_forget = rkb->rkb_share_fetch_session.toppars_to_forget; +// rd_list_t *forgotten_toppars = rkb->rkb_share_fetch_session.forgetting_toppars; + +// if(forgotten_toppars == NULL || rd_list_cnt(forgotten_toppars) == 0) +// return; + +// RD_LIST_FOREACH(rktp, forgotten_toppars, i) { +// rd_kafka_broker_session_update_toppars_in_session(rkb, rktp, rd_false /* remove */); +// if(toppars_to_forget) { +// removed_rktp = rd_list_remove(toppars_to_forget, rktp); +// if(removed_rktp) +// 
rd_kafka_toppar_destroy(removed_rktp); // from partitions list +// } +// } +// rd_list_destroy(forgotten_toppars); +// rkb->rkb_share_fetch_session.forgetting_toppars = NULL; +// } + +static void rd_kafka_broker_session_update_toppars_list( + rd_kafka_broker_t *rkb, + rd_list_t **request_toppars_ptr, + rd_list_t *toppars_to_remove, + rd_bool_t add) { + size_t i; + rd_kafka_toppar_t *rktp, *removed_rktp; + rd_list_t *request_toppars = *request_toppars_ptr; + + if (request_toppars == NULL || rd_list_cnt(request_toppars) == 0) + return; + + RD_LIST_FOREACH(rktp, request_toppars, i) { + rd_kafka_broker_session_update_toppars_in_session(rkb, rktp, add); + if (toppars_to_remove) { + removed_rktp = rd_list_remove(toppars_to_remove, rktp); + if (removed_rktp) + rd_kafka_toppar_destroy(removed_rktp); /* from partitions list */ + } + } + rd_list_destroy(request_toppars); + *request_toppars_ptr = NULL; +} + +static void rd_kafka_broker_session_update_added_partitions( + rd_kafka_broker_t *rkb) { + rd_kafka_broker_session_update_toppars_list( + rkb, &rkb->rkb_share_fetch_session.adding_toppars, + rkb->rkb_share_fetch_session.toppars_to_add, rd_true); +} + +static void rd_kafka_broker_session_update_removed_partitions( + rd_kafka_broker_t *rkb) { + rd_kafka_broker_session_update_toppars_list( + rkb, &rkb->rkb_share_fetch_session.forgetting_toppars, + rkb->rkb_share_fetch_session.toppars_to_forget, rd_false); +} + +static void rd_kafka_broker_session_update_partitions(rd_kafka_broker_t *rkb) { + rd_kafka_broker_session_update_added_partitions(rkb); + rd_kafka_broker_session_update_removed_partitions(rkb); +} + + +/** + * Update ShareFetch session state after a Fetch or ShareFetch response. + * TODO KIP-932: Improve efficiency of this function. + */ +static void rd_kafka_broker_session_update(rd_kafka_broker_t *rkb) { + rd_kafka_broker_session_update_epoch(rkb); + rd_kafka_broker_session_update_partitions(rkb); +} + /** * @broker ShareFetchResponse handling. 
* @@ -1123,9 +1295,10 @@ static void rd_kafka_broker_share_fetch_reply(rd_kafka_t *rk, void *opaque) { rd_kafka_op_t *rko_orig = opaque; - rkb->rkb_share_fetch_session.epoch++; if (err == RD_KAFKA_RESP_ERR__DESTROY) { + /* TODO KIP-932: Check what is needed out of the below */ + rd_kafka_broker_session_update(rkb); rd_kafka_op_reply(rko_orig, err); return; /* Terminating */ } @@ -1136,10 +1309,13 @@ static void rd_kafka_broker_share_fetch_reply(rd_kafka_t *rk, if (!err && reply) err = rd_kafka_share_fetch_reply_handle(rkb, reply, request); - if (rko_orig) rd_kafka_op_reply(rko_orig, err); + rd_kafka_broker_session_update(rkb); + // if (rkb->rkb_share_fetch_session.adding_toppars) + + /* TODO KIP-932: Check if this is the right place for this or after error handling */ rkb->rkb_fetching = 0; if (unlikely(err)) { @@ -1261,10 +1437,8 @@ void rd_kafka_ShareFetchRequest( int32_t max_bytes, int32_t max_records, int32_t batch_size, -// rd_kafka_toppar_t *toppars_to_send, -// int32_t toppars_to_send_cnt, - rd_kafka_toppar_t *forgotten_toppars, - int32_t forgotten_toppars_cnt, + rd_list_t *toppars_to_send, + rd_list_t *toppars_to_forget, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_toppar_t *rktp; @@ -1277,8 +1451,9 @@ void rd_kafka_ShareFetchRequest( rd_kafka_topic_t *rkt_last = NULL; int16_t ApiVersion = 0; size_t rkbuf_size = 0; - rd_bool_t has_acknowledgements = rd_false; - rd_bool_t has_forgotten_toppars = forgotten_toppars_cnt > 0 ? rd_true : rd_false; + int i; + rd_bool_t has_acknowledgements = toppars_to_send && rd_list_cnt(toppars_to_send) > 0 ? rd_true : rd_false; + rd_bool_t has_toppars_to_forget = toppars_to_forget && rd_list_cnt(toppars_to_forget) > 0 ? rd_true : rd_false; rd_bool_t is_fetching_messages = max_records > 0 ? 
rd_true : rd_false; /* @@ -1296,9 +1471,9 @@ void rd_kafka_ShareFetchRequest( rkbuf_size += 4 + 4 + 4 + 4 + 4 + 4 + 4; /* N x (topic id + partition id + acknowledgement) */ rkbuf_size += (rkb->rkb_toppar_cnt * (32 + 4 + acknowledgement_size)); - if( forgotten_toppars_cnt > 0) { + if( has_toppars_to_forget) { /* M x (topic id + partition id) */ - rkbuf_size += (forgotten_toppars_cnt * (32 + 4)); + rkbuf_size += (rd_list_cnt(toppars_to_forget) * (32 + 4)); } ApiVersion = rd_kafka_broker_ApiVersion_supported(rkb, RD_KAFKAP_ShareFetch, @@ -1347,78 +1522,75 @@ void rd_kafka_ShareFetchRequest( /* Write zero TopicArrayCnt but store pointer for later update */ of_TopicArrayCnt = rd_kafka_buf_write_arraycnt_pos(rkbuf); - if (rkb->rkb_toppar_cnt > 0) { - TAILQ_FOREACH(rktp, &rkb->rkb_toppars, rktp_rkblink) { - rd_kafka_toppar_lock(rktp); + RD_LIST_FOREACH(rktp, toppars_to_send, i) { - if(!(rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_CGRP)) { - rd_kafka_toppar_unlock(rktp); - continue; - } - if (rkt_last != rktp->rktp_rkt) { - if (rkt_last != NULL) { - /* Update PartitionArrayCnt */ - rd_kafka_buf_finalize_arraycnt( - rkbuf, of_PartitionArrayCnt, - PartitionArrayCnt); - /* Topic tags */ - rd_kafka_buf_write_tags_empty(rkbuf); - } - - /* Topic ID */ - rd_kafka_buf_write_uuid( - rkbuf, &rktp->rktp_rkt->rkt_topic_id); - - TopicArrayCnt++; - rkt_last = rktp->rktp_rkt; - /* Partition count */ - of_PartitionArrayCnt = - rd_kafka_buf_write_arraycnt_pos(rkbuf); - PartitionArrayCnt = 0; + /* TODO KIP-932: This condition will cause partitions of same topics + to be inside single instance of the topic as toppars_to_send is not + sorted. Eg: T1 0, T1 1, T2 0, T1 3, T1 5, T2 1 will translate to + T1 (0,1), T2 (0), T1 (3, 5), T2 (1) instead it should be + T1 (0,1,3,5) T2(0,1) Fix this. 
*/ + if (rkt_last != rktp->rktp_rkt) { + if (rkt_last != NULL) { + /* Update PartitionArrayCnt */ + rd_kafka_buf_finalize_arraycnt( + rkbuf, of_PartitionArrayCnt, + PartitionArrayCnt); + /* Topic tags */ + rd_kafka_buf_write_tags_empty(rkbuf); } - rd_kafka_toppar_unlock(rktp); - - PartitionArrayCnt++; + rd_kafka_topic_rdlock(rktp->rktp_rkt); + /* Topic ID */ + rd_kafka_buf_write_uuid( + rkbuf, &rktp->rktp_rkt->rkt_topic_id); + rd_kafka_topic_rdunlock(rktp->rktp_rkt); + + TopicArrayCnt++; + rkt_last = rktp->rktp_rkt; + /* Partition count */ + of_PartitionArrayCnt = + rd_kafka_buf_write_arraycnt_pos(rkbuf); + PartitionArrayCnt = 0; + } - /* Partition */ - rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); + PartitionArrayCnt++; - printf(" ------------------------------------------------------------------ AcknowledgementBatches for topic %.*s [%" PRId32 "] : first_offset=%" PRId64 ", last_offset=%" PRId64 "\n", - RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), - rktp->rktp_partition, - rktp->rktp_share_acknowledge.first_offset, - rktp->rktp_share_acknowledge.last_offset); - /* AcknowledgementBatches */ - if (rktp->rktp_share_acknowledge.first_offset >= 0) { - /* For now we only support ACCEPT */ - rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ - /* FirstOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); - /* LastOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); - /* AcknowledgementType */ - rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ - rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ - /* Acknowledgement tags */ - rd_kafka_buf_write_tags_empty(rkbuf); - has_acknowledgements = rd_true; - } else { - /* No acknowledgements */ - rd_kafka_buf_write_arraycnt(rkbuf, 0); - } + /* Partition */ + rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); - /* Partition tags */ + printf(" ------------------------------------------------------------------ AcknowledgementBatches for topic %.*s [%" 
PRId32 "] : first_offset=%" PRId64 ", last_offset=%" PRId64 "\n", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition, + rktp->rktp_share_acknowledge.first_offset, + rktp->rktp_share_acknowledge.last_offset); + /* AcknowledgementBatches */ + if (rktp->rktp_share_acknowledge.first_offset >= 0) { + /* For now we only support ACCEPT */ + rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ + /* FirstOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); + /* LastOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); + /* AcknowledgementType */ + rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ + /* Acknowledgement tags */ rd_kafka_buf_write_tags_empty(rkbuf); + } else { + /* No acknowledgements */ + rd_kafka_buf_write_arraycnt(rkbuf, 0); + } - rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", - "Share Fetch topic %.*s [%" PRId32 "]", - RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), - rktp->rktp_partition); + /* Partition tags */ + rd_kafka_buf_write_tags_empty(rkbuf); - cnt++; - } + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", + "Share Fetch topic %.*s [%" PRId32 "]", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); + + cnt++; } rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", @@ -1433,11 +1605,14 @@ void rd_kafka_ShareFetchRequest( rd_kafka_buf_write_tags_empty(rkbuf); } - if(has_acknowledgements || has_forgotten_toppars || is_fetching_messages) { + /* Update TopicArrayCnt */ + rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); + + if(has_acknowledgements || has_toppars_to_forget || is_fetching_messages) { rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "Share Fetch Request sent with%s%s%s", has_acknowledgements ? " acknowledgements," : "", - has_forgotten_toppars ? " forgotten toppars," : "", + has_toppars_to_forget ? " forgotten toppars," : "", is_fetching_messages ? 
" fetching messages" : ""); } else { rd_kafka_buf_destroy(rkbuf); @@ -1448,14 +1623,67 @@ void rd_kafka_ShareFetchRequest( return; } - /* Update TopicArrayCnt */ - rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); + if (has_toppars_to_forget) { + TopicArrayCnt = 0; + PartitionArrayCnt = 0; + rkt_last = NULL; + /* Write zero TopicArrayCnt but store pointer for later update */ + of_TopicArrayCnt = rd_kafka_buf_write_arraycnt_pos(rkbuf); + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", + "Forgetting %d toppars", rd_list_cnt(toppars_to_forget)); + RD_LIST_FOREACH(rktp, toppars_to_forget, i) { + /* TODO KIP-932: This condition will cause partitions of same topics + to be inside single instance of the topic as toppars_to_send is not + sorted. Eg: T1 0, T1 1, T2 0, T1 3, T1 5, T2 1 will translate to + T1 (0,1), T2 (0), T1 (3, 5), T2 (1) instead it should be + T1 (0,1,3,5) T2(0,1) Fix this. */ + if (rkt_last != rktp->rktp_rkt) { + if (rkt_last != NULL) { + /* Update PartitionArrayCnt */ + rd_kafka_buf_finalize_arraycnt( + rkbuf, of_PartitionArrayCnt, + PartitionArrayCnt); + /* Topic tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + } + + rd_kafka_topic_rdlock(rktp->rktp_rkt); + /* Topic ID */ + rd_kafka_buf_write_uuid( + rkbuf, &rktp->rktp_rkt->rkt_topic_id); + rd_kafka_topic_rdunlock(rktp->rktp_rkt); + + TopicArrayCnt++; + rkt_last = rktp->rktp_rkt; + /* Partition count */ + of_PartitionArrayCnt = + rd_kafka_buf_write_arraycnt_pos(rkbuf); + PartitionArrayCnt = 0; + } + + PartitionArrayCnt++; - /* ForgottenToppars */ - rd_kafka_buf_write_arraycnt(rkbuf, 0); + /* Partition */ + rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); + + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", + "Forgetting Fetch partition %.*s [%" PRId32 "]", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); - if (forgotten_toppars_cnt > 0) { - /* TODO KIP-932: Implement forgotten toppars handling */ + } + if (rkt_last != NULL) { + /* Update last topic's 
PartitionArrayCnt */ + rd_kafka_buf_finalize_arraycnt(rkbuf, of_PartitionArrayCnt, + PartitionArrayCnt); + /* Topic tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + } + /* Update TopicArrayCnt */ + rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); + } else { + /* ForgottenToppars */ + rd_kafka_buf_write_arraycnt(rkbuf, 0); } /* Consider Fetch requests blocking if fetch.wait.max.ms >= 1s */ @@ -1480,13 +1708,36 @@ void rd_kafka_ShareFetchRequest( return; } +static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broker_t *rkb) { + /* TODO KIP-932: Improve this allocation with Acknowledgement implementation */ + int adding_toppar_cnt = rkb->rkb_share_fetch_session.toppars_to_add ? rd_list_cnt(rkb->rkb_share_fetch_session.toppars_to_add) : 0; + int intial_toppars_to_send_cnt = rkb->rkb_toppar_cnt + adding_toppar_cnt; + rd_list_t *toppars_to_send = rd_list_new(intial_toppars_to_send_cnt, NULL); + rd_kafka_toppar_t *rktp; + int i; + + TAILQ_FOREACH(rktp, &rkb->rkb_toppars, rktp_rkblink) { + if (rktp->rktp_share_acknowledge.first_offset >= 0) { + rd_list_add(toppars_to_send, rktp); + } + } + + if(rkb->rkb_share_fetch_session.toppars_to_add) { + RD_LIST_FOREACH(rktp, rkb->rkb_share_fetch_session.toppars_to_add, i) { + rd_list_add(toppars_to_send, rktp); + } + } + + return toppars_to_send; +} + void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; int32_t max_records = 0; /* TODO KIP-932: Check if needed while closing the consumer.*/ - rd_assert(rkb->rkb_rk->rk_cgrp); + rd_assert(rkb->rkb_rk->rk_cgrp); if(!rkcg->rkcg_member_id) { rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", @@ -1499,6 +1750,11 @@ void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig max_records = 500; } + if(rkb->rkb_share_fetch_session.toppars_to_add) + rkb->rkb_share_fetch_session.adding_toppars = 
rd_list_copy(rkb->rkb_share_fetch_session.toppars_to_add, rd_kafka_toppar_list_copy, NULL); + if(rkb->rkb_share_fetch_session.toppars_to_forget) + rkb->rkb_share_fetch_session.forgetting_toppars = rd_list_copy(rkb->rkb_share_fetch_session.toppars_to_forget, rd_kafka_toppar_list_copy, NULL); + rd_kafka_ShareFetchRequest( rkb, rkcg->rkcg_group_id, /* group_id */ @@ -1509,8 +1765,8 @@ void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig rkb->rkb_rk->rk_conf.fetch_max_bytes, max_records, 500, - NULL, /* forgotten toppars */ - 0, /* forgotten toppars cnt */ + rd_kafka_broker_share_fetch_get_toppars_to_send(rkb), /* toppars to send */ + rkb->rkb_share_fetch_session.toppars_to_forget, /* forgetting toppars */ rko_orig, /* rko */ now); } @@ -1634,6 +1890,11 @@ int rd_kafka_broker_fetch_toppars(rd_kafka_broker_t *rkb, rd_ts_t now) { /* Topic tags */ rd_kafka_buf_write_tags_empty(rkbuf); } + + /* TODO: This is not thread safe as topic can + be recreated in which case topic id is + updated from the main thread and we are + sending topic id from broker thread.*/ if (rd_kafka_buf_ApiVersion(rkbuf) > 12) { /* Topic id must be non-zero here */ rd_dassert(!RD_KAFKA_UUID_IS_ZERO( diff --git a/src/rdkafka_partition.c b/src/rdkafka_partition.c index f03f261c93..05af812b37 100644 --- a/src/rdkafka_partition.c +++ b/src/rdkafka_partition.c @@ -2597,6 +2597,46 @@ void rd_kafka_toppar_leader_unavailable(rd_kafka_toppar_t *rktp, rd_false /* don't force */); } +/** + * @locality any + */ +rd_bool_t rd_kafka_toppar_is_on_cgrp(rd_kafka_toppar_t *rktp, rd_bool_t do_lock) { + rd_bool_t on_cgrp; + if (do_lock) { + rd_kafka_toppar_lock(rktp); + } + on_cgrp = (rktp->rktp_flags & RD_KAFKA_TOPPAR_F_ON_CGRP) ? 
rd_true + : rd_false; + + if (do_lock) { + rd_kafka_toppar_unlock(rktp); + } + + return on_cgrp; +} + +/** + * @locality broker thread + */ +static rd_bool_t rd_kafka_toppar_share_are_acknowledgements_present(rd_kafka_toppar_t *rktp) { + return rktp->rktp_share_acknowledge.first_offset > -1 ? rd_true : rd_false; +} + +rd_bool_t rd_kafka_toppar_share_is_valid_to_send_for_fetch(rd_kafka_toppar_t *rktp) { + if (rd_kafka_toppar_share_are_acknowledgements_present(rktp)) { + return rd_true; + } + return rd_kafka_toppar_is_on_cgrp(rktp, rd_true /*do_lock*/); +} + + +/** + * @brief Toppar copier for rd_list_copy() + */ +void *rd_kafka_toppar_list_copy(const void *elem, void *opaque) { + return rd_kafka_toppar_keep((rd_kafka_toppar_t *)elem); +} + const char * rd_kafka_topic_partition_topic(const rd_kafka_topic_partition_t *rktpar) { diff --git a/src/rdkafka_partition.h b/src/rdkafka_partition.h index cd8ddbeadf..db9246709a 100644 --- a/src/rdkafka_partition.h +++ b/src/rdkafka_partition.h @@ -147,8 +147,6 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ * or txn_rktps */ TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkb_session_link; /* rkb_share_fetch_session * toppars_in_session link */ - TAILQ_ENTRY(rd_kafka_toppar_s) rktp_rkb_session_forgot_link; /* rkb_share_fetch_session - * toppars_to_forget link */ rd_kafka_topic_t *rktp_rkt; /**< This toppar's topic object */ int32_t rktp_partition; // LOCK: toppar_lock() + topic_wrlock() @@ -548,10 +546,20 @@ void rd_kafka_toppar_destroy_final(rd_kafka_toppar_t *rktp); #define rd_kafka_toppar_destroy(RKTP) \ do { \ rd_kafka_toppar_t *_RKTP = (RKTP); \ - if (unlikely(rd_refcnt_sub(&_RKTP->rktp_refcnt) == 0)) \ - rd_kafka_toppar_destroy_final(_RKTP); \ + rd_kafka_toppar_destroy0(_RKTP); \ } while (0) +/* Common destroy helper used by both the macro and the free-wrapper. 
*/ +static RD_UNUSED RD_INLINE void rd_kafka_toppar_destroy0(rd_kafka_toppar_t *rktp) { + if (unlikely(rd_refcnt_sub(&rktp->rktp_refcnt) == 0)) + rd_kafka_toppar_destroy_final(rktp); +} + +/* Free-function compatible wrapper for rd_list_new and similar APIs + * (signature: void (*)(void *)). */ +static RD_UNUSED RD_INLINE void rd_kafka_toppar_destroy_free(void *ptr) { + rd_kafka_toppar_destroy0((rd_kafka_toppar_t *)ptr); +} #define rd_kafka_toppar_lock(rktp) mtx_lock(&(rktp)->rktp_lock) @@ -692,6 +700,10 @@ rd_kafka_toppars_pause_resume(rd_kafka_t *rk, int flag, rd_kafka_topic_partition_list_t *partitions); +rd_bool_t rd_kafka_toppar_is_on_cgrp(rd_kafka_toppar_t *rktp, rd_bool_t do_lock); +rd_bool_t rd_kafka_toppar_share_is_valid_to_send_for_fetch(rd_kafka_toppar_t *rktp); +void *rd_kafka_toppar_list_copy(const void *elem, void *opaque); + rd_kafka_topic_partition_t *rd_kafka_topic_partition_new(const char *topic, int32_t partition); From f0f85be09bf4f4353c51c672f6f5725323dab3d8 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 18 Nov 2025 00:50:55 +0530 Subject: [PATCH 21/37] Deduplicated partition management. --- examples/consumer.c | 12 +++++----- src/rdkafka_broker.c | 54 +++++++++++++++++++++++++++++++++++-------- src/rdkafka_fetcher.c | 14 +++++++---- 3 files changed, 60 insertions(+), 20 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 70ac70a099..e56692abb2 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -170,12 +170,12 @@ int main(int argc, char **argv) { } - // if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != - // RD_KAFKA_CONF_OK) { - // fprintf(stderr, "%s\n", errstr); - // rd_kafka_conf_destroy(conf); - // return 1; - // } + if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != + RD_KAFKA_CONF_OK) { + fprintf(stderr, "%s\n", errstr); + rd_kafka_conf_destroy(conf); + return 1; + } /* * Create consumer instance. 
diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 763b48de5b..00a7206091 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3190,23 +3190,23 @@ static void rd_kafka_broker_share_session_add_remove_toppar(rd_list_t **toppars_ rd_list_t **toppars_remove_list, rd_kafka_toppar_t *rktp) { if (!*toppars_add_list) { - /** - * TODO KIP-932: Use a better destroy method. Right now - * manually destroying. - */ *toppars_add_list = rd_list_new(1, rd_kafka_toppar_destroy_free); } - rd_list_add(*toppars_add_list, rd_kafka_toppar_keep(rktp)); + if(!rd_list_find(*toppars_add_list, rktp, rd_list_cmp_ptr)) + rd_list_add(*toppars_add_list, rd_kafka_toppar_keep(rktp)); /* Remove from removing toppars if present there. */ if (*toppars_remove_list) { - rd_list_remove(*toppars_remove_list, rktp); - rd_kafka_toppar_destroy(rktp); - if(rd_list_empty(*toppars_remove_list)) { - rd_list_destroy(*toppars_remove_list); - *toppars_remove_list = NULL; + rd_kafka_toppar_t *removed_rktp = rd_list_remove(*toppars_remove_list, rktp); + if(removed_rktp) { + rd_kafka_toppar_destroy(removed_rktp); + if(rd_list_empty(*toppars_remove_list)) { + rd_list_destroy(*toppars_remove_list); + *toppars_remove_list = NULL; + } } + } } @@ -4933,6 +4933,40 @@ static int rd_kafka_broker_thread_main(void *arg) { (int)rd_kafka_bufq_cnt(&rkb->rkb_outbufs), (int)rd_kafka_bufq_cnt(&rkb->rkb_waitresps), (int)rd_kafka_bufq_cnt(&rkb->rkb_retrybufs), r); + + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + "Partitions in fetch session: %d", + rkb->rkb_share_fetch_session.toppars_in_session_cnt); + if(rkb->rkb_share_fetch_session.toppars_to_add) + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + "Partitions to add to fetch session: %d", + rd_list_cnt( + rkb->rkb_share_fetch_session.toppars_to_add)); + if(rkb->rkb_share_fetch_session.toppars_to_forget) { + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + "Partitions to forget from fetch session: %d", + rd_list_cnt( + rkb->rkb_share_fetch_session.toppars_to_forget)); + 
rd_kafka_toppar_t *rktp; + int i; + RD_LIST_FOREACH(rktp, rkb->rkb_share_fetch_session.toppars_to_forget, i) { + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + " - %.*s [%" PRId32 "]", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); + } + + } + if(rkb->rkb_share_fetch_session.adding_toppars) + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + "Partitions being added to fetch session: %d", + rd_list_cnt( + rkb->rkb_share_fetch_session.adding_toppars)); + if(rkb->rkb_share_fetch_session.forgetting_toppars) + rd_rkb_dbg(rkb, BROKER, "TERMINATE", + "Partitions being forgotten from fetch session: %d", + rd_list_cnt( + rkb->rkb_share_fetch_session.forgetting_toppars)); } } diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index f00fdabdd5..bc48c9b114 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1232,11 +1232,12 @@ static void rd_kafka_broker_session_update_toppars_in_session(rd_kafka_broker_t static void rd_kafka_broker_session_update_toppars_list( rd_kafka_broker_t *rkb, rd_list_t **request_toppars_ptr, - rd_list_t *toppars_to_remove, + rd_list_t **toppars_to_remove_ptr, rd_bool_t add) { size_t i; rd_kafka_toppar_t *rktp, *removed_rktp; rd_list_t *request_toppars = *request_toppars_ptr; + rd_list_t *toppars_to_remove = *toppars_to_remove_ptr; if (request_toppars == NULL || rd_list_cnt(request_toppars) == 0) return; @@ -1245,8 +1246,13 @@ static void rd_kafka_broker_session_update_toppars_list( rd_kafka_broker_session_update_toppars_in_session(rkb, rktp, add); if (toppars_to_remove) { removed_rktp = rd_list_remove(toppars_to_remove, rktp); - if (removed_rktp) + if (removed_rktp) { rd_kafka_toppar_destroy(removed_rktp); /* from partitions list */ + if(rd_list_empty(toppars_to_remove)) { + rd_list_destroy(toppars_to_remove); + *toppars_to_remove_ptr = NULL; + } + } } } rd_list_destroy(request_toppars); @@ -1257,14 +1263,14 @@ static void rd_kafka_broker_session_update_added_partitions( rd_kafka_broker_t *rkb) { 
rd_kafka_broker_session_update_toppars_list( rkb, &rkb->rkb_share_fetch_session.adding_toppars, - rkb->rkb_share_fetch_session.toppars_to_add, rd_true); + &rkb->rkb_share_fetch_session.toppars_to_add, rd_true); } static void rd_kafka_broker_session_update_removed_partitions( rd_kafka_broker_t *rkb) { rd_kafka_broker_session_update_toppars_list( rkb, &rkb->rkb_share_fetch_session.forgetting_toppars, - rkb->rkb_share_fetch_session.toppars_to_forget, rd_false); + &rkb->rkb_share_fetch_session.toppars_to_forget, rd_false); } static void rd_kafka_broker_session_update_partitions(rd_kafka_broker_t *rkb) { From 4fad8e5256fb1c9348fcd5b2c39f9fa67a3be1e6 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 18 Nov 2025 12:24:55 +0530 Subject: [PATCH 22/37] toppars in session and memory fix related to freeing Error Message --- examples/consumer.c | 2 +- src/rdkafka_fetcher.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index e56692abb2..7cef677594 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -279,7 +279,7 @@ int main(int argc, char **argv) { continue; } - if((int)rcvd_msgs < 100) { + if((int)rcvd_msgs < -1) { /* Proper message. 
*/ printf("Message on %s [%" PRId32 "] at offset %" PRId64 " (leader epoch %" PRId32 "):\n", diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index bc48c9b114..78c656d49e 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1040,12 +1040,11 @@ rd_kafka_share_fetch_reply_handle(rd_kafka_broker_t *rkb, rd_kafka_buf_read_i16(rkbuf, &ErrorCode); rd_kafka_buf_read_str(rkbuf, &ErrorStr); - if(ErrorCode) { + if (ErrorCode) { rd_rkb_log(rkb, LOG_ERR, "SHAREFETCH", "ShareFetch response error %d: '%.*s'", ErrorCode, RD_KAFKAP_STR_PR(&ErrorStr)); - rd_kafkap_str_destroy(&ErrorStr); return ErrorCode; } @@ -1722,7 +1721,7 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke rd_kafka_toppar_t *rktp; int i; - TAILQ_FOREACH(rktp, &rkb->rkb_toppars, rktp_rkblink) { + TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkblink) { if (rktp->rktp_share_acknowledge.first_offset >= 0) { rd_list_add(toppars_to_send, rktp); } From 06a483254029481803bdf586810aea3294e39067 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:15:20 +0530 Subject: [PATCH 23/37] Fix refcount issue with session partition management --- src/rdkafka_fetcher.c | 113 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 9 deletions(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 78c656d49e..6193836ff7 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -967,11 +967,11 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( rd_kafka_buf_check_len(rkbuf, MessageSetSize); /* Parse messages - TODO KIP-932: This part might raise issue as we are adding - messages to the consumer queue partition by - partition. The fetch returns messages as soon - as they are available, so messages for different - partitions might not be sent at once to the user. 
+ TODO KIP-932: This part might raise issue as We are adding messages + to the consumer queue in partition by partition manner. + The poll returns messages as soon as they are available in the queue, + so messages for different partitions in the same fetch request might + not be sent at once to the user. */ err = rd_kafka_msgset_parse(rkbuf, request, rktp, NULL, &tver); @@ -1135,7 +1135,7 @@ static void rd_kafka_broker_session_update_epoch(rd_kafka_broker_t *rkb) { } static void rd_kafka_broker_session_add_partition_to_toppars_in_session(rd_kafka_broker_t *rkb, rd_kafka_toppar_t *rktp) { - rd_kafka_toppar_t *session_rktp; + rd_kafka_toppar_t *session_rktp, *adding_rktp; TAILQ_FOREACH(session_rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link) { if(rktp == session_rktp) { rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", @@ -1151,7 +1151,8 @@ static void rd_kafka_broker_session_add_partition_to_toppars_in_session(rd_kafka "]: adding to ShareFetch session", rktp->rktp_rkt->rkt_topic->str, rktp->rktp_partition); - TAILQ_INSERT_TAIL(&rkb->rkb_share_fetch_session.toppars_in_session, rd_kafka_toppar_keep(rktp), rktp_rkb_session_link); + adding_rktp = rd_kafka_toppar_keep(rktp); + TAILQ_INSERT_TAIL(&rkb->rkb_share_fetch_session.toppars_in_session, adding_rktp, rktp_rkb_session_link); rkb->rkb_share_fetch_session.toppars_in_session_cnt++; } @@ -1444,6 +1445,7 @@ void rd_kafka_ShareFetchRequest( int32_t batch_size, rd_list_t *toppars_to_send, rd_list_t *toppars_to_forget, + rd_bool_t is_leave_request, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_toppar_t *rktp; @@ -1456,6 +1458,7 @@ void rd_kafka_ShareFetchRequest( rd_kafka_topic_t *rkt_last = NULL; int16_t ApiVersion = 0; size_t rkbuf_size = 0; + int toppars_to_send_cnt = toppars_to_send ? rd_list_cnt(toppars_to_send) : 0; int i; rd_bool_t has_acknowledgements = toppars_to_send && rd_list_cnt(toppars_to_send) > 0 ? 
rd_true : rd_false; rd_bool_t has_toppars_to_forget = toppars_to_forget && rd_list_cnt(toppars_to_forget) > 0 ? rd_true : rd_false; @@ -1475,7 +1478,7 @@ void rd_kafka_ShareFetchRequest( /* ShareSessionEpoch + WaitMaxMs + MinBytes + MaxBytes + MaxRecords + BatchSize + TopicArrayCnt*/ rkbuf_size += 4 + 4 + 4 + 4 + 4 + 4 + 4; /* N x (topic id + partition id + acknowledgement) */ - rkbuf_size += (rkb->rkb_toppar_cnt * (32 + 4 + acknowledgement_size)); + rkbuf_size += (toppars_to_send_cnt * (32 + 4 + acknowledgement_size)); if( has_toppars_to_forget) { /* M x (topic id + partition id) */ rkbuf_size += (rd_list_cnt(toppars_to_forget) * (32 + 4)); @@ -1613,7 +1616,11 @@ void rd_kafka_ShareFetchRequest( /* Update TopicArrayCnt */ rd_kafka_buf_finalize_arraycnt(rkbuf, of_TopicArrayCnt, TopicArrayCnt); - if(has_acknowledgements || has_toppars_to_forget || is_fetching_messages) { + if(toppars_to_send) { + rd_list_destroy(toppars_to_send); + } + + if(is_leave_request || has_acknowledgements || has_toppars_to_forget || is_fetching_messages) { rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "Share Fetch Request sent with%s%s%s", has_acknowledgements ? " acknowledgements," : "", @@ -1713,6 +1720,18 @@ void rd_kafka_ShareFetchRequest( return; } +static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send_on_leave(rd_kafka_broker_t *rkb) { + /* TODO KIP-932: Implement this properly. Remaining acknowledgements should be sent */ + + // TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkblink) { + // if (rktp->rktp_share_acknowledge.first_offset >= 0) { + // rd_list_add(toppars_to_send, rktp); + // } + // } + + return rd_list_new(0, NULL); +} + static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broker_t *rkb) { /* TODO KIP-932: Improve this allocation with Acknowledgement implementation */ int adding_toppar_cnt = rkb->rkb_share_fetch_session.toppars_to_add ? 
rd_list_cnt(rkb->rkb_share_fetch_session.toppars_to_add) : 0; @@ -1736,6 +1755,81 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke return toppars_to_send; } +void rd_kafka_broker_share_fetch_session_clear(rd_kafka_broker_t *rkb) { + rd_kafka_toppar_t *rktp, *tmp_rktp; + + rkb->rkb_share_fetch_session.epoch = -1; + + /* Clear toppars in session */ + TAILQ_FOREACH_SAFE(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link, tmp_rktp) { + TAILQ_REMOVE(&rkb->rkb_share_fetch_session.toppars_in_session, rktp, rktp_rkb_session_link); + rd_kafka_toppar_destroy(rktp); // from session list + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", + "%s [%" PRId32 + "]: removed from ShareFetch session on clear", + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition); + } + rkb->rkb_share_fetch_session.toppars_in_session_cnt = 0; + + /* Clear toppars to add */ + if(rkb->rkb_share_fetch_session.toppars_to_add) { + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", + "Clearing %d toppars to add from ShareFetch session on clear", + rd_list_cnt(rkb->rkb_share_fetch_session.toppars_to_add)); + rd_list_destroy(rkb->rkb_share_fetch_session.toppars_to_add); + rkb->rkb_share_fetch_session.toppars_to_add = NULL; + } + + /* Clear toppars to forget */ + if(rkb->rkb_share_fetch_session.toppars_to_forget) { + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", + "Clearing %d toppars to forget from ShareFetch session on clear", + rd_list_cnt(rkb->rkb_share_fetch_session.toppars_to_forget)); + rd_list_destroy(rkb->rkb_share_fetch_session.toppars_to_forget); + rkb->rkb_share_fetch_session.toppars_to_forget = NULL; + } + + /* Clear adding toppars */ + if(rkb->rkb_share_fetch_session.adding_toppars) { + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", + "Clearing %d adding toppars from ShareFetch session on clear", + rd_list_cnt(rkb->rkb_share_fetch_session.adding_toppars)); + rd_list_destroy(rkb->rkb_share_fetch_session.adding_toppars); + rkb->rkb_share_fetch_session.adding_toppars = NULL; 
+ } + + /* Clear forgetting toppars */ + if(rkb->rkb_share_fetch_session.forgetting_toppars) { + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", + "Clearing %d forgetting toppars from ShareFetch session on clear", + rd_list_cnt(rkb->rkb_share_fetch_session.forgetting_toppars)); + rd_list_destroy(rkb->rkb_share_fetch_session.forgetting_toppars); + rkb->rkb_share_fetch_session.forgetting_toppars = NULL; + } +} + +void rd_kafka_broker_share_fetch_leave(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { + rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; + rd_assert(rkb->rkb_rk->rk_cgrp); + rd_kafka_broker_share_fetch_session_clear(rkb); + rd_kafka_ShareFetchRequest( + rkb, + rkcg->rkcg_group_id, /* group_id */ + rkcg->rkcg_member_id, /* member_id */ + rkb->rkb_share_fetch_session.epoch, /* share_session_epoch */ + rkb->rkb_rk->rk_conf.fetch_wait_max_ms, + rkb->rkb_rk->rk_conf.fetch_min_bytes, + rkb->rkb_rk->rk_conf.fetch_max_bytes, + 0, + 0, + rd_kafka_broker_share_fetch_get_toppars_to_send_on_leave(rkb), /* toppars to send */ + NULL, /* forgetting toppars */ + rd_true, /* leave request */ + rko_orig, /* rko */ + now); +} + void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; @@ -1772,6 +1866,7 @@ void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig 500, rd_kafka_broker_share_fetch_get_toppars_to_send(rkb), /* toppars to send */ rkb->rkb_share_fetch_session.toppars_to_forget, /* forgetting toppars */ + rd_false, /* not leave request */ rko_orig, /* rko */ now); } From 6775e7deb5e156bf73a1c8fe668f723924f19f95 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:18:21 +0530 Subject: [PATCH 24/37] Improve ref counting and ref counting for toppars --- src/rd.h | 7 +++++-- src/rdkafka_partition.h | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/rd.h b/src/rd.h index 
300a7b030c..aa2f4f483b 100644 --- a/src/rd.h +++ b/src/rd.h @@ -406,14 +406,17 @@ static RD_INLINE RD_UNUSED int rd_refcnt_get(rd_refcnt_t *R) { rd_refcnt_get(R), (R), WHAT, __FUNCTION__, __LINE__), \ rd_refcnt_sub0(R)) -#define rd_refcnt_sub(R) \ +#define rd_refcnt_sub_fl(FUNC, LINE, R) \ (fprintf(stderr, "REFCNT DEBUG: %-35s %d -1: %16p: %s:%d\n", #R, \ - rd_refcnt_get(R), (R), __FUNCTION__, __LINE__), \ + rd_refcnt_get(R), (R), (FUNC), (LINE)), \ rd_refcnt_sub0(R)) +#define rd_refcnt_sub(R) rd_refcnt_sub_fl(__FUNCTION__, __LINE__, R) + #else #define rd_refcnt_add_fl(FUNC, LINE, R) rd_refcnt_add0(R) #define rd_refcnt_add(R) rd_refcnt_add0(R) +#define rd_refcnt_sub_fl(FUNC, LINE, R) rd_refcnt_sub0(R) #define rd_refcnt_sub(R) rd_refcnt_sub0(R) #endif diff --git a/src/rdkafka_partition.h b/src/rdkafka_partition.h index db9246709a..c6fbadb7b0 100644 --- a/src/rdkafka_partition.h +++ b/src/rdkafka_partition.h @@ -546,19 +546,19 @@ void rd_kafka_toppar_destroy_final(rd_kafka_toppar_t *rktp); #define rd_kafka_toppar_destroy(RKTP) \ do { \ rd_kafka_toppar_t *_RKTP = (RKTP); \ - rd_kafka_toppar_destroy0(_RKTP); \ + rd_kafka_toppar_destroy0(__FUNCTION__, __LINE__, _RKTP); \ } while (0) /* Common destroy helper used by both the macro and the free-wrapper. */ -static RD_UNUSED RD_INLINE void rd_kafka_toppar_destroy0(rd_kafka_toppar_t *rktp) { - if (unlikely(rd_refcnt_sub(&rktp->rktp_refcnt) == 0)) +static RD_UNUSED RD_INLINE void rd_kafka_toppar_destroy0(const char *func, int line, rd_kafka_toppar_t *rktp) { + if (unlikely(rd_refcnt_sub_fl(func, line, &rktp->rktp_refcnt) == 0)) rd_kafka_toppar_destroy_final(rktp); } /* Free-function compatible wrapper for rd_list_new and similar APIs * (signature: void (*)(void *)). 
*/ static RD_UNUSED RD_INLINE void rd_kafka_toppar_destroy_free(void *ptr) { - rd_kafka_toppar_destroy0((rd_kafka_toppar_t *)ptr); + rd_kafka_toppar_destroy0(__FUNCTION__, __LINE__, (rd_kafka_toppar_t *)ptr); } From 6f4837d561601eff6c08d84a92762210faa8bbc2 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:19:16 +0530 Subject: [PATCH 25/37] Add session leave. Not working properly. Improve this. --- src/rdkafka.c | 9 +++++++-- src/rdkafka_broker.c | 32 +++++++++++++++++++++++++++++--- src/rdkafka_fetcher.h | 1 + src/rdkafka_op.h | 6 ++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/rdkafka.c b/src/rdkafka.c index e8d7259838..d69b749541 100644 --- a/src/rdkafka.c +++ b/src/rdkafka.c @@ -2991,7 +2991,7 @@ static void rd_kafka_share_fetch_fanout_with_backoff(rd_kafka_t *rk, */ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, rd_kafka_op_t *rko_orig) { - rd_kafka_resp_err_t err; + rd_kafka_resp_err_t err = RD_KAFKA_RESP_ERR_NO_ERROR; rd_kafka_assert(rk, thrd_is_current(rk->rk_thread)); rd_kafka_dbg(rk, CGRP, "SHAREFETCH", @@ -3030,6 +3030,7 @@ rd_kafka_op_res_t rd_kafka_share_fetch_reply_op(rd_kafka_t *rk, case RD_KAFKA_RESP_ERR__PREV_IN_PROGRESS: /* This should not happen. */ case RD_KAFKA_RESP_ERR__STATE: case RD_KAFKA_RESP_ERR__AUTHENTICATION: + case RD_KAFKA_RESP_ERR_NO_ERROR: case RD_KAFKA_RESP_ERR_GROUP_AUTHORIZATION_FAILED: /* Do we need more handling for fatal errors? 
*/ @@ -3163,6 +3164,7 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rkb->rkb_share_fetch_enqueued = rd_true; rko_sf = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); + rko_sf->rko_u.share_fetch.should_leave = rd_false; rko_sf->rko_u.share_fetch.abs_timeout = abs_timeout; rko_sf->rko_u.share_fetch.should_fetch = (rkb == selected_rkb); rd_kafka_broker_keep(rkb); @@ -3171,8 +3173,11 @@ rd_kafka_op_res_t rd_kafka_share_fetch_fanout_op(rd_kafka_t *rk, rd_kafka_dbg(rk, CGRP, "SHAREFETCH", "Enqueuing share fetch op on broker %s " - "(%s fetch)", + "(%s leave), (%s fetch)", rd_kafka_broker_name(rkb), + rko_sf->rko_u.share_fetch.should_leave + ? "should" + : "should not", rko_sf->rko_u.share_fetch.should_fetch ? "should" : "should not"); diff --git a/src/rdkafka_broker.c b/src/rdkafka_broker.c index 00a7206091..c8b4522631 100644 --- a/src/rdkafka_broker.c +++ b/src/rdkafka_broker.c @@ -3552,11 +3552,12 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { break; case RD_KAFKA_OP_SHARE_FETCH: - rd_rkb_dbg(rkb, CGRP, "SHAREFETCH", + rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", "Received SHARE_FETCH op for broker %s with " - "should_fetch = %d", + "should_fetch = %d, should_leave = %d", rd_kafka_broker_name(rkb), - rko->rko_u.share_fetch.should_fetch); + rko->rko_u.share_fetch.should_fetch, + rko->rko_u.share_fetch.should_leave); /* This is only temporary handling for testing to avoid crashing * on assert - the code below will automatically enqueue a * reply which is not the final behaviour. 
*/ @@ -3572,6 +3573,15 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) { // rko = NULL; // } + if(rko->rko_u.share_fetch.should_leave) { + rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", + "Processing SHARE_FETCH op: " + "should_leave is true"); + rd_kafka_broker_share_fetch_leave(rkb, rko, rd_clock()); + rko = NULL; /* the rko is reused for the reply */ + break; + } + if (rd_kafka_broker_or_instance_terminating(rkb)) { rd_kafka_dbg(rkb->rkb_rk, BROKER, "SHAREFETCH", "Ignoring SHARE_FETCH op: " @@ -6454,6 +6464,22 @@ void rd_kafka_broker_decommission(rd_kafka_t *rk, if (rd_atomic32_get(&rkb->termination_in_progress) > 0) return; + if(RD_KAFKA_IS_SHARE_CONSUMER(rk) && rkb->rkb_source == RD_KAFKA_LEARNED) { + rd_kafka_op_t *rko_sf; + rko_sf = rd_kafka_op_new(RD_KAFKA_OP_SHARE_FETCH); + rko_sf->rko_u.share_fetch.should_leave = rd_true; + rko_sf->rko_u.share_fetch.abs_timeout = 0; // TODO KIP-932: Check timeout part. + rko_sf->rko_u.share_fetch.should_fetch = rd_false; + rd_kafka_broker_keep(rkb); + rko_sf->rko_u.share_fetch.target_broker = rkb; + rko_sf->rko_replyq = RD_KAFKA_REPLYQ(rk->rk_ops, 0); + + rd_kafka_dbg(rk, BROKER, "SHAREFETCH", + "Enqueuing leave share fetch op on broker %s: decommissioning broker.", + rd_kafka_broker_name(rkb)); + rd_kafka_q_enq(rkb->rkb_ops, rko_sf); + } + rd_atomic32_add(&rkb->termination_in_progress, 1); /* Add broker's thread to wait_thrds list for later joining */ diff --git a/src/rdkafka_fetcher.h b/src/rdkafka_fetcher.h index c3b6b65c34..ee230d204a 100644 --- a/src/rdkafka_fetcher.h +++ b/src/rdkafka_fetcher.h @@ -40,6 +40,7 @@ rd_ts_t rd_kafka_toppar_fetch_decide(rd_kafka_toppar_t *rktp, rd_kafka_broker_t *rkb, int force_remove); +void rd_kafka_broker_share_fetch_leave(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now); void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now); diff --git a/src/rdkafka_op.h b/src/rdkafka_op.h index 9e6fd70d2e..ef0e4c4d73 
100644 --- a/src/rdkafka_op.h +++ b/src/rdkafka_op.h @@ -732,11 +732,17 @@ struct rd_kafka_op_s { } terminated; struct { + + rd_bool_t should_leave; /**< Whether this broker should + * leave the share-fetch + * session. */ + /** Whether this broker should share-fetch nonzero * messages. */ rd_bool_t should_fetch; /** Absolute timeout left to complete this share-fetch. + * TODO KIP-932: Use timeout properly. */ rd_ts_t abs_timeout; From 04500ea9f28817018115e2fbe2ee9aee86ddf3de Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:38:41 +0530 Subject: [PATCH 26/37] Improve acknowledgement to include all batches instead of just the last batch --- examples/consumer.c | 20 +++++------ src/rdkafka_fetcher.c | 66 +++++++++++++++++++++++-------------- src/rdkafka_msgset_reader.c | 2 +- src/rdkafka_partition.c | 8 +++-- src/rdkafka_partition.h | 6 ++-- src/rdkafka_request.c | 4 +-- 6 files changed, 63 insertions(+), 43 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 7cef677594..4652317780 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -170,12 +170,12 @@ int main(int argc, char **argv) { } - if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != - RD_KAFKA_CONF_OK) { - fprintf(stderr, "%s\n", errstr); - rd_kafka_conf_destroy(conf); - return 1; - } + // if (rd_kafka_conf_set(conf, "debug", "all", errstr, sizeof(errstr)) != + // RD_KAFKA_CONF_OK) { + // fprintf(stderr, "%s\n", errstr); + // rd_kafka_conf_destroy(conf); + // return 1; + // } /* * Create consumer instance. 
@@ -259,7 +259,7 @@ int main(int argc, char **argv) { perror("clock_gettime"); double __elapsed_ms = (__t1.tv_sec - __t0.tv_sec) * 1000.0 + (__t1.tv_nsec - __t0.tv_nsec) / 1e6; - fprintf(stdout, "%% rd_kafka_share_consume_batch() took %.3f ms\n", __elapsed_ms); + // fprintf(stdout, "%% rd_kafka_share_consume_batch() took %.3f ms\n", __elapsed_ms); if (error) { fprintf(stderr, "%% Consume error: %s\n", @@ -268,7 +268,7 @@ int main(int argc, char **argv) { continue; } - fprintf(stderr, "%% Received %zu messages\n", rcvd_msgs); + // fprintf(stderr, "%% Received %zu messages\n", rcvd_msgs); for (i = 0; i < (int)rcvd_msgs; i++) { rkm = rkmessages[i]; @@ -279,7 +279,7 @@ int main(int argc, char **argv) { continue; } - if((int)rcvd_msgs < -1) { + // if((int)rcvd_msgs < -1) { /* Proper message. */ printf("Message on %s [%" PRId32 "] at offset %" PRId64 " (leader epoch %" PRId32 "):\n", @@ -300,7 +300,7 @@ int main(int argc, char **argv) { (const char *)rkm->payload); else if (rkm->payload) printf(" Value: (%d bytes)\n", (int)rkm->len); - } + // } rd_kafka_message_destroy(rkm); } diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 6193836ff7..95ae11de6e 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -911,7 +911,7 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( int64_t FirstOffset; int64_t LastOffset; int16_t DeliveryCount; - + int i; rd_kafka_buf_read_i32(rkbuf, &PartitionId); // Partition rd_kafka_buf_read_i16(rkbuf, &PartitionFetchErrorCode); // PartitionFetchError @@ -981,7 +981,12 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( * parse errors (which are partition-specific) */ rd_kafka_buf_read_arraycnt(rkbuf, &AcquiredRecordsArrayCnt, -1); // AcquiredRecordsArrayCnt - while(AcquiredRecordsArrayCnt-- > 0) { + rd_dassert(rktp->rktp_share_acknowledge_count >= 0); + rd_dassert(rktp->rktp_share_acknowledge == NULL); + rktp->rktp_share_acknowledge_count = AcquiredRecordsArrayCnt; + 
rktp->rktp_share_acknowledge = rd_calloc(AcquiredRecordsArrayCnt, + sizeof(*rktp->rktp_share_acknowledge)); + for (i = 0; i < AcquiredRecordsArrayCnt; i++) { rd_kafka_buf_read_i64(rkbuf, &FirstOffset); // FirstOffset rd_kafka_buf_read_i64(rkbuf, &LastOffset); // LastOffset rd_kafka_buf_read_i16(rkbuf, &DeliveryCount); // DeliveryCount @@ -991,9 +996,9 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( " to %" PRId64 ", DeliveryCount %" PRId16, RD_KAFKAP_STR_PR(topic), PartitionId, FirstOffset, LastOffset, DeliveryCount); - rktp->rktp_share_acknowledge.first_offset = FirstOffset; - rktp->rktp_share_acknowledge.last_offset = LastOffset; - rktp->rktp_share_acknowledge.delivery_count = DeliveryCount; + rktp->rktp_share_acknowledge[i].first_offset = FirstOffset; + rktp->rktp_share_acknowledge[i].last_offset = LastOffset; + rktp->rktp_share_acknowledge[i].delivery_count = DeliveryCount; } rd_kafka_buf_skip_tags(rkbuf); // Partition tags @@ -1460,6 +1465,7 @@ void rd_kafka_ShareFetchRequest( size_t rkbuf_size = 0; int toppars_to_send_cnt = toppars_to_send ? rd_list_cnt(toppars_to_send) : 0; int i; + size_t j; rd_bool_t has_acknowledgements = toppars_to_send && rd_list_cnt(toppars_to_send) > 0 ? rd_true : rd_false; rd_bool_t has_toppars_to_forget = toppars_to_forget && rd_list_cnt(toppars_to_forget) > 0 ? rd_true : rd_false; rd_bool_t is_fetching_messages = max_records > 0 ? 
rd_true : rd_false; @@ -1507,8 +1513,8 @@ void rd_kafka_ShareFetchRequest( /* MemberId */ rd_kafka_buf_write_kstr(rkbuf, member_id); - printf(" --------------------------------------- rd_kafka_ShareFetchRequest: member_id=%.*s\n", - RD_KAFKAP_STR_PR(member_id)); + // printf(" --------------------------------------- rd_kafka_ShareFetchRequest: member_id=%.*s\n", + // RD_KAFKAP_STR_PR(member_id)); /* ShareSessionEpoch */ rd_kafka_buf_write_i32(rkbuf, share_session_epoch); @@ -1567,24 +1573,29 @@ void rd_kafka_ShareFetchRequest( /* Partition */ rd_kafka_buf_write_i32(rkbuf, rktp->rktp_partition); - printf(" ------------------------------------------------------------------ AcknowledgementBatches for topic %.*s [%" PRId32 "] : first_offset=%" PRId64 ", last_offset=%" PRId64 "\n", - RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), - rktp->rktp_partition, - rktp->rktp_share_acknowledge.first_offset, - rktp->rktp_share_acknowledge.last_offset); + // printf(" ------------------------------------------------------------------ AcknowledgementBatches for topic %.*s [%" PRId32 "] : first_offset=%" PRId64 ", last_offset=%" PRId64 "\n", + // RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + // rktp->rktp_partition, + // rktp->rktp_share_acknowledge.first_offset, + // rktp->rktp_share_acknowledge.last_offset); /* AcknowledgementBatches */ - if (rktp->rktp_share_acknowledge.first_offset >= 0) { + if (rktp->rktp_share_acknowledge_count > 0) { /* For now we only support ACCEPT */ - rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ - /* FirstOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.first_offset); - /* LastOffset */ - rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge.last_offset); - /* AcknowledgementType */ - rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ - rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ - /* Acknowledgement tags */ - rd_kafka_buf_write_tags_empty(rkbuf); + rd_kafka_buf_write_arraycnt(rkbuf, 
rktp->rktp_share_acknowledge_count); /* ArrayCnt = 1 */ + for(j = 0; j < rktp->rktp_share_acknowledge_count; j++) { + /* FirstOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge[j].first_offset); + /* LastOffset */ + rd_kafka_buf_write_i64(rkbuf, rktp->rktp_share_acknowledge[j].last_offset); + /* AcknowledgementType */ + rd_kafka_buf_write_arraycnt(rkbuf, 1); /* ArrayCnt = 1 */ + rd_kafka_buf_write_i8(rkbuf, 1); /* ACCEPT */ + /* Acknowledgement tags */ + rd_kafka_buf_write_tags_empty(rkbuf); + } + rktp->rktp_share_acknowledge_count = 0; + rd_free(rktp->rktp_share_acknowledge); + rktp->rktp_share_acknowledge = NULL; } else { /* No acknowledgements */ rd_kafka_buf_write_arraycnt(rkbuf, 0); @@ -1741,7 +1752,7 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke int i; TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkblink) { - if (rktp->rktp_share_acknowledge.first_offset >= 0) { + if (rktp->rktp_share_acknowledge_count >= 0) { rd_list_add(toppars_to_send, rktp); } } @@ -1763,6 +1774,11 @@ void rd_kafka_broker_share_fetch_session_clear(rd_kafka_broker_t *rkb) { /* Clear toppars in session */ TAILQ_FOREACH_SAFE(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link, tmp_rktp) { TAILQ_REMOVE(&rkb->rkb_share_fetch_session.toppars_in_session, rktp, rktp_rkb_session_link); + if(rktp->rktp_share_acknowledge) { + rd_free(rktp->rktp_share_acknowledge); + rktp->rktp_share_acknowledge = NULL; + rktp->rktp_share_acknowledge_count = 0; + } rd_kafka_toppar_destroy(rktp); // from session list rd_rkb_dbg(rkb, BROKER, "SHAREFETCH", "%s [%" PRId32 @@ -1812,7 +1828,6 @@ void rd_kafka_broker_share_fetch_session_clear(rd_kafka_broker_t *rkb) { void rd_kafka_broker_share_fetch_leave(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { rd_kafka_cgrp_t *rkcg = rkb->rkb_rk->rk_cgrp; rd_assert(rkb->rkb_rk->rk_cgrp); - rd_kafka_broker_share_fetch_session_clear(rkb); 
rd_kafka_ShareFetchRequest( rkb, rkcg->rkcg_group_id, /* group_id */ @@ -1828,6 +1843,7 @@ void rd_kafka_broker_share_fetch_leave(rd_kafka_broker_t *rkb, rd_kafka_op_t *rk rd_true, /* leave request */ rko_orig, /* rko */ now); + rd_kafka_broker_share_fetch_session_clear(rkb); } void rd_kafka_broker_share_fetch(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko_orig, rd_ts_t now) { diff --git a/src/rdkafka_msgset_reader.c b/src/rdkafka_msgset_reader.c index da7bdbe108..cc3c590d00 100644 --- a/src/rdkafka_msgset_reader.c +++ b/src/rdkafka_msgset_reader.c @@ -1422,7 +1422,7 @@ rd_kafka_msgset_reader_run(rd_kafka_msgset_reader_t *msetr) { err = RD_KAFKA_RESP_ERR_NO_ERROR; } - printf(" +++++++++++++++++++ Received %d messages\n", msetr->msetr_msgcnt); + // printf(" +++++++++++++++++++ Received %d messages\n", msetr->msetr_msgcnt); rd_rkb_dbg(msetr->msetr_rkb, MSG | RD_KAFKA_DBG_FETCH, "CONSUME", "Enqueue %i %smessage(s) (%" PRId64 diff --git a/src/rdkafka_partition.c b/src/rdkafka_partition.c index 05af812b37..8ba5a4f7f9 100644 --- a/src/rdkafka_partition.c +++ b/src/rdkafka_partition.c @@ -294,8 +294,8 @@ rd_kafka_toppar_t *rd_kafka_toppar_new0(rd_kafka_topic_t *rkt, rkt->rkt_topic->str, rktp->rktp_partition, rktp, &rktp->rktp_refcnt, func, line); - rktp->rktp_share_acknowledge.first_offset = RD_KAFKA_OFFSET_INVALID; - rktp->rktp_share_acknowledge.last_offset = RD_KAFKA_OFFSET_INVALID; + rktp->rktp_share_acknowledge = NULL; + rktp->rktp_share_acknowledge_count = 0; return rd_kafka_toppar_keep(rktp); } @@ -339,6 +339,8 @@ void rd_kafka_toppar_destroy_final(rd_kafka_toppar_t *rktp) { /* Clear queues */ rd_kafka_assert(rktp->rktp_rkt->rkt_rk, rd_kafka_msgq_len(&rktp->rktp_xmit_msgq) == 0); + rd_kafka_assert(rktp->rktp_rkt->rkt_rk, rktp->rktp_share_acknowledge == NULL); + rd_kafka_assert(rktp->rktp_rkt->rkt_rk, rktp->rktp_share_acknowledge_count == 0); rd_kafka_dr_msgq(rktp->rktp_rkt, &rktp->rktp_msgq, RD_KAFKA_RESP_ERR__DESTROY); rd_kafka_q_destroy_owner(rktp->rktp_fetchq); @@ 
-2619,7 +2621,7 @@ rd_bool_t rd_kafka_toppar_is_on_cgrp(rd_kafka_toppar_t *rktp, rd_bool_t do_lock) * @locality broker thread */ static rd_bool_t rd_kafka_toppar_share_are_acknowledgements_present(rd_kafka_toppar_t *rktp) { - return rktp->rktp_share_acknowledge.first_offset > -1 ? rd_true : rd_false; + return rktp->rktp_share_acknowledge_count > 0 ? rd_true : rd_false; } rd_bool_t rd_kafka_toppar_share_is_valid_to_send_for_fetch(rd_kafka_toppar_t *rktp) { diff --git a/src/rdkafka_partition.h b/src/rdkafka_partition.h index c6fbadb7b0..e090194126 100644 --- a/src/rdkafka_partition.h +++ b/src/rdkafka_partition.h @@ -486,11 +486,13 @@ struct rd_kafka_toppar_s { /* rd_kafka_toppar_t */ * Sends ACCEPT blindly with implicit acknowledgement. */ - struct { + /* Dynamic array of acknowledge entries: NULL until allocated. */ + struct rd_kafka_toppar_share_ack_entry { int64_t first_offset; int64_t last_offset; int16_t delivery_count; - } rktp_share_acknowledge; + } *rktp_share_acknowledge; /* NULL = not initialized */ + size_t rktp_share_acknowledge_count; /* number of entries in rktp_share_acknowledge (0 when NULL) */ }; /** diff --git a/src/rdkafka_request.c b/src/rdkafka_request.c index 2886ab7625..544ee4b5ac 100644 --- a/src/rdkafka_request.c +++ b/src/rdkafka_request.c @@ -562,8 +562,8 @@ int rd_kafka_buf_read_NodeEndpoints(rd_kafka_buf_t *rkbuf, int32_t i; rd_kafka_buf_read_arraycnt(rkbuf, &NodeEndpoints->NodeEndpointCnt, RD_KAFKAP_BROKERS_MAX); - printf(" --------------------------------------- rd_kafka_buf_read_NodeEndpoints: NodeEndpointCnt=%d\n", - NodeEndpoints->NodeEndpointCnt); + // printf(" --------------------------------------- rd_kafka_buf_read_NodeEndpoints: NodeEndpointCnt=%d\n", + // NodeEndpoints->NodeEndpointCnt); rd_dassert(!NodeEndpoints->NodeEndpoints); NodeEndpoints->NodeEndpoints = rd_calloc(NodeEndpoints->NodeEndpointCnt, From acaf9387cb4b27d7ef49538aeba0149934080604 Mon Sep 17 00:00:00 2001 From: Pranav Rathi 
<4427674+pranavrth@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:50:44 +0530 Subject: [PATCH 27/37] Improve consumer printing --- examples/consumer.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/consumer.c b/examples/consumer.c index 4652317780..993a01ca5a 100644 --- a/examples/consumer.c +++ b/examples/consumer.c @@ -281,10 +281,9 @@ int main(int argc, char **argv) { // if((int)rcvd_msgs < -1) { /* Proper message. */ - printf("Message on %s [%" PRId32 "] at offset %" PRId64 - " (leader epoch %" PRId32 "):\n", + printf("Message received on %s [%" PRId32 "] at offset %" PRId64, rd_kafka_topic_name(rkm->rkt), rkm->partition, - rkm->offset, rd_kafka_message_leader_epoch(rkm)); + rkm->offset); /* Print the message key. */ if (rkm->key && is_printable(rkm->key, rkm->key_len)) @@ -296,10 +295,10 @@ int main(int argc, char **argv) { /* Print the message value/payload. */ if (rkm->payload && is_printable(rkm->payload, rkm->len)) - printf(" Value: %.*s\n", (int)rkm->len, + printf(" - Value: %.*s\n", (int)rkm->len, (const char *)rkm->payload); else if (rkm->payload) - printf(" Value: (%d bytes)\n", (int)rkm->len); + printf(" - Value: (%d bytes)\n", (int)rkm->len); // } rd_kafka_message_destroy(rkm); From adf1808d337a2bc9789242a6c8d5489474daef82 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Fri, 21 Nov 2025 12:30:02 +0530 Subject: [PATCH 28/37] Fix partition acknowledgement not being sent as it was referring to the wrong TAILQ link --- src/rdkafka_fetcher.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 95ae11de6e..bf31cc2b2c 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -981,7 +981,7 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( * parse errors (which are partition-specific) */ rd_kafka_buf_read_arraycnt(rkbuf, &AcquiredRecordsArrayCnt, -1); // 
AcquiredRecordsArrayCnt - rd_dassert(rktp->rktp_share_acknowledge_count >= 0); + rd_dassert(rktp->rktp_share_acknowledge_count == 0); rd_dassert(rktp->rktp_share_acknowledge == NULL); rktp->rktp_share_acknowledge_count = AcquiredRecordsArrayCnt; rktp->rktp_share_acknowledge = rd_calloc(AcquiredRecordsArrayCnt, @@ -1751,7 +1751,7 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke rd_kafka_toppar_t *rktp; int i; - TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkblink) { + TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link) { if (rktp->rktp_share_acknowledge_count >= 0) { rd_list_add(toppars_to_send, rktp); } From 77e19db917ed3de90d25b768c9b77c090ebf0359 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Fri, 21 Nov 2025 12:40:17 +0530 Subject: [PATCH 29/37] More debug logging --- src/rdkafka_fetcher.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index bf31cc2b2c..7c4103bc64 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -981,6 +981,9 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( * parse errors (which are partition-specific) */ rd_kafka_buf_read_arraycnt(rkbuf, &AcquiredRecordsArrayCnt, -1); // AcquiredRecordsArrayCnt + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "%.*s [%" PRId32 "] : Share Acknowledgement Count: %ld, AcquiredRecordsArrayCnt: %d\n", + RD_KAFKAP_STR_PR(topic), PartitionId, + rktp->rktp_share_acknowledge_count, AcquiredRecordsArrayCnt); rd_dassert(rktp->rktp_share_acknowledge_count == 0); rd_dassert(rktp->rktp_share_acknowledge == NULL); rktp->rktp_share_acknowledge_count = AcquiredRecordsArrayCnt; @@ -991,7 +994,7 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( rd_kafka_buf_read_i64(rkbuf, &LastOffset); // LastOffset 
rd_kafka_buf_read_i16(rkbuf, &DeliveryCount); // DeliveryCount rd_kafka_buf_skip_tags(rkbuf); // AcquiredRecords tags - rd_rkb_dbg(rkb, MSG, "SHAREFETCH", + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "%.*s [%" PRId32 "]: Acquired Records from offset %" PRId64 " to %" PRId64 ", DeliveryCount %" PRId16, RD_KAFKAP_STR_PR(topic), PartitionId, @@ -1151,7 +1154,7 @@ static void rd_kafka_broker_session_add_partition_to_toppars_in_session(rd_kafka return; } } - rd_kafka_dbg(rkb->rkb_rk, MSG, "SHAREFETCH", + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "%s [%" PRId32 "]: adding to ShareFetch session", rktp->rktp_rkt->rkt_topic->str, @@ -1248,6 +1251,10 @@ static void rd_kafka_broker_session_update_toppars_list( return; RD_LIST_FOREACH(rktp, request_toppars, i) { + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH","%s [%" PRId32 "], add: %d", + rktp->rktp_rkt->rkt_topic->str, + rktp->rktp_partition, + add); rd_kafka_broker_session_update_toppars_in_session(rkb, rktp, add); if (toppars_to_remove) { removed_rktp = rd_list_remove(toppars_to_remove, rktp); @@ -1470,6 +1477,8 @@ void rd_kafka_ShareFetchRequest( rd_bool_t has_toppars_to_forget = toppars_to_forget && rd_list_cnt(toppars_to_forget) > 0 ? rd_true : rd_false; rd_bool_t is_fetching_messages = max_records > 0 ? rd_true : rd_false; + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "toppars_to_send_cnt=%d, has_acknowledgements=%d, has_toppars_to_forget=%d, is_fetching_messages=%d", + toppars_to_send_cnt, has_acknowledgements, has_toppars_to_forget, is_fetching_messages); /* * Only sending 1 aknowledgement for each partition. StartOffset + LastOffset + AcknowledgementType (ACCEPT for now). * TODO KIP-932: Change this to accommodate explicit acknowledgements. 
@@ -1580,6 +1589,10 @@ void rd_kafka_ShareFetchRequest( // rktp->rktp_share_acknowledge.last_offset); /* AcknowledgementBatches */ if (rktp->rktp_share_acknowledge_count > 0) { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_ShareFetchRequest: topic %.*s [%" PRId32 "] : sending %ld acknowledgements", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition, + rktp->rktp_share_acknowledge_count); /* For now we only support ACCEPT */ rd_kafka_buf_write_arraycnt(rkbuf, rktp->rktp_share_acknowledge_count); /* ArrayCnt = 1 */ for(j = 0; j < rktp->rktp_share_acknowledge_count; j++) { @@ -1597,6 +1610,9 @@ void rd_kafka_ShareFetchRequest( rd_free(rktp->rktp_share_acknowledge); rktp->rktp_share_acknowledge = NULL; } else { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_ShareFetchRequest: topic %.*s [%" PRId32 "] : No acknowledgements to send", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); /* No acknowledgements */ rd_kafka_buf_write_arraycnt(rkbuf, 0); } @@ -1752,13 +1768,28 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke int i; TAILQ_FOREACH(rktp, &rkb->rkb_share_fetch_session.toppars_in_session, rktp_rkb_session_link) { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_broker_share_fetch_get_toppars_to_send: checking toppar topic %.*s [%" PRId32 "] with %ld acknowledgements", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition, + rktp->rktp_share_acknowledge_count); if (rktp->rktp_share_acknowledge_count >= 0) { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_broker_share_fetch_get_toppars_to_send: adding to toppars_to_send topic %.*s [%" PRId32 "] with %ld acknowledgements", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition, + rktp->rktp_share_acknowledge_count); rd_list_add(toppars_to_send, rktp); + } else { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_broker_share_fetch_get_toppars_to_send: not adding to toppars_to_send topic %.*s [%" PRId32 "] since 
it has no acknowledgements", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); } } if(rkb->rkb_share_fetch_session.toppars_to_add) { RD_LIST_FOREACH(rktp, rkb->rkb_share_fetch_session.toppars_to_add, i) { + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_broker_share_fetch_get_toppars_to_send: adding topic %.*s [%" PRId32 "] to the session", + RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), + rktp->rktp_partition); rd_list_add(toppars_to_send, rktp); } } From c1c811e32ffee2222db98ca515908fe5ec13b9e4 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 25 Nov 2025 17:03:49 +0530 Subject: [PATCH 30/37] Fix incorrectly adding partitions to send in ShareFetch request even if there are no acknowledgement to send for the partition already added to the session --- src/rdkafka_fetcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 7c4103bc64..0eec47b4bd 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1772,7 +1772,7 @@ static rd_list_t *rd_kafka_broker_share_fetch_get_toppars_to_send(rd_kafka_broke RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), rktp->rktp_partition, rktp->rktp_share_acknowledge_count); - if (rktp->rktp_share_acknowledge_count >= 0) { + if (rktp->rktp_share_acknowledge_count > 0) { rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "rd_kafka_broker_share_fetch_get_toppars_to_send: adding to toppars_to_send topic %.*s [%" PRId32 "] with %ld acknowledgements", RD_KAFKAP_STR_PR(rktp->rktp_rkt->rkt_topic), rktp->rktp_partition, From 241d833d48cf887e0f8043ed0b6f76810bdb403f Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 25 Nov 2025 17:06:58 +0530 Subject: [PATCH 31/37] Fix incorrectly parsing ShareFetch Response if MessageSetSize was 0 --- src/rdkafka_fetcher.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git 
a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 0eec47b4bd..6b230520ea 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -956,29 +956,28 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( /* No error, clear any previous fetch error. */ rktp->rktp_last_error = RD_KAFKA_RESP_ERR_NO_ERROR; - if (unlikely(MessageSetSize <= 0)) - goto done; - - /** - * Parse MessageSet - */ - if (!rd_slice_narrow_relative(&rkbuf->rkbuf_reader, &save_slice, - (size_t) MessageSetSize)) - rd_kafka_buf_check_len(rkbuf, MessageSetSize); - - /* Parse messages - TODO KIP-932: This part might raise issue as We are adding messages - to the consumer queue in partition by partition manner. - The poll returns messages as soon as they are available in the queue, - so messages for different partitions in the same fetch request might - not be sent at once to the user. - */ - err = rd_kafka_msgset_parse(rkbuf, request, rktp, NULL, &tver); - + if(MessageSetSize > 0) { + /** + * Parse MessageSet + */ + if (!rd_slice_narrow_relative(&rkbuf->rkbuf_reader, &save_slice, + (size_t) MessageSetSize)) + rd_kafka_buf_check_len(rkbuf, MessageSetSize); + + /* Parse messages + TODO KIP-932: This part might raise issue as We are adding messages + to the consumer queue in partition by partition manner. + The poll returns messages as soon as they are available in the queue, + so messages for different partitions in the same fetch request might + not be sent at once to the user. 
+ */ + err = rd_kafka_msgset_parse(rkbuf, request, rktp, NULL, &tver); + + rd_slice_widen(&rkbuf->rkbuf_reader, &save_slice); + /* Continue with next partition regardless of + * parse errors (which are partition-specific) */ - rd_slice_widen(&rkbuf->rkbuf_reader, &save_slice); - /* Continue with next partition regardless of - * parse errors (which are partition-specific) */ + } rd_kafka_buf_read_arraycnt(rkbuf, &AcquiredRecordsArrayCnt, -1); // AcquiredRecordsArrayCnt rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", "%.*s [%" PRId32 "] : Share Acknowledgement Count: %ld, AcquiredRecordsArrayCnt: %d\n", From d26d3a594f444f37af7edfdafabff28f84da0f46 Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 25 Nov 2025 17:07:55 +0530 Subject: [PATCH 32/37] Fix incorrectly assigning memory to rktp_share_acknowledge even if AcquiredRecordsArrayCnt is 0 --- src/rdkafka_fetcher.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index 6b230520ea..cb4718a30d 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -985,22 +985,24 @@ static rd_kafka_resp_err_t rd_kafka_share_fetch_reply_handle_partition( rktp->rktp_share_acknowledge_count, AcquiredRecordsArrayCnt); rd_dassert(rktp->rktp_share_acknowledge_count == 0); rd_dassert(rktp->rktp_share_acknowledge == NULL); - rktp->rktp_share_acknowledge_count = AcquiredRecordsArrayCnt; - rktp->rktp_share_acknowledge = rd_calloc(AcquiredRecordsArrayCnt, - sizeof(*rktp->rktp_share_acknowledge)); - for (i = 0; i < AcquiredRecordsArrayCnt; i++) { - rd_kafka_buf_read_i64(rkbuf, &FirstOffset); // FirstOffset - rd_kafka_buf_read_i64(rkbuf, &LastOffset); // LastOffset - rd_kafka_buf_read_i16(rkbuf, &DeliveryCount); // DeliveryCount - rd_kafka_buf_skip_tags(rkbuf); // AcquiredRecords tags - rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", - "%.*s [%" PRId32 "]: Acquired Records from offset %" PRId64 - " to %" PRId64 
", DeliveryCount %" PRId16, - RD_KAFKAP_STR_PR(topic), PartitionId, - FirstOffset, LastOffset, DeliveryCount); - rktp->rktp_share_acknowledge[i].first_offset = FirstOffset; - rktp->rktp_share_acknowledge[i].last_offset = LastOffset; - rktp->rktp_share_acknowledge[i].delivery_count = DeliveryCount; + if(AcquiredRecordsArrayCnt > 0) { + rktp->rktp_share_acknowledge_count = AcquiredRecordsArrayCnt; + rktp->rktp_share_acknowledge = rd_calloc(AcquiredRecordsArrayCnt, + sizeof(*rktp->rktp_share_acknowledge)); + for (i = 0; i < AcquiredRecordsArrayCnt; i++) { + rd_kafka_buf_read_i64(rkbuf, &FirstOffset); // FirstOffset + rd_kafka_buf_read_i64(rkbuf, &LastOffset); // LastOffset + rd_kafka_buf_read_i16(rkbuf, &DeliveryCount); // DeliveryCount + rd_kafka_buf_skip_tags(rkbuf); // AcquiredRecords tags + rd_rkb_dbg(rkb, FETCH, "SHAREFETCH", + "%.*s [%" PRId32 "]: Acquired Records from offset %" PRId64 + " to %" PRId64 ", DeliveryCount %" PRId16, + RD_KAFKAP_STR_PR(topic), PartitionId, + FirstOffset, LastOffset, DeliveryCount); + rktp->rktp_share_acknowledge[i].first_offset = FirstOffset; + rktp->rktp_share_acknowledge[i].last_offset = LastOffset; + rktp->rktp_share_acknowledge[i].delivery_count = DeliveryCount; + } } rd_kafka_buf_skip_tags(rkbuf); // Partition tags From 6c6d281c060186b5144534a2ed53a2c851af19ba Mon Sep 17 00:00:00 2001 From: Pranav Rathi <4427674+pranavrth@users.noreply.github.com> Date: Tue, 25 Nov 2025 17:08:49 +0530 Subject: [PATCH 33/37] nit: correct naming for partitions being sent to the ShareFetch request --- src/rdkafka_fetcher.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rdkafka_fetcher.c b/src/rdkafka_fetcher.c index cb4718a30d..d56cc34056 100644 --- a/src/rdkafka_fetcher.c +++ b/src/rdkafka_fetcher.c @@ -1474,12 +1474,12 @@ void rd_kafka_ShareFetchRequest( int toppars_to_send_cnt = toppars_to_send ? 
rd_list_cnt(toppars_to_send) : 0; int i; size_t j; - rd_bool_t has_acknowledgements = toppars_to_send && rd_list_cnt(toppars_to_send) > 0 ? rd_true : rd_false; + rd_bool_t has_acknowledgements_or_topics_to_add = toppars_to_send && rd_list_cnt(toppars_to_send) > 0 ? rd_true : rd_false; rd_bool_t has_toppars_to_forget = toppars_to_forget && rd_list_cnt(toppars_to_forget) > 0 ? rd_true : rd_false; rd_bool_t is_fetching_messages = max_records > 0 ? rd_true : rd_false; - rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "toppars_to_send_cnt=%d, has_acknowledgements=%d, has_toppars_to_forget=%d, is_fetching_messages=%d", - toppars_to_send_cnt, has_acknowledgements, has_toppars_to_forget, is_fetching_messages); + rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "toppars_to_send_cnt=%d, has_acknowledgements_or_topics_to_add=%d, has_toppars_to_forget=%d, is_fetching_messages=%d", + toppars_to_send_cnt, has_acknowledgements_or_topics_to_add, has_toppars_to_forget, is_fetching_messages); /* * Only sending 1 aknowledgement for each partition. StartOffset + LastOffset + AcknowledgementType (ACCEPT for now). * TODO KIP-932: Change this to accommodate explicit acknowledgements. @@ -1648,10 +1648,10 @@ void rd_kafka_ShareFetchRequest( rd_list_destroy(toppars_to_send); } - if(is_leave_request || has_acknowledgements || has_toppars_to_forget || is_fetching_messages) { + if(is_leave_request || has_acknowledgements_or_topics_to_add || has_toppars_to_forget || is_fetching_messages) { rd_kafka_dbg(rkb->rkb_rk, FETCH, "SHAREFETCH", "Share Fetch Request sent with%s%s%s", - has_acknowledgements ? " acknowledgements," : "", + has_acknowledgements_or_topics_to_add ? " acknowledgements," : "", has_toppars_to_forget ? " forgotten toppars," : "", is_fetching_messages ? 
" fetching messages" : ""); } else { From 342fd0494e786622469dc73258f042747b0852e4 Mon Sep 17 00:00:00 2001 From: PratRanj07 Date: Wed, 19 Nov 2025 00:46:14 +0530 Subject: [PATCH 34/37] added tests --- tests/0154-share_consumer.c | 537 ++++++++++++++++++++++++++++++++++-- 1 file changed, 515 insertions(+), 22 deletions(-) diff --git a/tests/0154-share_consumer.c b/tests/0154-share_consumer.c index d906eaf836..0c1510dc04 100644 --- a/tests/0154-share_consumer.c +++ b/tests/0154-share_consumer.c @@ -28,44 +28,537 @@ #include "test.h" -int main_0154_share_consumer(int argc, char **argv) { +/** + * @brief Test that polling without subscription fails + */ +static void test_poll_no_subscribe_fails(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + char *group = "share-group-no-subscribe"; + + TEST_SAY("=== Testing poll without subscription fails ===\n"); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Try to poll without subscribing - should fail or return timeout */ + TEST_SAY("Attempting to poll without subscription\n"); + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is properly handled + */ + //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); + + free(msgs); + rd_kafka_destroy(consumer); +} 
+ +/** + * @brief Test subscribe and poll with no records available + */ +static void test_subscribe_and_poll_no_records(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic = test_mk_topic_name("0154-share-empty-records", 0); + char *group = "share-group-empty"; + + TEST_SAY("=== Testing subscribe and poll with no records ===\n"); + + /* Create empty topic (no messages produced) */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + TEST_SAY("Created empty topic: %s\n", topic); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to empty topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to empty topic, polling for messages\n"); + + /* Poll for messages - should get none */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 5000, msgs, &rcvd_msgs); + + TEST_ASSERT(rcvd_msgs == 0, "Should not receive messages from empty topic"); + TEST_SAY("✓ No messages received from empty topic (expected)\n"); + + test_delete_topic(consumer, topic); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, poll, then unsubscribe + */ +static void 
test_subscribe_poll_unsubscribe(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic = test_mk_topic_name("0154-share-unsub", 0); + char *group = "share-group-unsub"; + const int msg_count = 5; + + TEST_SAY("=== Testing subscribe, poll, then unsubscribe ===\n"); + + /* Create topic and produce messages */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + test_produce_msgs_easy(topic, 0, 0, msg_count); + TEST_SAY("Produced %d messages\n", msg_count); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to topic, consuming messages\n"); + + /* Poll for some messages */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + int consumed_count = 0; + + rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + consumed_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages before unsubscribe\n", consumed_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + /* Unsubscribe from all topics */ + 
TEST_SAY("Unsubscribing from all topics\n"); + rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); + TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); + + /* Try to poll after unsubscribe - should fail or get no messages */ + TEST_SAY("Attempting to poll after unsubscribe\n"); + rcvd_msgs = 0; + error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is properly handled + */ + //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); + + test_delete_topic(consumer, topic); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, poll, then subscribe to different topic + */ +static void test_subscribe_poll_subscribe(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic1 = "test-topic-0154-share-sub1"; + char *topic2 = "test-topic-0154-share-sub2"; + char *group = "share-group-resub"; + const int msg_count = 3; + + TEST_SAY("=== Testing subscribe, poll, then resubscribe ===\n"); + + /* Create topics and produce messages */ + test_create_topic_wait_exists(NULL, topic1, 1, -1, 60 * 1000); + test_create_topic_wait_exists(NULL, topic2, 1, -1, 60 * 1000); + + test_produce_msgs_easy(topic1, 0, 0, msg_count); + test_produce_msgs_easy(topic2, 0, 0, msg_count); + TEST_SAY("Produced %d messages to each topic\n", msg_count); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, 
"Failed to create consumer: %s", errstr); + + /* Subscribe to first topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic1, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to first topic: %s\n", topic1); + + /* Poll from first topic */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + int topic1_count = 0; + + rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + topic1_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages from topic1\n", topic1_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + /* Subscribe to second topic */ + TEST_SAY("Resubscribing to second topic: %s\n", topic2); + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic2, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + /* Poll from second topic */ + rcvd_msgs = 0; + int topic2_count = 0; + + error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + topic2_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages from topic2\n", topic2_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + TEST_SAY("✓ Successfully resubscribed and consumed from different topics\n"); + + test_delete_topic(consumer, topic1); + test_delete_topic(consumer, topic2); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, unsubscribe, then poll fails + */ +static void test_subscribe_unsubscribe_poll_fails(void) { + char errstr[512]; + 
rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic = test_mk_topic_name("0154-share-unsub-fail", 0); + char *group = "share-group-unsub-fail"; + + TEST_SAY("=== Testing subscribe, unsubscribe, then poll fails ===\n"); + + /* Create topic */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + test_produce_msgs_easy(topic, 0, 0, 3); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to topic: %s\n", topic); + + /* Immediately unsubscribe */ + TEST_SAY("Unsubscribing immediately\n"); + rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); + TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); + + /* Try to poll - should fail */ + TEST_SAY("Attempting to poll after unsubscribe\n"); + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is properly handled + */ + //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); + + test_delete_topic(consumer, topic); + + free(msgs); + 
rd_kafka_destroy(consumer); +} + +/** + * @brief Test producing and consuming 10 messages + */ +static void test_share_consumer_messages(void) { char errstr[512]; - rd_kafka_conf_t *conf; - rd_kafka_t *rk; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic = "test-topic";// test_mk_topic_name("0154-share-consumer", 0); - char *group = "test-group-0"; + char *topic = test_mk_topic_name("0154-share-test", 0); + char *group = "share-group-10msg"; + const int msg_count = 10; + int consumed_count = 0; + int attempts = 10; // Number of attempts to poll so the test doesn't run indefinitely + + const char *confs_set_group[] = {"share.auto.offset.reset", + "SET", "earliest"}; - test_create_topic_wait_exists(NULL, topic, 3, -1, 60 * 1000); + TEST_SAY("=== Testing share consumer with 10 messages ===\n"); + + /* Create topic */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); rd_sleep(5); - test_produce_msgs_easy(topic, 0, 0, 2); + test_produce_msgs_easy(topic, 0, 0, msg_count); + TEST_SAY("Successfully produced %d messages\n", msg_count); - TEST_SAY("Creating share consumer\n"); - test_conf_init(&conf, NULL, 60); - rd_kafka_conf_set(conf, "share.consumer", "true", NULL, 0); - rd_kafka_conf_set(conf, "group.protocol", "consumer", NULL, 0); - rd_kafka_conf_set(conf, "group.id", group, NULL, 0); - rd_kafka_conf_set(conf, "debug", "cgrp,protocol,conf", NULL, 0); + /* Create share consumer */ + TEST_SAY("Creating share consumer for group %s\n", group); + test_conf_init(&cons_conf, NULL, 60); + + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - // rk = rd_kafka_share_consumer_new(conf, errstr, sizeof(errstr)); - rk = 
rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); - if (!rk) { - TEST_FAIL("Failed to create share consumer: %s\n", errstr); + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + if (!consumer) { + TEST_FAIL("Failed to create share consumer: %s", errstr); } + /* Subscribe to topic */ topics = rd_kafka_topic_partition_list_new(1); rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(rk, topics); + + TEST_SAY("Subscribing to topic %s\n", topic); + rd_kafka_subscribe(consumer, topics); rd_kafka_topic_partition_list_destroy(topics); - TEST_SAY("Share consumer created successfully\n"); + test_IncrementalAlterConfigs_simple(consumer, RD_KAFKA_RESOURCE_GROUP, group, confs_set_group, 1); + + /* Allocate message array for batch consumption */ + rd_kafka_message_t **rkmessages = malloc(sizeof(rd_kafka_message_t *) * 100); + + /* Consume messages until we get all 10 */ + while (consumed_count < msg_count && attempts > 0) { + size_t rcvd_msgs = 0; + rd_kafka_error_t *error; + + error = rd_kafka_share_consume_batch(consumer, 5000, rkmessages, &rcvd_msgs); + + if (error) { + TEST_SAY("Consume error: %s\n", rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + attempts--; + continue; + } + + TEST_SAY("Received %zu messages in batch\n", rcvd_msgs); + + /* Process each message in the batch */ + for (int i = 0; i < (int)rcvd_msgs; i++) { + rd_kafka_message_t *rkm = rkmessages[i]; + + if (rkm->err) { + TEST_SAY("Message error: %s\n", rd_kafka_message_errstr(rkm)); + rd_kafka_message_destroy(rkm); + continue; + } + + /* Count valid messages only */ + consumed_count++; + TEST_SAY("Consumed message %d/%d\n", consumed_count, msg_count); + + /* Clean up message */ + rd_kafka_message_destroy(rkm); + } + + /* Break if we've consumed all expected messages */ + if (consumed_count >= msg_count) { + TEST_SAY("Consumed all %d messages, stopping\n", msg_count); + break; + } + + attempts--; + } - 
rd_kafka_consumer_poll(rk, 65000); + free(rkmessages); - TEST_SAY("Destroying consumer\n"); + /* Verify we got exactly the expected number of messages */ + TEST_ASSERT(consumed_count == msg_count, + "Expected to consume %d messages, but consumed %d", + msg_count, consumed_count); + + TEST_SAY("✓ Successfully consumed exactly %d messages\n", consumed_count); + + test_delete_topic(consumer, topic); /* Clean up */ - rd_kafka_destroy(rk); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribing to multiple topics + */ +static void test_share_consumer_multiple_topics(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic1 = test_mk_topic_name("0154-share-topic1", 0); + char *topic2 = test_mk_topic_name("0154-share-topic2", 0); + char *group = "share-group-multitopic"; + const int msgs_per_topic = 5; + int consumed_count = 0; + + const char *confs_set_group[] = {"share.auto.offset.reset", + "SET", "earliest"}; + + TEST_SAY("=== Testing share consumer with multiple topics ===\n"); + + /* Create topics and produce messages */ + test_create_topic_wait_exists(NULL, topic1, 1, -1, 60 * 1000); + test_create_topic_wait_exists(NULL, topic2, 1, -1, 60 * 1000); + + test_produce_msgs_easy(topic1, 0, 0, msgs_per_topic); + test_produce_msgs_easy(topic2, 0, 0, msgs_per_topic); + TEST_SAY("Produced %d messages to each topic\n", msgs_per_topic); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + //rd_kafka_conf_set(cons_conf, "debug", "all", errstr, sizeof(errstr)); + + consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); 
+ TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + test_IncrementalAlterConfigs_simple(consumer, RD_KAFKA_RESOURCE_GROUP, group, confs_set_group, 1); + + /* Subscribe to both topics */ + topics = rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(topics, topic1, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(topics, topic2, RD_KAFKA_PARTITION_UA); + + TEST_SAY("Subscribing to topics: %s, %s\n", topic1, topic2); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + /* Consume messages from both topics */ + rd_kafka_message_t **rkmessages = malloc(sizeof(rd_kafka_message_t *) * 20); + int attempts = 10; // Number of attempts to poll so the test doesn't run indefinitely + + while (consumed_count < (msgs_per_topic * 2) && attempts > 0) { + size_t rcvd_msgs = 0; + rd_kafka_error_t *error; + int i; + + error = rd_kafka_share_consume_batch(consumer, 3000, rkmessages, &rcvd_msgs); + + if (error) { + TEST_SAY("Consume error: %s\n", rd_kafka_error_string(error)); + rd_kafka_error_destroy(error); + attempts--; + continue; + } + + for (i = 0; i < (int)rcvd_msgs; i++) { + rd_kafka_message_t *rkm = rkmessages[i]; + if (!rkm->err) { + consumed_count++; + TEST_SAY("Consumed from topic %s: %d/%d total\n", + rd_kafka_topic_name(rkm->rkt), consumed_count, msgs_per_topic * 2); + } + rd_kafka_message_destroy(rkm); + } + attempts--; + } + + free(rkmessages); + + TEST_ASSERT(consumed_count == (msgs_per_topic * 2), + "Expected %d messages from both topics, got %d", + msgs_per_topic * 2, consumed_count); + + TEST_SAY("✓ Successfully consumed from multiple topics: %d messages\n", consumed_count); + + test_delete_topic(consumer, topic1); + test_delete_topic(consumer, topic2); + + /* Clean up */ + rd_kafka_consumer_close(consumer); + rd_kafka_destroy(consumer); +} + + +int main_0154_share_consumer(int argc, char **argv) { + + test_poll_no_subscribe_fails(); + test_subscribe_and_poll_no_records(); + 
test_subscribe_poll_unsubscribe(); + test_subscribe_poll_subscribe(); + test_subscribe_unsubscribe_poll_fails(); + test_share_consumer_messages(); + //test_share_consumer_multiple_topics(); return 0; } From 511d424a7d8ca671f811ea52c2924397fd32ac3d Mon Sep 17 00:00:00 2001 From: PratRanj07 Date: Mon, 24 Nov 2025 23:40:07 +0530 Subject: [PATCH 35/37] Added more tests --- tests/0154-share_consumer.c | 474 +++++++++++++++++++++++++++++++++++- 1 file changed, 464 insertions(+), 10 deletions(-) diff --git a/tests/0154-share_consumer.c b/tests/0154-share_consumer.c index 0c1510dc04..db9b4911cc 100644 --- a/tests/0154-share_consumer.c +++ b/tests/0154-share_consumer.c @@ -73,8 +73,8 @@ static void test_subscribe_and_poll_no_records(void) { rd_kafka_conf_t *cons_conf; rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic = test_mk_topic_name("0154-share-empty-records", 0); - char *group = "share-group-empty"; + const char *topic = "0154-share-empty-records"; + const char *group = "share-group-empty"; TEST_SAY("=== Testing subscribe and poll with no records ===\n"); @@ -123,8 +123,8 @@ static void test_subscribe_poll_unsubscribe(void) { rd_kafka_conf_t *cons_conf; rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic = test_mk_topic_name("0154-share-unsub", 0); - char *group = "share-group-unsub"; + const char *topic = "0154-share-unsub"; + const char *group = "share-group-unsub"; const int msg_count = 5; TEST_SAY("=== Testing subscribe, poll, then unsubscribe ===\n"); @@ -294,8 +294,8 @@ static void test_subscribe_unsubscribe_poll_fails(void) { rd_kafka_conf_t *cons_conf; rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic = test_mk_topic_name("0154-share-unsub-fail", 0); - char *group = "share-group-unsub-fail"; + const char *topic = "0154-share-unsub-fail"; + const char *group = "share-group-unsub-fail"; TEST_SAY("=== Testing subscribe, unsubscribe, then poll fails ===\n"); @@ -352,7 +352,7 @@ static 
void test_share_consumer_messages(void) { rd_kafka_conf_t *cons_conf; rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic = test_mk_topic_name("0154-share-test", 0); + const char *topic = "0154-share-test"; char *group = "share-group-10msg"; const int msg_count = 10; int consumed_count = 0; @@ -463,8 +463,8 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_conf_t *cons_conf; rd_kafka_t *consumer; rd_kafka_topic_partition_list_t *topics; - char *topic1 = test_mk_topic_name("0154-share-topic1", 0); - char *topic2 = test_mk_topic_name("0154-share-topic2", 0); + char *topic1 = "0154-share-topic-multi-1"; + char *topic2 = "0154-share-topic-multi-2"; char *group = "share-group-multitopic"; const int msgs_per_topic = 5; int consumed_count = 0; @@ -513,6 +513,7 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_error_t *error; int i; + printf("Polling for messages, consumed so far: %d/%d\n", consumed_count, msgs_per_topic * 2); error = rd_kafka_share_consume_batch(consumer, 3000, rkmessages, &rcvd_msgs); if (error) { @@ -550,6 +551,454 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_destroy(consumer); } +/** + * @brief Test multiple share consumers on the same topic. + * Verifies that messages are divided (each delivered once overall). 
+ */ +static void test_share_consumer_multi_members_same_topic(void) { + char errstr[512]; + const char *group = "share-group-multi-member"; + char *topic = "0154-share-multi-member"; + const int total_msgs = 20; + int consumed_total = 0; + int c1_count = 0; + int c2_count = 0; + int attempts = 15; + const char *group_conf[] = {"share.auto.offset.reset","SET","earliest"}; + + TEST_SAY("=== Testing multiple share consumers on same topic ===\n"); + + /* Create topic and produce messages */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + test_produce_msgs_easy(topic, 0, 0, total_msgs); + TEST_SAY("Produced %d messages to %s\n", total_msgs, topic); + + /* Common subscription list */ + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subs, topic, RD_KAFKA_PARTITION_UA); + + /* Create first consumer */ + rd_kafka_conf_t *conf1; + test_conf_init(&conf1, NULL, 60); + rd_kafka_conf_set(conf1, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf1, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf1, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf1, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *c1 = rd_kafka_new(RD_KAFKA_CONSUMER, conf1, errstr, sizeof(errstr)); + TEST_ASSERT(c1, "Failed to create consumer1: %s", errstr); + + /* Create second consumer */ + rd_kafka_conf_t *conf2; + test_conf_init(&conf2, NULL, 60); + rd_kafka_conf_set(conf2, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf2, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf2, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf2, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *c2 = rd_kafka_new(RD_KAFKA_CONSUMER, conf2, errstr, sizeof(errstr)); + TEST_ASSERT(c2, "Failed to create consumer2: %s", errstr); + + /* Set group config (offset 
reset) to earliest */ + test_IncrementalAlterConfigs_simple(c1, RD_KAFKA_RESOURCE_GROUP, + group, group_conf, 1); + + /* Subscribe both */ + rd_kafka_subscribe(c1, subs); + rd_kafka_subscribe(c2, subs); + rd_kafka_topic_partition_list_destroy(subs); + + /* Poll loop: alternate polling both consumers */ + rd_kafka_message_t *batch[32]; + + while (consumed_total < total_msgs && attempts-- > 0) { + size_t rcvd1 = 0, rcvd2 = 0; + rd_kafka_error_t *err1 = + rd_kafka_share_consume_batch(c1, 2000, batch, &rcvd1); + if (!err1) { + for (size_t i = 0; i < rcvd1; i++) { + if (!batch[i]->err) { + c1_count++; + consumed_total++; + } + rd_kafka_message_destroy(batch[i]); + } + } else { + rd_kafka_error_destroy(err1); + } + + if (consumed_total >= total_msgs) + break; + + rd_kafka_error_t *err2 = + rd_kafka_share_consume_batch(c2, 2000, batch, &rcvd2); + if (!err2) { + for (size_t i = 0; i < rcvd2; i++) { + if (!batch[i]->err) { + c2_count++; + consumed_total++; + } + rd_kafka_message_destroy(batch[i]); + } + } else { + rd_kafka_error_destroy(err2); + } + + TEST_SAY("Progress: total=%d/%d (c1=%d, c2=%d)\n", + consumed_total, total_msgs, c1_count, c2_count); + } + + TEST_ASSERT(consumed_total == total_msgs, + "Expected %d total messages, got %d", total_msgs, + consumed_total); + + TEST_SAY("✓ Multi-member share consumption complete: total=%d " + "(c1=%d, c2=%d)\n", + consumed_total, c1_count, c2_count); + + test_delete_topic(c1, topic); + + rd_kafka_consumer_close(c1); + rd_kafka_consumer_close(c2); + rd_kafka_destroy(c1); + rd_kafka_destroy(c2); +} + +/** + * Single share consumer, one topic with multiple partitions. 
+ */ +static void test_share_single_consumer_multi_partitions_one_topic(void) { + char errstr[512]; + const char *group = "share-group-single-one-topic-mparts"; + const char *topic = "0154-share-one-topic-mparts"; + const int partition_cnt = 3; + const int msgs_per_partition = 7; + const int total_msgs = partition_cnt * msgs_per_partition; + int consumed = 0; + int attempts = 30; + const char *grp_conf[] = {"share.auto.offset.reset","SET","earliest"}; + + TEST_SAY("=== Single consumer, one topic (%d partitions) ===\n", + partition_cnt); + + test_create_topic_wait_exists(NULL, topic, partition_cnt, -1, + 60 * 1000); + + for (int p = 0; p < partition_cnt; p++) + test_produce_msgs_easy(topic, p, p, + msgs_per_partition); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "create failed: %s", errstr); + + test_IncrementalAlterConfigs_simple(consumer, RD_KAFKA_RESOURCE_GROUP, + group, grp_conf, 1); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subs, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, subs); + rd_kafka_topic_partition_list_destroy(subs); + + rd_kafka_message_t *batch[64]; + + while (consumed < total_msgs && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(consumer, 3000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err) + consumed++; + rd_kafka_message_destroy(batch[i]); + } + TEST_SAY("Progress: %d/%d\n", consumed, 
total_msgs); + } + + TEST_ASSERT(consumed == total_msgs, + "Expected %d, got %d", total_msgs, consumed); + + TEST_SAY("✓ Consumed all %d messages across %d partitions\n", + consumed, partition_cnt); + + test_delete_topic(consumer, topic); + rd_kafka_consumer_close(consumer); + rd_kafka_destroy(consumer); +} + +/** + * Single share consumer, multiple topics each with multiple partitions. + */ +static void test_share_single_consumer_multi_partitions_multi_topics(void) { + char errstr[512]; + const char *group = "share-group-single-multi-topic-mparts"; + const int topic_cnt = 3; + const int partition_cnt = 2; + const int msgs_per_partition = 5; + char *topics[topic_cnt]; + int total_msgs = topic_cnt * partition_cnt * msgs_per_partition; + int consumed = 0; + int attempts = 40; + const char *grp_conf[] = {"share.auto.offset.reset","SET","earliest"}; + + TEST_SAY("=== Single consumer, %d topics x %d partitions ===\n", + topic_cnt, partition_cnt); + + for (int t = 0; t < topic_cnt; t++) { + topics[t] = test_mk_topic_name("0154-share-multiT-mparts", t); + test_create_topic_wait_exists(NULL, topics[t], partition_cnt, + -1, 60 * 1000); + for (int p = 0; p < partition_cnt; p++) + test_produce_msgs_easy(topics[t], p, + p, + msgs_per_partition); + } + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "create failed: %s", errstr); + + test_IncrementalAlterConfigs_simple(consumer, RD_KAFKA_RESOURCE_GROUP, + group, grp_conf, 1); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(topic_cnt); + for (int t = 0; t < topic_cnt; 
t++) + rd_kafka_topic_partition_list_add(subs, topics[t], + RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, subs); + rd_kafka_topic_partition_list_destroy(subs); + + rd_kafka_message_t *batch[128]; + + while (consumed < total_msgs && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(consumer, 3000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err) + consumed++; + rd_kafka_message_destroy(batch[i]); + } + TEST_SAY("Progress: %d/%d\n", consumed, total_msgs); + } + + TEST_ASSERT(consumed == total_msgs, + "Expected %d, got %d", total_msgs, consumed); + TEST_SAY("✓ Consumed all %d messages from %d topics\n", + consumed, topic_cnt); + + for (int t = 0; t < topic_cnt; t++) + test_delete_topic(consumer, topics[t]); + + rd_kafka_consumer_close(consumer); + rd_kafka_destroy(consumer); +} + +/** + * Multiple share consumers, one topic with multiple partitions. + */ +static void test_share_multi_consumers_multi_partitions_one_topic(void) { + char errstr[512]; + const char *group = "share-group-multi-cons-one-topic-mparts"; + const char *topic = "0154-share-cons-oneT-mparts"; + const int partition_cnt = 4; + const int msgs_per_partition = 6; + const int total_msgs = partition_cnt * msgs_per_partition; + int consumed_total = 0; + int c_counts[4] = {0}; + int attempts = 50; + const int consumer_cnt = 2; + rd_kafka_t *consumers[consumer_cnt]; + const char *grp_conf[] = {"share.auto.offset.reset","SET","earliest"}; + + TEST_SAY("=== %d consumers, one topic, %d partitions ===\n", + consumer_cnt, partition_cnt); + + test_create_topic_wait_exists(NULL, topic, partition_cnt, -1, + 60 * 1000); + for (int p = 0; p < partition_cnt; p++) + test_produce_msgs_easy(topic, p, p , + msgs_per_partition); + + for (int i = 0; i < consumer_cnt; i++) { + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", 
errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + consumers[i] = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumers[i], "create failed (%d): %s", i, errstr); + } + + test_IncrementalAlterConfigs_simple(consumers[0], RD_KAFKA_RESOURCE_GROUP, + group, grp_conf, 1); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subs, topic, RD_KAFKA_PARTITION_UA); + for (int i = 0; i < consumer_cnt; i++) + rd_kafka_subscribe(consumers[i], subs); + rd_kafka_topic_partition_list_destroy(subs); + + rd_kafka_message_t *batch[64]; + + while (consumed_total < total_msgs && attempts-- > 0) { + for (int i = 0; i < consumer_cnt; i++) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(consumers[i], 1000, + batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t m = 0; m < rcvd; m++) { + if (!batch[m]->err) { + c_counts[i]++; + consumed_total++; + } + rd_kafka_message_destroy(batch[m]); + } + } + TEST_SAY("Progress: total=%d/%d c0=%d c1=%d\n", + consumed_total, total_msgs, + c_counts[0], c_counts[1]); + } + + TEST_ASSERT(consumed_total == total_msgs, + "Expected %d total, got %d", total_msgs, consumed_total); + + TEST_SAY("✓ All %d messages consumed by %d consumers " + "(dist: c0=%d c1=%d)\n", + consumed_total, consumer_cnt, c_counts[0], c_counts[1]); + + test_delete_topic(consumers[0], topic); + for (int i = 0; i < consumer_cnt; i++) { + rd_kafka_consumer_close(consumers[i]); + rd_kafka_destroy(consumers[i]); + } +} + +/** + * Multiple consumers, multiple topics each with multiple partitions. 
+ */ +static void test_share_multi_consumers_multi_partitions_multi_topics(void) { + char errstr[512]; + const char *group = "share-group-multi-cons-multiT-mparts"; + const int topic_cnt = 2; + const int partition_cnt = 3; + const int msgs_per_partition = 5; + const int consumer_cnt = 3; + char *topics[topic_cnt]; + int total_msgs = topic_cnt * partition_cnt * msgs_per_partition; + int consumed_total = 0; + int per_cons[consumer_cnt]; + memset(per_cons, 0, sizeof(per_cons)); + int attempts = 80; + rd_kafka_t *consumers[consumer_cnt]; + const char *grp_conf[] = {"share.auto.offset.reset","SET","earliest"}; + + TEST_SAY("=== %d consumers, %d topics x %d partitions ===\n", + consumer_cnt, topic_cnt, partition_cnt); + + for (int t = 0; t < topic_cnt; t++) { + topics[t] = test_mk_topic_name("0154-share-multiT", t); + test_create_topic_wait_exists(NULL, topics[t], partition_cnt, + -1, 60 * 1000); + for (int p = 0; p < partition_cnt; p++) + test_produce_msgs_easy(topics[t], p, + p, + msgs_per_partition); + } + + for (int i = 0; i < consumer_cnt; i++) { + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + consumers[i] = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumers[i], "create failed (%d): %s", i, errstr); + } + + test_IncrementalAlterConfigs_simple(consumers[0], RD_KAFKA_RESOURCE_GROUP, + group, grp_conf, 1); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(topic_cnt); + for (int t = 0; t < topic_cnt; t++) + rd_kafka_topic_partition_list_add(subs, topics[t], + RD_KAFKA_PARTITION_UA); + for (int i = 0; i < consumer_cnt; i++) + rd_kafka_subscribe(consumers[i], subs); + 
rd_kafka_topic_partition_list_destroy(subs); + + rd_kafka_message_t *batch[128]; + + while (consumed_total < total_msgs && attempts-- > 0) { + for (int i = 0; i < consumer_cnt; i++) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(consumers[i], 1000, + batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t m = 0; m < rcvd; m++) { + if (!batch[m]->err) { + per_cons[i]++; + consumed_total++; + } + rd_kafka_message_destroy(batch[m]); + } + } + TEST_SAY("Progress: total=%d/%d c0=%d c1=%d c2=%d\n", + consumed_total, total_msgs, + per_cons[0], per_cons[1], per_cons[2]); + } + + TEST_ASSERT(consumed_total == total_msgs, + "Expected %d total, got %d", total_msgs, consumed_total); + + TEST_SAY("✓ All %d messages consumed across %d consumers " + "(dist: c0=%d c1=%d c2=%d)\n", + consumed_total, consumer_cnt, + per_cons[0], per_cons[1], per_cons[2]); + + for (int t = 0; t < topic_cnt; t++) + test_delete_topic(consumers[0], topics[t]); + + for (int i = 0; i < consumer_cnt; i++) { + rd_kafka_consumer_close(consumers[i]); + rd_kafka_destroy(consumers[i]); + } +} + int main_0154_share_consumer(int argc, char **argv) { @@ -559,6 +1008,11 @@ int main_0154_share_consumer(int argc, char **argv) { test_subscribe_poll_subscribe(); test_subscribe_unsubscribe_poll_fails(); test_share_consumer_messages(); - //test_share_consumer_multiple_topics(); + test_share_consumer_multiple_topics(); + test_share_consumer_multi_members_same_topic(); + test_share_single_consumer_multi_partitions_one_topic(); + test_share_single_consumer_multi_partitions_multi_topics(); + test_share_multi_consumers_multi_partitions_one_topic(); + test_share_multi_consumers_multi_partitions_multi_topics(); return 0; } From a4b1fc78e5f874390c6b3702516f4af36d41de49 Mon Sep 17 00:00:00 2001 From: PratRanj07 Date: Wed, 26 Nov 2025 19:09:27 +0530 Subject: [PATCH 36/37] Modified tests --- tests/0154-share_consumer.c | 30 +++++++++++++++--------------- 1 file 
changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/0154-share_consumer.c b/tests/0154-share_consumer.c index db9b4911cc..232a4d7c67 100644 --- a/tests/0154-share_consumer.c +++ b/tests/0154-share_consumer.c @@ -488,7 +488,6 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - //rd_kafka_conf_set(cons_conf, "debug", "all", errstr, sizeof(errstr)); consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); @@ -505,7 +504,7 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_topic_partition_list_destroy(topics); /* Consume messages from both topics */ - rd_kafka_message_t **rkmessages = malloc(sizeof(rd_kafka_message_t *) * 20); + rd_kafka_message_t **rkmessages = malloc(sizeof(rd_kafka_message_t *) * 500); int attempts = 10; // Number of attempts to poll so the test doesn't run indefinitely while (consumed_count < (msgs_per_topic * 2) && attempts > 0) { @@ -513,7 +512,6 @@ static void test_share_consumer_multiple_topics(void) { rd_kafka_error_t *error; int i; - printf("Polling for messages, consumed so far: %d/%d\n", consumed_count, msgs_per_topic * 2); error = rd_kafka_share_consume_batch(consumer, 3000, rkmessages, &rcvd_msgs); if (error) { @@ -559,7 +557,7 @@ static void test_share_consumer_multi_members_same_topic(void) { char errstr[512]; const char *group = "share-group-multi-member"; char *topic = "0154-share-multi-member"; - const int total_msgs = 20; + const int total_msgs = 1000; int consumed_total = 0; int c1_count = 0; int c2_count = 0; @@ -608,7 +606,7 @@ static void test_share_consumer_multi_members_same_topic(void) { rd_kafka_topic_partition_list_destroy(subs); /* Poll loop: alternate polling both 
consumers */ - rd_kafka_message_t *batch[32]; + rd_kafka_message_t *batch[500]; while (consumed_total < total_msgs && attempts-- > 0) { size_t rcvd1 = 0, rcvd2 = 0; @@ -671,7 +669,7 @@ static void test_share_single_consumer_multi_partitions_one_topic(void) { const char *group = "share-group-single-one-topic-mparts"; const char *topic = "0154-share-one-topic-mparts"; const int partition_cnt = 3; - const int msgs_per_partition = 7; + const int msgs_per_partition = 500; const int total_msgs = partition_cnt * msgs_per_partition; int consumed = 0; int attempts = 30; @@ -693,6 +691,7 @@ static void test_share_single_consumer_multi_partitions_one_topic(void) { rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *consumer = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); TEST_ASSERT(consumer, "create failed: %s", errstr); @@ -706,7 +705,7 @@ static void test_share_single_consumer_multi_partitions_one_topic(void) { rd_kafka_subscribe(consumer, subs); rd_kafka_topic_partition_list_destroy(subs); - rd_kafka_message_t *batch[64]; + rd_kafka_message_t *batch[500]; while (consumed < total_msgs && attempts-- > 0) { size_t rcvd = 0; @@ -743,7 +742,7 @@ static void test_share_single_consumer_multi_partitions_multi_topics(void) { const char *group = "share-group-single-multi-topic-mparts"; const int topic_cnt = 3; const int partition_cnt = 2; - const int msgs_per_partition = 5; + const int msgs_per_partition = 500; char *topics[topic_cnt]; int total_msgs = topic_cnt * partition_cnt * msgs_per_partition; int consumed = 0; @@ -754,7 +753,7 @@ static void test_share_single_consumer_multi_partitions_multi_topics(void) { topic_cnt, partition_cnt); for (int t = 0; t < topic_cnt; t++) { - topics[t] = test_mk_topic_name("0154-share-multiT-mparts", t); + topics[t] = 
rd_strdup(test_mk_topic_name("0154-share-multiT-mparts", 1)); test_create_topic_wait_exists(NULL, topics[t], partition_cnt, -1, 60 * 1000); for (int p = 0; p < partition_cnt; p++) @@ -769,6 +768,7 @@ static void test_share_single_consumer_multi_partitions_multi_topics(void) { rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr)); rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); + rd_kafka_t *consumer = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); TEST_ASSERT(consumer, "create failed: %s", errstr); @@ -784,7 +784,7 @@ static void test_share_single_consumer_multi_partitions_multi_topics(void) { rd_kafka_subscribe(consumer, subs); rd_kafka_topic_partition_list_destroy(subs); - rd_kafka_message_t *batch[128]; + rd_kafka_message_t *batch[500]; while (consumed < total_msgs && attempts-- > 0) { size_t rcvd = 0; @@ -822,7 +822,7 @@ static void test_share_multi_consumers_multi_partitions_one_topic(void) { const char *group = "share-group-multi-cons-one-topic-mparts"; const char *topic = "0154-share-cons-oneT-mparts"; const int partition_cnt = 4; - const int msgs_per_partition = 6; + const int msgs_per_partition = 500; const int total_msgs = partition_cnt * msgs_per_partition; int consumed_total = 0; int c_counts[4] = {0}; @@ -862,7 +862,7 @@ static void test_share_multi_consumers_multi_partitions_one_topic(void) { rd_kafka_subscribe(consumers[i], subs); rd_kafka_topic_partition_list_destroy(subs); - rd_kafka_message_t *batch[64]; + rd_kafka_message_t *batch[500]; while (consumed_total < total_msgs && attempts-- > 0) { for (int i = 0; i < consumer_cnt; i++) { @@ -909,7 +909,7 @@ static void test_share_multi_consumers_multi_partitions_multi_topics(void) { const char *group = "share-group-multi-cons-multiT-mparts"; const int topic_cnt = 2; const int partition_cnt = 3; - const int msgs_per_partition = 5; + const int msgs_per_partition = 
500; const int consumer_cnt = 3; char *topics[topic_cnt]; int total_msgs = topic_cnt * partition_cnt * msgs_per_partition; @@ -924,7 +924,7 @@ static void test_share_multi_consumers_multi_partitions_multi_topics(void) { consumer_cnt, topic_cnt, partition_cnt); for (int t = 0; t < topic_cnt; t++) { - topics[t] = test_mk_topic_name("0154-share-multiT", t); + topics[t] = rd_strdup(test_mk_topic_name("0154-share-multiT", 1)); test_create_topic_wait_exists(NULL, topics[t], partition_cnt, -1, 60 * 1000); for (int p = 0; p < partition_cnt; p++) @@ -957,7 +957,7 @@ static void test_share_multi_consumers_multi_partitions_multi_topics(void) { rd_kafka_subscribe(consumers[i], subs); rd_kafka_topic_partition_list_destroy(subs); - rd_kafka_message_t *batch[128]; + rd_kafka_message_t *batch[500]; while (consumed_total < total_msgs && attempts-- > 0) { for (int i = 0; i < consumer_cnt; i++) { From 869479dbe3b372e8364a4068cf2b067ee0d73a55 Mon Sep 17 00:00:00 2001 From: PratRanj07 Date: Fri, 28 Nov 2025 13:00:55 +0530 Subject: [PATCH 37/37] Created separate file for tests and added more tests --- tests/0170-share_consumer_subscription.c | 885 ++++++++++++++++++ ...nsumer.c => 0171-share_consumer_consume.c} | 397 ++------ tests/CMakeLists.txt | 3 +- tests/test.c | 6 +- win32/tests/tests.vcxproj | 3 +- 5 files changed, 969 insertions(+), 325 deletions(-) create mode 100644 tests/0170-share_consumer_subscription.c rename tests/{0154-share_consumer.c => 0171-share_consumer_consume.c} (69%) diff --git a/tests/0170-share_consumer_subscription.c b/tests/0170-share_consumer_subscription.c new file mode 100644 index 0000000000..eeb7a6e261 --- /dev/null +++ b/tests/0170-share_consumer_subscription.c @@ -0,0 +1,885 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2025, Confluent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +/** + * Subscription introspection: + * Subscribe to 3 topics, verify subscription(), then unsubscribe, verify empty. 
+ */ +static void test_subscription_introspection(void) { + char errstr[512]; + const char *group = "share-group-sub-introspect"; + const char *t1 = "0154-share-sub-intro-1"; + const char *t2 = "0154-share-sub-intro-2"; + const char *t3 = "0154-share-sub-intro-3"; + + test_create_topic_wait_exists(NULL, t1, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, t2, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, t3, 1, -1, 30 * 1000); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + rd_kafka_t *c = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(c, "%s", errstr); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(3); + rd_kafka_topic_partition_list_add(subs, t1, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs, t2, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs, t3, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(c, subs), "subscribe failed"); + rd_kafka_topic_partition_list_destroy(subs); + + rd_kafka_topic_partition_list_t *cur = NULL; + TEST_ASSERT(!rd_kafka_subscription(c, &cur) && cur, + "subscription() failed"); + TEST_ASSERT(cur->cnt == 3, "expected 3 topics, got %d", cur->cnt); + rd_kafka_topic_partition_list_destroy(cur); + + TEST_ASSERT(!rd_kafka_unsubscribe(c), "unsubscribe failed"); + + cur = NULL; + TEST_ASSERT(!rd_kafka_subscription(c, &cur) && cur, + "subscription() after unsubscribe failed"); + TEST_ASSERT(cur->cnt == 0, + "expected 0 topics after unsubscribe, got %d", cur->cnt); + rd_kafka_topic_partition_list_destroy(cur); + + rd_kafka_consumer_close(c); + rd_kafka_destroy(c); +} + +/** + * Unsubscribe 
idempotence: + * First unsubscribe empties subscription, second is no-op. + */ +static void test_unsubscribe_idempotence(void) { + char errstr[512]; + const char *group = "share-group-unsub-idem"; + const char *t1 = "0154-share-unsub-idem-1"; + const char *t2 = "0154-share-unsub-idem-2"; + + test_create_topic_wait_exists(NULL, t1, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, t2, 1, -1, 30 * 1000); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + rd_kafka_t *c = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(c, "%s", errstr); + + const char *grp_conf[] = {"share.auto.offset.reset", "SET", "earliest"}; + test_IncrementalAlterConfigs_simple(c, RD_KAFKA_RESOURCE_GROUP, group, + grp_conf, 1); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs, t1, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs, t2, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(c, subs), "subscribe failed"); + rd_kafka_topic_partition_list_destroy(subs); + + TEST_ASSERT(!rd_kafka_unsubscribe(c), "first unsubscribe failed"); + TEST_ASSERT(!rd_kafka_unsubscribe(c), + "second unsubscribe should be idempotent"); + + rd_kafka_topic_partition_list_t *cur = NULL; + TEST_ASSERT(!rd_kafka_subscription(c, &cur) && cur, + "subscription() failed"); + TEST_ASSERT(cur->cnt == 0, + "expected 0 after double unsubscribe, got %d", cur->cnt); + rd_kafka_topic_partition_list_destroy(cur); + + test_delete_topic(c, t1); + test_delete_topic(c, t2); + rd_kafka_consumer_close(c); + rd_kafka_destroy(c); +} + +/** + * Resubscribe replacing set (A,B) -> (C,D) 
verifies old topics gone. + */ +static void test_resubscribe_replaces_set(void) { + char errstr[512]; + const char *group = "share-group-resub-replace"; + const char *a = "0154-share-resub-A"; + const char *b = "0154-share-resub-B"; + const char *c = "0154-share-resub-C"; + const char *d = "0154-share-resub-D"; + + test_create_topic_wait_exists(NULL, a, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, b, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, c, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, d, 1, -1, 30 * 1000); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_t *rk = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(rk, "%s", errstr); + + rd_kafka_topic_partition_list_t *subs1 = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs1, a, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs1, b, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs1), "subscribe A,B failed"); + rd_kafka_topic_partition_list_destroy(subs1); + + rd_kafka_topic_partition_list_t *cur = NULL; + rd_kafka_subscription(rk, &cur); + TEST_ASSERT(cur && cur->cnt == 2, "expected 2 after first subscribe"); + rd_kafka_topic_partition_list_destroy(cur); + + rd_kafka_topic_partition_list_t *subs2 = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs2, c, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs2, d, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs2), "resubscribe C,D failed"); + rd_kafka_topic_partition_list_destroy(subs2); + + cur = NULL; + rd_kafka_subscription(rk, &cur); + TEST_ASSERT(cur && cur->cnt == 2, "expected 2 after resubscribe"); + for (int i = 0; i 
< cur->cnt; i++) { + const char *tn = cur->elems[i].topic; + TEST_ASSERT(strcmp(tn, a) && strcmp(tn, b), + "old topic %s still present", tn); + } + rd_kafka_topic_partition_list_destroy(cur); + + rd_kafka_consumer_close(rk); + rd_kafka_destroy(rk); +} + +/** + * Duplicate subscribe call with same list (idempotence). + */ +static void test_duplicate_subscribe_idempotent(void) { + char errstr[512]; + const char *group = "share-group-dup-sub"; + const char *t1 = "0154-share-dup-sub-1"; + const char *t2 = "0154-share-dup-sub-2"; + + test_create_topic_wait_exists(NULL, t1, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, t2, 1, -1, 30 * 1000); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_t *rk = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(rk, "%s", errstr); + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs, t1, RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs, t2, RD_KAFKA_PARTITION_UA); + + TEST_ASSERT(!rd_kafka_subscribe(rk, subs), "first subscribe failed"); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs), + "duplicate subscribe failed"); + + rd_kafka_topic_partition_list_t *cur = NULL; + rd_kafka_subscription(rk, &cur); + TEST_ASSERT(cur && cur->cnt == 2, + "expected exactly 2 after duplicate subscribe"); + rd_kafka_topic_partition_list_destroy(cur); + + rd_kafka_topic_partition_list_destroy(subs); + rd_kafka_consumer_close(rk); + rd_kafka_destroy(rk); +} + +/** + * Subscribe to non-existent topic, then create it, produce, consume. 
+ */ +static void test_subscribe_nonexistent_then_create(void) { + char errstr[512]; + const char *group = "share-group-sub-nonexist"; + const char *topic = test_mk_topic_name("0154-share-nonexist-topic", 1); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_t *rk = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(rk, "%s", errstr); + + const char *confs_set_group[] = {"share.auto.offset.reset", "SET", + "earliest"}; + test_IncrementalAlterConfigs_simple(rk, RD_KAFKA_RESOURCE_GROUP, group, + confs_set_group, 1); + + + rd_kafka_topic_partition_list_t *subs = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subs, topic, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs), + "subscribe non-existent failed"); + rd_kafka_topic_partition_list_destroy(subs); + + /* Confirm subscription shows the topic */ + rd_kafka_topic_partition_list_t *cur = NULL; + rd_kafka_subscription(rk, &cur); + TEST_ASSERT(cur && cur->cnt == 1, "expected 1 subscription"); + rd_kafka_topic_partition_list_destroy(cur); + + /* Now create topic and produce */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 30 * 1000); + test_produce_msgs_easy(topic, 0, 0, 5); + + rd_kafka_message_t *batch[10]; + int got = 0; + int attempts = 10; + while (got < 5 && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(rk, 2000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err) + got++; + rd_kafka_message_destroy(batch[i]); + } + } + TEST_ASSERT(got == 5, + "expected 5 messages after topic creation, got %d", got); + + test_delete_topic(rk, topic); + 
rd_kafka_consumer_close(rk); + rd_kafka_destroy(rk); +} + +/** + * Unsubscribe then immediate subscribe to new topics: ensure old topics gone, + * only new consumed. + */ +static void test_unsubscribe_then_subscribe_new_topics(void) { + char errstr[512]; + const char *group = "share-group-unsub-resub"; + const char *old1 = "0154-share-old-1"; + const char *old2 = "0154-share-old-2"; + const char *new1 = "0154-share-new-1"; + const char *new2 = "0154-share-new-2"; + + test_create_topic_wait_exists(NULL, old1, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, old2, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, new1, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, new2, 1, -1, 30 * 1000); + + test_produce_msgs_easy(old1, 0, 0, 3); + test_produce_msgs_easy(old2, 0, 0, 3); + test_produce_msgs_easy(new1, 0, 0, 4); + test_produce_msgs_easy(new2, 0, 0, 4); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_t *rk = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(rk, "%s", errstr); + + const char *confs_set_group[] = {"share.auto.offset.reset", "SET", + "earliest"}; + test_IncrementalAlterConfigs_simple(rk, RD_KAFKA_RESOURCE_GROUP, group, + confs_set_group, 1); + + rd_kafka_topic_partition_list_t *subs_old = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs_old, old1, + RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs_old, old2, + RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs_old), "subscribe old failed"); + rd_kafka_topic_partition_list_destroy(subs_old); + + /* Unsubscribe immediately */ + TEST_ASSERT(!rd_kafka_unsubscribe(rk), "unsubscribe failed"); + + /* Subscribe new topics */ + 
rd_kafka_topic_partition_list_t *subs_new = + rd_kafka_topic_partition_list_new(2); + rd_kafka_topic_partition_list_add(subs_new, new1, + RD_KAFKA_PARTITION_UA); + rd_kafka_topic_partition_list_add(subs_new, new2, + RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subs_new), "subscribe new failed"); + rd_kafka_topic_partition_list_destroy(subs_new); + + /* Consume; ensure only new topics appear */ + rd_kafka_message_t *batch[50]; + int got_new = 0; + int attempts = 10; + while (got_new < 8 && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(rk, 2000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err) { + const char *tn = + rd_kafka_topic_name(batch[i]->rkt); + TEST_ASSERT( + strcmp(tn, old1) && strcmp(tn, old2), + "received message from old topic %s", tn); + if (!strcmp(tn, new1) || !strcmp(tn, new2)) + got_new++; + } + rd_kafka_message_destroy(batch[i]); + } + } + TEST_ASSERT(got_new == 8, "expected 8 new-topic msgs, got %d", got_new); + + test_delete_topic(rk, old1); + test_delete_topic(rk, old2); + test_delete_topic(rk, new1); + test_delete_topic(rk, new2); + + rd_kafka_consumer_close(rk); + rd_kafka_destroy(rk); +} + +/** + * Re-subscribe while messages exist: + * Consume some from A, then resubscribe to B only; ensure no A messages + * afterwards. 
+ */ +static void test_resubscribe_switch_topics(void) { + char errstr[512]; + const char *group = "share-group-switch"; + const char *topicA = "0154-share-switch-A-resubscribe"; + const char *topicB = "0154-share-switch-B-resubscribe"; + const int msgsA_initial = 5; + const int msgsA_extra = 3; + const int msgsB_initial = 7; + const int msgsB_extra = 4; + + test_create_topic_wait_exists(NULL, topicA, 1, -1, 30 * 1000); + test_create_topic_wait_exists(NULL, topicB, 1, -1, 30 * 1000); + test_produce_msgs_easy(topicA, 0, 0, msgsA_initial); + test_produce_msgs_easy(topicB, 0, 0, msgsB_initial); + + rd_kafka_conf_t *conf; + test_conf_init(&conf, NULL, 60); + rd_kafka_conf_set(conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + rd_kafka_t *rk = + rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); + TEST_ASSERT(rk, "%s", errstr); + + const char *grp_conf[] = {"share.auto.offset.reset", "SET", "earliest"}; + test_IncrementalAlterConfigs_simple(rk, RD_KAFKA_RESOURCE_GROUP, group, + grp_conf, 1); + + rd_kafka_topic_partition_list_t *subsA = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subsA, topicA, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subsA), "subscribe A failed"); + rd_kafka_topic_partition_list_destroy(subsA); + + int gotA = 0; + int attempts = 10; + rd_kafka_message_t *batch[128]; + while (gotA < msgsA_initial && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(rk, 2000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err && + !strcmp(rd_kafka_topic_name(batch[i]->rkt), topicA)) + gotA++; + rd_kafka_message_destroy(batch[i]); + } + } + 
TEST_ASSERT(gotA > 0, + "did not consume any messages from A before resubscribe"); + + /* Add extra messages to A that should not be seen after switching */ + test_produce_msgs_easy(topicA, 0, 0, msgsA_extra); + + /* Resubscribe to B only */ + rd_kafka_topic_partition_list_t *subsB = + rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(subsB, topicB, RD_KAFKA_PARTITION_UA); + TEST_ASSERT(!rd_kafka_subscribe(rk, subsB), "resubscribe B failed"); + rd_kafka_topic_partition_list_destroy(subsB); + + /* Produce extra B after resubscribe */ + test_produce_msgs_easy(topicB, 0, 0, msgsB_extra); + + int wantB = msgsB_initial + msgsB_extra; + int gotB = 0; + attempts = 25; + + while (gotB < wantB && attempts-- > 0) { + size_t rcvd = 0; + rd_kafka_error_t *err = + rd_kafka_share_consume_batch(rk, 3000, batch, &rcvd); + if (err) { + rd_kafka_error_destroy(err); + continue; + } + for (size_t i = 0; i < rcvd; i++) { + if (!batch[i]->err) { + const char *tn = + rd_kafka_topic_name(batch[i]->rkt); + TEST_ASSERT(!strcmp(tn, topicB), + "received message from old topic " + "%s after resubscribe", + tn); + gotB++; + } + rd_kafka_message_destroy(batch[i]); + } + } + + TEST_ASSERT(gotB == wantB, "expected %d B messages, got %d", wantB, + gotB); + + test_delete_topic(rk, topicA); + test_delete_topic(rk, topicB); + rd_kafka_consumer_close(rk); + rd_kafka_destroy(rk); +} + + +/** + * @brief Test that polling without subscription fails + */ +static void test_poll_no_subscribe_fails(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + char *group = "share-group-no-subscribe"; + + TEST_SAY("=== Testing poll without subscription fails ===\n"); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, 
sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + + consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Try to poll without subscribing - should fail or return timeout */ + TEST_SAY("Attempting to poll without subscription\n"); + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = + rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is + * properly handled + */ + // TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it + // succeeded"); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe and poll with no records available + */ +static void test_subscribe_and_poll_no_records(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + const char *topic = "0154-share-empty-records"; + const char *group = "share-group-empty"; + + TEST_SAY("=== Testing subscribe and poll with no records ===\n"); + + /* Create empty topic (no messages produced) */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + TEST_SAY("Created empty topic: %s\n", topic); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + + consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to empty topic */ + topics 
= rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to empty topic, polling for messages\n"); + + /* Poll for messages - should get none */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = + rd_kafka_share_consume_batch(consumer, 5000, msgs, &rcvd_msgs); + + TEST_ASSERT(rcvd_msgs == 0, + "Should not receive messages from empty topic"); + TEST_SAY("✓ No messages received from empty topic (expected)\n"); + + test_delete_topic(consumer, topic); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, poll, then unsubscribe + */ +static void test_subscribe_poll_unsubscribe(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + const char *topic = "0154-share-unsub"; + const char *group = "share-group-unsub"; + const int msg_count = 5; + + TEST_SAY("=== Testing subscribe, poll, then unsubscribe ===\n"); + + /* Create topic and produce messages */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + test_produce_msgs_easy(topic, 0, 0, msg_count); + TEST_SAY("Produced %d messages\n", msg_count); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + + consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to topic */ + topics = 
rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to topic, consuming messages\n"); + + /* Poll for some messages */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + int consumed_count = 0; + + rd_kafka_error_t *error = + rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + consumed_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages before unsubscribe\n", + consumed_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + /* Unsubscribe from all topics */ + TEST_SAY("Unsubscribing from all topics\n"); + rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); + TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); + + /* Try to poll after unsubscribe - should fail or get no messages */ + TEST_SAY("Attempting to poll after unsubscribe\n"); + rcvd_msgs = 0; + error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is + * properly handled + */ + // TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it + // succeeded"); + + test_delete_topic(consumer, topic); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, poll, then subscribe to different topic + */ +static void test_subscribe_poll_subscribe(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + char *topic1 = "test-topic-0154-share-sub1"; + char *topic2 = "test-topic-0154-share-sub2"; + char *group = "share-group-resub"; + const int msg_count = 3; + + TEST_SAY("=== Testing subscribe, poll, then resubscribe 
===\n"); + + /* Create topics and produce messages */ + test_create_topic_wait_exists(NULL, topic1, 1, -1, 60 * 1000); + test_create_topic_wait_exists(NULL, topic2, 1, -1, 60 * 1000); + + test_produce_msgs_easy(topic1, 0, 0, msg_count); + test_produce_msgs_easy(topic2, 0, 0, msg_count); + TEST_SAY("Produced %d messages to each topic\n", msg_count); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + + consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to first topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic1, + RD_KAFKA_PARTITION_UA); + rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to first topic: %s\n", topic1); + + /* Poll from first topic */ + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + int topic1_count = 0; + + rd_kafka_error_t *error = + rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + topic1_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages from topic1\n", topic1_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + /* Subscribe to second topic */ + TEST_SAY("Resubscribing to second topic: %s\n", topic2); + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic2, + RD_KAFKA_PARTITION_UA); + 
rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + /* Poll from second topic */ + rcvd_msgs = 0; + int topic2_count = 0; + + error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); + + if (!error && rcvd_msgs > 0) { + for (int i = 0; i < (int)rcvd_msgs; i++) { + if (!msgs[i]->err) { + topic2_count++; + } + rd_kafka_message_destroy(msgs[i]); + } + TEST_SAY("Consumed %d messages from topic2\n", topic2_count); + } else if (error) { + rd_kafka_error_destroy(error); + } + + TEST_SAY( + "✓ Successfully resubscribed and consumed from different topics\n"); + + test_delete_topic(consumer, topic1); + test_delete_topic(consumer, topic2); + + free(msgs); + rd_kafka_destroy(consumer); +} + +/** + * @brief Test subscribe, unsubscribe, then poll fails + */ +static void test_subscribe_unsubscribe_poll_fails(void) { + char errstr[512]; + rd_kafka_conf_t *cons_conf; + rd_kafka_t *consumer; + rd_kafka_topic_partition_list_t *topics; + const char *topic = "0154-share-unsub-fail"; + const char *group = "share-group-unsub-fail"; + + TEST_SAY("=== Testing subscribe, unsubscribe, then poll fails ===\n"); + + /* Create topic */ + test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); + test_produce_msgs_easy(topic, 0, 0, 3); + + /* Create share consumer */ + test_conf_init(&cons_conf, NULL, 60); + rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, + sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); + rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, + sizeof(errstr)); + + consumer = + rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); + TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); + + /* Subscribe to topic */ + topics = rd_kafka_topic_partition_list_new(1); + rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); + 
rd_kafka_subscribe(consumer, topics); + rd_kafka_topic_partition_list_destroy(topics); + + TEST_SAY("Subscribed to topic: %s\n", topic); + + /* Immediately unsubscribe */ + TEST_SAY("Unsubscribing immediately\n"); + rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); + TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); + + /* Try to poll - should fail */ + TEST_SAY("Attempting to poll after unsubscribe\n"); + rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); + size_t rcvd_msgs = 0; + + rd_kafka_error_t *error = + rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); + + /** + * TODO KIP-932: Uncomment once polling before any subscription is + * properly handled + */ + // TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it + // succeeded"); + + test_delete_topic(consumer, topic); + + free(msgs); + rd_kafka_destroy(consumer); +} + +int main_0170_share_consumer_subscription(int argc, char **argv) { + test_subscription_introspection(); + test_unsubscribe_idempotence(); + test_resubscribe_replaces_set(); + test_duplicate_subscribe_idempotent(); + test_subscribe_nonexistent_then_create(); + test_unsubscribe_then_subscribe_new_topics(); + test_resubscribe_switch_topics(); + test_poll_no_subscribe_fails(); + test_subscribe_and_poll_no_records(); + test_subscribe_poll_unsubscribe(); + test_subscribe_poll_subscribe(); + test_subscribe_unsubscribe_poll_fails(); + return 0; +} \ No newline at end of file diff --git a/tests/0154-share_consumer.c b/tests/0171-share_consumer_consume.c similarity index 69% rename from tests/0154-share_consumer.c rename to tests/0171-share_consumer_consume.c index 232a4d7c67..d9bbc7ef19 100644 --- a/tests/0154-share_consumer.c +++ b/tests/0171-share_consumer_consume.c @@ -28,321 +28,6 @@ #include "test.h" -/** - * @brief Test that polling without subscription fails - */ -static void test_poll_no_subscribe_fails(void) { - char errstr[512]; - rd_kafka_conf_t *cons_conf; - 
rd_kafka_t *consumer; - char *group = "share-group-no-subscribe"; - - TEST_SAY("=== Testing poll without subscription fails ===\n"); - - /* Create share consumer */ - test_conf_init(&cons_conf, NULL, 60); - rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - - consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); - TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); - - /* Try to poll without subscribing - should fail or return timeout */ - TEST_SAY("Attempting to poll without subscription\n"); - rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); - size_t rcvd_msgs = 0; - - rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); - - /** - * TODO KIP-932: Uncomment once polling before any subscription is properly handled - */ - //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); - - free(msgs); - rd_kafka_destroy(consumer); -} - -/** - * @brief Test subscribe and poll with no records available - */ -static void test_subscribe_and_poll_no_records(void) { - char errstr[512]; - rd_kafka_conf_t *cons_conf; - rd_kafka_t *consumer; - rd_kafka_topic_partition_list_t *topics; - const char *topic = "0154-share-empty-records"; - const char *group = "share-group-empty"; - - TEST_SAY("=== Testing subscribe and poll with no records ===\n"); - - /* Create empty topic (no messages produced) */ - test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); - TEST_SAY("Created empty topic: %s\n", topic); - - /* Create share consumer */ - test_conf_init(&cons_conf, NULL, 60); - rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, 
"group.protocol", "consumer", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - - consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); - TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); - - /* Subscribe to empty topic */ - topics = rd_kafka_topic_partition_list_new(1); - rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(consumer, topics); - rd_kafka_topic_partition_list_destroy(topics); - - TEST_SAY("Subscribed to empty topic, polling for messages\n"); - - /* Poll for messages - should get none */ - rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); - size_t rcvd_msgs = 0; - - rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 5000, msgs, &rcvd_msgs); - - TEST_ASSERT(rcvd_msgs == 0, "Should not receive messages from empty topic"); - TEST_SAY("✓ No messages received from empty topic (expected)\n"); - - test_delete_topic(consumer, topic); - - free(msgs); - rd_kafka_destroy(consumer); -} - -/** - * @brief Test subscribe, poll, then unsubscribe - */ -static void test_subscribe_poll_unsubscribe(void) { - char errstr[512]; - rd_kafka_conf_t *cons_conf; - rd_kafka_t *consumer; - rd_kafka_topic_partition_list_t *topics; - const char *topic = "0154-share-unsub"; - const char *group = "share-group-unsub"; - const int msg_count = 5; - - TEST_SAY("=== Testing subscribe, poll, then unsubscribe ===\n"); - - /* Create topic and produce messages */ - test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); - test_produce_msgs_easy(topic, 0, 0, msg_count); - TEST_SAY("Produced %d messages\n", msg_count); - - /* Create share consumer */ - test_conf_init(&cons_conf, NULL, 60); - rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, 
sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - - consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); - TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); - - /* Subscribe to topic */ - topics = rd_kafka_topic_partition_list_new(1); - rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(consumer, topics); - rd_kafka_topic_partition_list_destroy(topics); - - TEST_SAY("Subscribed to topic, consuming messages\n"); - - /* Poll for some messages */ - rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); - size_t rcvd_msgs = 0; - int consumed_count = 0; - - rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); - - if (!error && rcvd_msgs > 0) { - for (int i = 0; i < (int)rcvd_msgs; i++) { - if (!msgs[i]->err) { - consumed_count++; - } - rd_kafka_message_destroy(msgs[i]); - } - TEST_SAY("Consumed %d messages before unsubscribe\n", consumed_count); - } else if (error) { - rd_kafka_error_destroy(error); - } - - /* Unsubscribe from all topics */ - TEST_SAY("Unsubscribing from all topics\n"); - rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); - TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); - - /* Try to poll after unsubscribe - should fail or get no messages */ - TEST_SAY("Attempting to poll after unsubscribe\n"); - rcvd_msgs = 0; - error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); - - /** - * TODO KIP-932: Uncomment once polling before any subscription is properly handled - */ - //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); - - test_delete_topic(consumer, topic); - - free(msgs); - rd_kafka_destroy(consumer); -} - -/** - * @brief Test subscribe, poll, then subscribe to different topic - */ -static void 
test_subscribe_poll_subscribe(void) { - char errstr[512]; - rd_kafka_conf_t *cons_conf; - rd_kafka_t *consumer; - rd_kafka_topic_partition_list_t *topics; - char *topic1 = "test-topic-0154-share-sub1"; - char *topic2 = "test-topic-0154-share-sub2"; - char *group = "share-group-resub"; - const int msg_count = 3; - - TEST_SAY("=== Testing subscribe, poll, then resubscribe ===\n"); - - /* Create topics and produce messages */ - test_create_topic_wait_exists(NULL, topic1, 1, -1, 60 * 1000); - test_create_topic_wait_exists(NULL, topic2, 1, -1, 60 * 1000); - - test_produce_msgs_easy(topic1, 0, 0, msg_count); - test_produce_msgs_easy(topic2, 0, 0, msg_count); - TEST_SAY("Produced %d messages to each topic\n", msg_count); - - /* Create share consumer */ - test_conf_init(&cons_conf, NULL, 60); - rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "enable.auto.commit", "false", errstr, sizeof(errstr)); - - consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); - TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); - - /* Subscribe to first topic */ - topics = rd_kafka_topic_partition_list_new(1); - rd_kafka_topic_partition_list_add(topics, topic1, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(consumer, topics); - rd_kafka_topic_partition_list_destroy(topics); - - TEST_SAY("Subscribed to first topic: %s\n", topic1); - - /* Poll from first topic */ - rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); - size_t rcvd_msgs = 0; - int topic1_count = 0; - - rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); - - if (!error && rcvd_msgs > 0) { - for (int i = 0; i < (int)rcvd_msgs; i++) { - if (!msgs[i]->err) { - topic1_count++; - } - rd_kafka_message_destroy(msgs[i]); - } - 
TEST_SAY("Consumed %d messages from topic1\n", topic1_count); - } else if (error) { - rd_kafka_error_destroy(error); - } - - /* Subscribe to second topic */ - TEST_SAY("Resubscribing to second topic: %s\n", topic2); - topics = rd_kafka_topic_partition_list_new(1); - rd_kafka_topic_partition_list_add(topics, topic2, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(consumer, topics); - rd_kafka_topic_partition_list_destroy(topics); - - /* Poll from second topic */ - rcvd_msgs = 0; - int topic2_count = 0; - - error = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd_msgs); - - if (!error && rcvd_msgs > 0) { - for (int i = 0; i < (int)rcvd_msgs; i++) { - if (!msgs[i]->err) { - topic2_count++; - } - rd_kafka_message_destroy(msgs[i]); - } - TEST_SAY("Consumed %d messages from topic2\n", topic2_count); - } else if (error) { - rd_kafka_error_destroy(error); - } - - TEST_SAY("✓ Successfully resubscribed and consumed from different topics\n"); - - test_delete_topic(consumer, topic1); - test_delete_topic(consumer, topic2); - - free(msgs); - rd_kafka_destroy(consumer); -} - -/** - * @brief Test subscribe, unsubscribe, then poll fails - */ -static void test_subscribe_unsubscribe_poll_fails(void) { - char errstr[512]; - rd_kafka_conf_t *cons_conf; - rd_kafka_t *consumer; - rd_kafka_topic_partition_list_t *topics; - const char *topic = "0154-share-unsub-fail"; - const char *group = "share-group-unsub-fail"; - - TEST_SAY("=== Testing subscribe, unsubscribe, then poll fails ===\n"); - - /* Create topic */ - test_create_topic_wait_exists(NULL, topic, 1, -1, 60 * 1000); - test_produce_msgs_easy(topic, 0, 0, 3); - - /* Create share consumer */ - test_conf_init(&cons_conf, NULL, 60); - rd_kafka_conf_set(cons_conf, "share.consumer", "true", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.protocol", "consumer", errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "group.id", group, errstr, sizeof(errstr)); - rd_kafka_conf_set(cons_conf, "enable.auto.commit", 
"false", errstr, sizeof(errstr)); - - consumer = rd_kafka_new(RD_KAFKA_CONSUMER, cons_conf, errstr, sizeof(errstr)); - TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr); - - /* Subscribe to topic */ - topics = rd_kafka_topic_partition_list_new(1); - rd_kafka_topic_partition_list_add(topics, topic, RD_KAFKA_PARTITION_UA); - rd_kafka_subscribe(consumer, topics); - rd_kafka_topic_partition_list_destroy(topics); - - TEST_SAY("Subscribed to topic: %s\n", topic); - - /* Immediately unsubscribe */ - TEST_SAY("Unsubscribing immediately\n"); - rd_kafka_resp_err_t err = rd_kafka_unsubscribe(consumer); - TEST_ASSERT(!err, "Failed to unsubscribe: %s", rd_kafka_err2str(err)); - - /* Try to poll - should fail */ - TEST_SAY("Attempting to poll after unsubscribe\n"); - rd_kafka_message_t **msgs = malloc(sizeof(rd_kafka_message_t *) * 10); - size_t rcvd_msgs = 0; - - rd_kafka_error_t *error = rd_kafka_share_consume_batch(consumer, 2000, msgs, &rcvd_msgs); - - /** - * TODO KIP-932: Uncomment once polling before any subscription is properly handled - */ - //TEST_ASSERT(error, "Expected poll to fail after unsubscribe, but it succeeded"); - - test_delete_topic(consumer, topic); - - free(msgs); - rd_kafka_destroy(consumer); -} /** * @brief Test producing and consuming 10 messages @@ -999,14 +684,82 @@ static void test_share_multi_consumers_multi_partitions_multi_topics(void) { } } +static void test_batch_all_partitions_arrive_together(void) { + char errstr[512]; + const char *group = "share-group-batch-all"; + const char *topic = test_mk_topic_name("0154-share-batch-all", 1); + const int partition_cnt = 3; + const int msgs_per_partition = 5; + const int total_msgs = partition_cnt * msgs_per_partition; + + TEST_SAY("=== Expect single batch of %d msgs across %d partitions ===\n", + total_msgs, partition_cnt); + + /* Create topic with 3 partitions and produce 5 msgs per partition */ + test_create_topic_wait_exists(NULL, topic, partition_cnt, -1, 60 * 1000); + for (int p = 0; p 
< partition_cnt; p++)
+                test_produce_msgs_easy(topic, p, p, msgs_per_partition);
+
+        /* Create share consumer */
+        rd_kafka_conf_t *conf;
+        test_conf_init(&conf, NULL, 60);
+        rd_kafka_conf_set(conf, "share.consumer", "true", errstr, sizeof(errstr));
+        rd_kafka_conf_set(conf, "group.protocol", "consumer", errstr, sizeof(errstr));
+        rd_kafka_conf_set(conf, "group.id", group, errstr, sizeof(errstr));
+        rd_kafka_conf_set(conf, "enable.auto.commit", "false", errstr, sizeof(errstr));
+        rd_kafka_t *consumer = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr));
+        TEST_ASSERT(consumer, "Failed to create consumer: %s", errstr);
+
+        /* Read from beginning */
+        const char *grp_conf[] = {"share.auto.offset.reset","SET","earliest"};
+        test_IncrementalAlterConfigs_simple(consumer, RD_KAFKA_RESOURCE_GROUP, group, grp_conf, 1);
+
+        /* Subscribe */
+        rd_kafka_topic_partition_list_t *subs = rd_kafka_topic_partition_list_new(1);
+        rd_kafka_topic_partition_list_add(subs, topic, RD_KAFKA_PARTITION_UA);
+        TEST_ASSERT(!rd_kafka_subscribe(consumer, subs), "subscribe failed");
+        rd_kafka_topic_partition_list_destroy(subs);
+
+        /* Consume once, expect all 15 messages in single batch */
+        rd_kafka_message_t *msgs[32];
+        size_t rcvd = 0;
+        int counts[3]; /* == partition_cnt; fixed size since MSVC (win32 build) has no C99 VLAs */
+        memset(counts, 0, sizeof(counts));
+
+        rd_kafka_error_t *err = rd_kafka_share_consume_batch(consumer, 10000, msgs, &rcvd);
+        TEST_ASSERT(!err, "Consume error: %s", err ? 
rd_kafka_error_string(err) : ""); + if (err) rd_kafka_error_destroy(err); + + TEST_SAY("Received %zu messages in one batch\n", rcvd); + TEST_ASSERT(rcvd == (size_t)total_msgs, + "Expected %d messages in single batch, got %zu", + total_msgs, rcvd); + + /* Verify 5 per partition, destroy messages */ + for (size_t i = 0; i < rcvd; i++) { + rd_kafka_message_t *m = msgs[i]; + TEST_ASSERT(!m->err, "Message error: %s", rd_kafka_message_errstr(m)); + int p = m->partition; + TEST_ASSERT(p >= 0 && p < partition_cnt, "Unexpected partition %d", p); + counts[p]++; + rd_kafka_message_destroy(m); + } + for (int p = 0; p < partition_cnt; p++) { + TEST_ASSERT(counts[p] == msgs_per_partition, + "Partition %d expected %d msgs, got %d", + p, msgs_per_partition, counts[p]); + } + + TEST_SAY("✓ Single batch contained all %d msgs (5 per partition)\n", total_msgs); + + test_delete_topic(consumer, topic); + rd_kafka_consumer_close(consumer); + rd_kafka_destroy(consumer); +} + -int main_0154_share_consumer(int argc, char **argv) { +int main_0171_share_consumer_consume(int argc, char **argv) { - test_poll_no_subscribe_fails(); - test_subscribe_and_poll_no_records(); - test_subscribe_poll_unsubscribe(); - test_subscribe_poll_subscribe(); - test_subscribe_unsubscribe_poll_fails(); test_share_consumer_messages(); test_share_consumer_multiple_topics(); test_share_consumer_multi_members_same_topic(); @@ -1014,5 +767,7 @@ int main_0154_share_consumer(int argc, char **argv) { test_share_single_consumer_multi_partitions_multi_topics(); test_share_multi_consumers_multi_partitions_one_topic(); test_share_multi_consumers_multi_partitions_multi_topics(); + /* Uncomment it after fixing the issue */ + //test_batch_all_partitions_arrive_together(); return 0; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b5e6f0bfaf..ca5b598c4b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -143,7 +143,8 @@ set( 0151-purge-brokers.c 0152-rebootstrap.c 0153-memberid.c - 
0154-share_consumer.c
+    0170-share_consumer_subscription.c
+    0171-share_consumer_consume.c
     8000-idle.cpp
     8001-fetch_from_follower_mock_manual.c
     test.c
diff --git a/tests/test.c b/tests/test.c
index d0f9dafa68..7646de63c5 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -270,7 +270,8 @@ _TEST_DECL(0150_telemetry_mock);
 _TEST_DECL(0151_purge_brokers_mock);
 _TEST_DECL(0152_rebootstrap_local);
 _TEST_DECL(0153_memberid);
-_TEST_DECL(0154_share_consumer);
+_TEST_DECL(0170_share_consumer_subscription);
+_TEST_DECL(0171_share_consumer_consume);
 
 /* Manual tests */
 _TEST_DECL(8000_idle);
@@ -537,7 +538,8 @@ struct test tests[] = {
     _TEST(0151_purge_brokers_mock, TEST_F_LOCAL),
     _TEST(0152_rebootstrap_local, TEST_F_LOCAL),
     _TEST(0153_memberid, 0, TEST_BRKVER(0, 4, 0, 0)),
-    _TEST(0154_share_consumer, 0, TEST_BRKVER(0, 4, 0, 0)),
+    _TEST(0170_share_consumer_subscription, 0, TEST_BRKVER(0, 4, 0, 0)),
+    _TEST(0171_share_consumer_consume, 0, TEST_BRKVER(0, 4, 0, 0)),
 
 
 /* Manual tests */
diff --git a/win32/tests/tests.vcxproj b/win32/tests/tests.vcxproj
index 03d5825219..9c8f824a23 100644
--- a/win32/tests/tests.vcxproj
+++ b/win32/tests/tests.vcxproj
@@ -233,7 +233,8 @@
-    <ClCompile Include="..\..\tests\0154-share_consumer.c" />
+    <ClCompile Include="..\..\tests\0170-share_consumer_subscription.c" />
+    <ClCompile Include="..\..\tests\0171-share_consumer_consume.c" />