Skip to content

Commit c4f4ef1

Browse files
committed
KAFKA-19865: Document queues metrics changes in ops.html
1 parent 7ee5a2a commit c4f4ef1

File tree

5 files changed

+244
-1
lines changed

5 files changed

+244
-1
lines changed

build.gradle

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1221,6 +1221,13 @@ project(':core') {
12211221
standardOutput = new File(generatedDocsDir, "producer_metrics.html").newOutputStream()
12221222
}
12231223

1224+
// Generates the share consumer client metrics documentation: runs the main method of
// ShareConsumerMetrics on this project's runtime classpath and captures its standard
// output in share_consumer_metrics.html under generatedDocsDir.
task genShareConsumerMetricsDocs(type: JavaExec) {
1225+
classpath = sourceSets.main.runtimeClasspath
1226+
mainClass = 'org.apache.kafka.clients.consumer.internals.ShareConsumerMetrics'
1227+
// Make sure the output directory exists before the output stream is opened below.
if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
1228+
standardOutput = new File(generatedDocsDir, "share_consumer_metrics.html").newOutputStream()
1229+
}
1230+
12241231
task siteDocsTar(dependsOn: ['genProtocolErrorDocs', 'genProtocolTypesDocs', 'genProtocolApiKeyDocs', 'genProtocolMessageDocs',
12251232
'genAdminClientConfigDocs', 'genProducerConfigDocs', 'genConsumerConfigDocs',
12261233
'genKafkaConfigDocs', 'genTopicConfigDocs', 'genGroupConfigDocs',
@@ -1231,7 +1238,7 @@ project(':core') {
12311238
':connect:runtime:genConnectMetricsDocs', ':connect:runtime:genConnectOpenAPIDocs',
12321239
':connect:mirror:genMirrorSourceConfigDocs', ':connect:mirror:genMirrorCheckpointConfigDocs',
12331240
':connect:mirror:genMirrorHeartbeatConfigDocs', ':connect:mirror:genMirrorConnectorConfigDocs',
1234-
':storage:genRemoteLogManagerConfigDoc', ':storage:genRemoteLogMetadataManagerConfigDoc'], type: Tar) {
1241+
':storage:genRemoteLogManagerConfigDoc', ':storage:genRemoteLogMetadataManagerConfigDoc', 'genShareConsumerMetricsDocs'], type: Tar) {
12351242
archiveClassifier = 'site-docs'
12361243
compression = Compression.GZIP
12371244
from project.file("$rootDir/docs")

clients/src/main/java/org/apache/kafka/clients/consumer/internals/ShareConsumerMetrics.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616
*/
1717
package org.apache.kafka.clients.consumer.internals;
1818

19+
import org.apache.kafka.common.MetricNameTemplate;
20+
import org.apache.kafka.common.metrics.Metrics;
21+
22+
import java.util.ArrayList;
1923
import java.util.HashSet;
24+
import java.util.List;
2025
import java.util.Set;
2126

2227
import static org.apache.kafka.clients.consumer.internals.ConsumerUtils.CONSUMER_SHARE_METRIC_GROUP_PREFIX;
@@ -31,4 +36,15 @@ public ShareConsumerMetrics(Set<String> metricsTags, String metricGrpPrefix) {
3136
/**
 * Creates the share consumer metrics with an empty set of metric tags and the
 * {@code CONSUMER_SHARE_METRIC_GROUP_PREFIX} metric group prefix.
 */
public ShareConsumerMetrics() {
3237
this(new HashSet<>(), CONSUMER_SHARE_METRIC_GROUP_PREFIX);
3338
}
39+
40+
/**
 * Collects the metric name templates exposed by the share fetch metrics registry.
 *
 * @return a new {@link ArrayList} holding a copy of the templates returned by
 *         {@code shareFetchMetrics.getAllTemplates()}
 */
private List<MetricNameTemplate> getAllTemplates() {
41+
return new ArrayList<>(this.shareFetchMetrics.getAllTemplates());
42+
}
43+
44+
/**
 * Prints an HTML table of the share consumer metrics to standard output.
 * The table is produced by {@code Metrics.toHtmlTable} for the "kafka.consumer"
 * domain, using templates tagged with "client-id".
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
45+
Set<String> tags = new HashSet<>();
46+
tags.add("client-id");
47+
ShareConsumerMetrics metrics = new ShareConsumerMetrics(tags, CONSUMER_SHARE_METRIC_GROUP_PREFIX);
48+
System.out.println(Metrics.toHtmlTable("kafka.consumer", metrics.getAllTemplates()));
49+
}
3450
}

clients/src/main/java/org/apache/kafka/clients/consumer/internals/ShareFetchMetricsRegistry.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.apache.kafka.common.MetricNameTemplate;
2020

2121
import java.util.HashSet;
22+
import java.util.List;
2223
import java.util.Set;
2324

2425
public class ShareFetchMetricsRegistry {
@@ -98,4 +99,27 @@ public ShareFetchMetricsRegistry(Set<String> tags, String metricGrpPrefix) {
9899
this.fetchThrottleTimeMax = new MetricNameTemplate("fetch-throttle-time-max", groupName,
99100
"The maximum throttle time in ms", tags);
100101
}
102+
103+
/**
 * Returns every metric name template defined by this registry.
 * The templates are listed in a fixed order: fetch size, bytes fetched, records per
 * request, records fetched, acknowledgement send, acknowledgement error, fetch latency,
 * fetch request, and fetch throttle time.
 *
 * @return an unmodifiable list of the registry's metric name templates
 */
public List<MetricNameTemplate> getAllTemplates() {
104+
return List.of(
105+
fetchSizeAvg,
106+
fetchSizeMax,
107+
bytesFetchedRate,
108+
bytesFetchedTotal,
109+
recordsPerRequestAvg,
110+
recordsPerRequestMax,
111+
recordsFetchedRate,
112+
recordsFetchedTotal,
113+
acknowledgementSendRate,
114+
acknowledgementSendTotal,
115+
acknowledgementErrorRate,
116+
acknowledgementErrorTotal,
117+
fetchLatencyAvg,
118+
fetchLatencyMax,
119+
fetchRequestRate,
120+
fetchRequestTotal,
121+
fetchThrottleTimeAvg,
122+
fetchThrottleTimeMax
123+
);
124+
}
101125
}

docs/ops.html

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3980,6 +3980,201 @@ <h5 class="anchor-heading"><a id="kafka_streams_cache_monitoring" class="anchor-
39803980
</tbody>
39813981
</table>
39823982

3983+
<h4 class="anchor-heading"><a id="kafka_share_group_monitoring" class="anchor-link"></a><a href="#kafka_share_group_monitoring">Share Group Monitoring</a></h4>
3984+
The following metrics are available for monitoring share groups:<br/><br/>
3985+
<table class="data-table">
3986+
<tbody><tr>
3987+
<th>Metric/Attribute name</th>
3988+
<th>Mbean name</th>
3989+
<th>Description</th>
3990+
</tr>
3991+
<tr>
3992+
<td>TotalShareFetchRequestsPerSec</td>
3993+
<td>kafka.server:type=BrokerTopicMetrics,name=TotalShareFetchRequestsPerSec,topic=([-.\w]+)</td>
3994+
<td>The share fetch request rate per second.</td>
3995+
</tr>
3996+
<tr>
3997+
<td>FailedShareFetchRequestsPerSec</td>
3998+
<td>kafka.server:type=BrokerTopicMetrics,name=FailedShareFetchRequestsPerSec,topic=([-.\w]+)</td>
3999+
<td>The share fetch request rate for requests that failed.</td>
4000+
</tr>
4001+
<tr>
4002+
<td>TotalShareAcknowledgementRequestsPerSec</td>
4003+
<td>kafka.server:type=BrokerTopicMetrics,name=TotalShareAcknowledgementRequestsPerSec,topic=([-.\w]+)</td>
4004+
<td>The share acknowledgement request rate per second.</td>
4005+
</tr>
4006+
<tr>
4007+
<td>FailedShareAcknowledgementRequestsPerSec</td>
4008+
<td>kafka.server:type=BrokerTopicMetrics,name=FailedShareAcknowledgementRequestsPerSec,topic=([-.\w]+)</td>
4009+
<td>The share acknowledgement request rate for requests that failed.</td>
4010+
</tr>
4011+
<tr>
4012+
<td>RecordAcknowledgementsPerSec</td>
4013+
<td>kafka.server:type=ShareGroupMetrics,name=RecordAcknowledgementsPerSec,ackType={Accept|Release|Reject|Renew}</td>
4014+
<td>The rate per second of records acknowledged per acknowledgement type.</td>
4015+
</tr>
4016+
<tr>
4017+
<td>PartitionLoadTimeMs</td>
4018+
<td>kafka.server:type=ShareGroupMetrics,name=PartitionLoadTimeMs</td>
4019+
<td>The time taken to load the share partitions.</td>
4020+
</tr>
4021+
<tr>
4022+
<td>RequestTopicPartitionsFetchRatio</td>
4023+
<td>kafka.server:type=ShareGroupMetrics,name=RequestTopicPartitionsFetchRatio,group=([-.\w]+)</td>
4024+
<td>The ratio of topic-partitions acquired to the total number of topic-partitions in the share fetch request.</td>
4025+
</tr>
4026+
<tr>
4027+
<td>TopicPartitionsAcquireTimeMs</td>
4028+
<td>kafka.server:type=ShareGroupMetrics,name=TopicPartitionsAcquireTimeMs,group=([-.\w]+)</td>
4029+
<td>The time elapsed (in milliseconds) to acquire any topic partition for fetch.</td>
4030+
</tr>
4031+
<tr>
4032+
<td>AcquisitionLockTimeoutPerSec</td>
4033+
<td>kafka.server:type=SharePartitionMetrics,name=AcquisitionLockTimeoutPerSec,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4034+
<td>The rate of acquisition locks for records which are not acknowledged within the timeout.</td>
4035+
</tr>
4036+
<tr>
4037+
<td>InFlightMessageCount</td>
4038+
<td>kafka.server:type=SharePartitionMetrics,name=InFlightMessageCount,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4039+
<td>The number of in-flight messages for the share partition.</td>
4040+
</tr>
4041+
<tr>
4042+
<td>InFlightBatchCount</td>
4043+
<td>kafka.server:type=SharePartitionMetrics,name=InFlightBatchCount,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4044+
<td>The number of in-flight batches for the share partition.</td>
4045+
</tr>
4046+
<tr>
4047+
<td>InFlightBatchMessageCount</td>
4048+
<td>kafka.server:type=SharePartitionMetrics,name=InFlightBatchMessageCount,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4049+
<td>The number of messages in the in-flight batch.</td>
4050+
</tr>
4051+
<tr>
4052+
<td>FetchLockTimeMs</td>
4053+
<td>kafka.server:type=SharePartitionMetrics,name=FetchLockTimeMs,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4054+
<td>The time elapsed (in milliseconds) while a share partition is held under lock for fetching messages.</td>
4055+
</tr>
4056+
<tr>
4057+
<td>FetchLockRatio</td>
4058+
<td>kafka.server:type=SharePartitionMetrics,name=FetchLockRatio,group=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)</td>
4059+
<td>The fraction of time that the share partition is held under lock.</td>
4060+
</tr>
4061+
<tr>
4062+
<td>ShareSessionEvictionsPerSec</td>
4063+
<td>kafka.server:type=ShareSessionCache,name=ShareSessionEvictionsPerSec</td>
4064+
<td>The share session eviction rate per second.</td>
4065+
</tr>
4066+
<tr>
4067+
<td>SharePartitionsCount</td>
4068+
<td>kafka.server:type=ShareSessionCache,name=SharePartitionsCount</td>
4069+
<td>The number of cached share partitions.</td>
4070+
</tr>
4071+
<tr>
4072+
<td>ShareSessionsCount</td>
4073+
<td>kafka.server:type=ShareSessionCache,name=ShareSessionsCount</td>
4074+
<td>The number of cached share sessions.</td>
4075+
</tr>
4076+
<tr>
4077+
<td>NumDelayedOperations (ShareFetch)</td>
4078+
<td>kafka.server:type=DelayedOperationPurgatory,name=NumDelayedOperations,delayedOperation=ShareFetch</td>
4079+
<td>The number of delayed operations for share fetch purgatory.</td>
4080+
</tr>
4081+
<tr>
4082+
<td>PurgatorySize (ShareFetch)</td>
4083+
<td>kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=ShareFetch</td>
4084+
<td>The number of requests waiting in the share fetch purgatory. This is high if share consumers use a large value for fetch.wait.max.ms.</td>
4085+
</tr>
4086+
<tr>
4087+
<td>ExpiresPerSec</td>
4088+
<td>kafka.server:type=DelayedShareFetchMetrics,name=ExpiresPerSec</td>
4089+
<td>The expired delayed share fetch operation rate per second.</td>
4090+
</tr>
4091+
</tbody>
4092+
</table>
4093+
4094+
<h5 class="anchor-heading"><a id="kafka_share_coordinator_monitoring" class="anchor-link"></a><a href="#kafka_share_coordinator_monitoring">Coordinator Metrics</a></h5>
4095+
<table class="data-table">
4096+
<tbody><tr>
4097+
<th>Metric/Attribute name</th>
4098+
<th>Mbean name</th>
4099+
<th>Description</th>
4100+
</tr>
4101+
<tr>
4102+
<td>group-count</td>
4103+
<td>kafka.server:type=group-coordinator-metrics,name=group-count,protocol=share</td>
4104+
<td>The total number of share groups managed by the group coordinator.</td>
4105+
</tr>
4106+
<tr>
4107+
<td>share-group-count</td>
4108+
<td>kafka.server:type=group-coordinator-metrics,name=share-group-count,state={Empty|Stable|Dead}</td>
4109+
<td>The number of share groups in each state.</td>
4110+
</tr>
4111+
<tr>
4112+
<td>rebalance-total</td>
4113+
<td>kafka.server:type=group-coordinator-metrics,name=rebalance-total,protocol=share</td>
4114+
<td>The total number of share group rebalances.</td>
4115+
</tr>
4116+
<tr>
4117+
<td>rebalance-rate-per-hour</td>
4118+
<td>kafka.server:type=group-coordinator-metrics,name=rebalance-rate-per-hour,protocol=share</td>
4119+
<td>The number of share group rebalance events per hour.</td>
4120+
</tr>
4121+
<tr>
4122+
<td>partition-load-time-max</td>
4123+
<td>kafka.server:type=share-coordinator-metrics,name=partition-load-time-max</td>
4124+
<td>The maximum time taken in milliseconds to load the share-group state from the share-group state partitions.</td>
4125+
</tr>
4126+
<tr>
4127+
<td>partition-load-time-avg</td>
4128+
<td>kafka.server:type=share-coordinator-metrics,name=partition-load-time-avg</td>
4129+
<td>The average time taken in milliseconds to load the share-group state from the share-group state partitions.</td>
4130+
</tr>
4131+
<tr>
4132+
<td>thread-idle-ratio-min</td>
4133+
<td>kafka.server:type=share-coordinator-metrics,name=thread-idle-ratio-min</td>
4134+
<td>The minimum fraction of time the share coordinator thread is idle.</td>
4135+
</tr>
4136+
<tr>
4137+
<td>thread-idle-ratio-avg</td>
4138+
<td>kafka.server:type=share-coordinator-metrics,name=thread-idle-ratio-avg</td>
4139+
<td>The average fraction of time the share coordinator thread is idle.</td>
4140+
</tr>
4141+
<tr>
4142+
<td>write-rate</td>
4143+
<td>kafka.server:type=share-coordinator-metrics,name=write-rate</td>
4144+
<td>The number of share-group state write calls per second.</td>
4145+
</tr>
4146+
<tr>
4147+
<td>write-total</td>
4148+
<td>kafka.server:type=share-coordinator-metrics,name=write-total</td>
4149+
<td>The total number of share-group state write calls.</td>
4150+
</tr>
4151+
<tr>
4152+
<td>write-latency-avg</td>
4153+
<td>kafka.server:type=share-coordinator-metrics,name=write-latency-avg</td>
4154+
<td>The average time taken for a share-group state write call, including the time to write to the share-group state topic.</td>
4155+
</tr>
4156+
<tr>
4157+
<td>write-latency-max</td>
4158+
<td>kafka.server:type=share-coordinator-metrics,name=write-latency-max</td>
4159+
<td>The maximum time taken for a share-group state write call, including the time to write to the share-group state topic.</td>
4160+
</tr>
4161+
<tr>
4162+
<td>num-partitions</td>
4163+
<td>kafka.server:type=share-coordinator-metrics,name=num-partitions,state={loading|active|failed}</td>
4164+
<td>The number of partitions in the share-state topic in each state.</td>
4165+
</tr>
4166+
<tr>
4167+
<td>last-pruned-offset</td>
4168+
<td>kafka.server:type=share-coordinator-metrics,name=last-pruned-offset,topic=([-.\w]+),partition=([0-9]+)</td>
4169+
<td>The offset at which the share-group state topic was last pruned.</td>
4170+
</tr>
4171+
</tbody>
4172+
</table>
4173+
4174+
<h5 class="anchor-heading"><a id="kafka_share_client_monitoring" class="anchor-link"></a><a href="#kafka_share_client_monitoring">Client Metrics</a></h5>
4175+
The following metrics are available on share consumer instances:<br/><br/>
4176+
<!--#include virtual="generated/share_consumer_metrics.html" -->
4177+
39834178
<h4 class="anchor-heading"><a id="others_monitoring" class="anchor-link"></a><a href="#others_monitoring">Others</a></h4>
39844179

39854180
We recommend monitoring GC time and other stats and various server stats such as CPU utilization, I/O service time, etc.

docs/toc.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@
162162
<li><a href="#consumer_monitoring">Consumer Monitoring</a>
163163
<li><a href="#connect_monitoring">Connect Monitoring</a>
164164
<li><a href="#kafka_streams_monitoring">Streams Monitoring</a>
165+
<li><a href="#kafka_share_group_monitoring">Share Group Monitoring</a>
165166
<li><a href="#others_monitoring">Others</a>
166167
</ul>
167168

0 commit comments

Comments
 (0)