
Commit a5ec3ea

refactor(rpc): prefer monitored_connect over connect (#18635)
Signed-off-by: Bugen Zhao <[email protected]>
1 parent a898dcc commit a5ec3ea

File tree

11 files changed: +32 −282 lines


clippy.toml

+2
@@ -13,6 +13,8 @@ disallowed-methods = [
     { path = "speedate::DateTime::parse_bytes_with_config", reason = "Please use `parse_bytes_rfc3339_with_config` instead." },
     { path = "speedate::Date::parse_str", reason = "Please use `parse_str_rfc3339` instead." },
     { path = "speedate::Date::parse_bytes", reason = "Please use `parse_bytes_rfc3339` instead." },
+    { path = "tonic::transport::Endpoint::connect", reason = "Please use `EndpointExt::monitored_connect` instead." },
+    { path = "tonic::transport::Endpoint::connect_lazy", reason = "Please use `EndpointExt::monitored_connect_lazy` instead." },
 ]
 
 disallowed-types = [
     { path = "num_traits::AsPrimitive", reason = "Please use `From` or `TryFrom` with `OrderedFloat` instead." },
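With these entries in place, any direct call to `Endpoint::connect` or `Endpoint::connect_lazy` becomes a `clippy::disallowed_methods` error across the workspace, steering callers toward the monitored wrappers. A minimal sketch of code that would now be rejected (the `connect_plain` helper is illustrative, not from this commit):

```rust
use tonic::transport::{Channel, Endpoint};

async fn connect_plain(addr: String) -> Result<Channel, tonic::transport::Error> {
    let endpoint = Endpoint::from_shared(addr)?;
    // With the clippy.toml entry above, clippy::disallowed_methods fires here:
    // "Please use `EndpointExt::monitored_connect` instead."
    endpoint.connect().await
}
```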

src/common/metrics/src/monitor/connection.rs

+10
@@ -482,6 +482,16 @@ pub struct TcpConfig {
     pub keepalive_duration: Option<Duration>,
 }
 
+#[allow(clippy::derivable_impls)]
+impl Default for TcpConfig {
+    fn default() -> Self {
+        Self {
+            tcp_nodelay: false,
+            keepalive_duration: None,
+        }
+    }
+}
+
 pub fn monitor_connector<C>(
     connector: C,
     connection_type: impl Into<String>,
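The `#[allow(clippy::derivable_impls)]` is needed because this hand-written impl produces exactly what `#[derive(Default)]` would generate (`bool` defaults to `false`, `Option` to `None`); writing it out keeps the connection defaults explicit at the definition site. A self-contained sketch of the equivalence, using a local stand-in type rather than the real `TcpConfig`:

```rust
use std::time::Duration;

#[derive(Debug, Default, PartialEq)]
struct TcpConfigDerived {
    tcp_nodelay: bool,
    keepalive_duration: Option<Duration>,
}

fn main() {
    // The derived impl and the manual one in this diff agree field by field.
    assert_eq!(
        TcpConfigDerived::default(),
        TcpConfigDerived {
            tcp_nodelay: false,
            keepalive_duration: None,
        }
    );
}
```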

src/ctl/src/cmd_impl/hummock/tiered_cache_tracing.rs

+2 −1
@@ -16,6 +16,7 @@ use std::time::Duration;
 
 use futures::future::try_join_all;
 use itertools::Itertools;
+use risingwave_common::monitor::EndpointExt;
 use risingwave_pb::monitor_service::monitor_service_client::MonitorServiceClient;
 use risingwave_pb::monitor_service::TieredCacheTracingRequest;
 use tonic::transport::Endpoint;
@@ -40,7 +41,7 @@ pub async fn tiered_cache_tracing(
     let addr = worker_node.get_host().unwrap();
     let channel = Endpoint::from_shared(format!("http://{}:{}", addr.host, addr.port))?
         .connect_timeout(Duration::from_secs(5))
-        .connect()
+        .monitored_connect("grpc-tiered-cache-tracing-client", Default::default())
         .await?;
     let mut client = MonitorServiceClient::new(channel);
     client
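The second argument, `Default::default()`, has its type inferred from the parameter it is passed to, so it resolves to the `TcpConfig::default()` impl added above. A self-contained sketch of the inference pattern; `monitored_connect_stub` is a hypothetical stand-in for the real extension method:

```rust
use std::time::Duration;

#[derive(Default)]
struct TcpConfig {
    tcp_nodelay: bool,
    keepalive_duration: Option<Duration>,
}

// Stand-in for EndpointExt::monitored_connect, for illustration only.
fn monitored_connect_stub(connection_type: &str, config: TcpConfig) {
    println!(
        "{}: nodelay={}, keepalive={:?}",
        connection_type, config.tcp_nodelay, config.keepalive_duration
    );
}

fn main() {
    // `Default::default()` is inferred as `TcpConfig::default()` here.
    monitored_connect_stub("grpc-tiered-cache-tracing-client", Default::default());
}
```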

src/rpc_client/src/compactor_client.rs

+8 −7
@@ -15,6 +15,7 @@
 use std::sync::Arc;
 use std::time::Duration;
 
+use risingwave_common::monitor::EndpointExt;
 use risingwave_common::util::addr::HostAddr;
 use risingwave_pb::hummock::hummock_manager_service_client::HummockManagerServiceClient;
 use risingwave_pb::hummock::{
@@ -47,7 +48,7 @@ impl CompactorClient {
     pub async fn new(host_addr: HostAddr) -> Result<Self> {
         let channel = Endpoint::from_shared(format!("http://{}", &host_addr))?
             .connect_timeout(Duration::from_secs(5))
-            .connect()
+            .monitored_connect("grpc-compactor-client", Default::default())
             .await?;
         Ok(Self {
             monitor_client: MonitorServiceClient::new(channel),
@@ -94,26 +95,26 @@ pub struct GrpcCompactorProxyClient {
 }
 
 impl GrpcCompactorProxyClient {
-    pub fn new(channel: Channel, endpoint: String) -> Self {
+    pub async fn new(endpoint: String) -> Self {
+        let channel = Self::connect_to_endpoint(endpoint.clone()).await;
         let core = Arc::new(RwLock::new(GrpcCompactorProxyClientCore::new(channel)));
         Self { core, endpoint }
     }
 
     async fn recreate_core(&self) {
         tracing::info!("GrpcCompactorProxyClient rpc transfer failed, try to reconnect");
-        let channel = self.connect_to_endpoint().await;
+        let channel = Self::connect_to_endpoint(self.endpoint.clone()).await;
         let mut core = self.core.write().await;
         *core = GrpcCompactorProxyClientCore::new(channel);
     }
 
-    async fn connect_to_endpoint(&self) -> Channel {
-        let endpoint =
-            Endpoint::from_shared(self.endpoint.clone()).expect("Fail to construct tonic Endpoint");
+    async fn connect_to_endpoint(endpoint: String) -> Channel {
+        let endpoint = Endpoint::from_shared(endpoint).expect("Fail to construct tonic Endpoint");
         endpoint
             .http2_keep_alive_interval(Duration::from_secs(ENDPOINT_KEEP_ALIVE_INTERVAL_SEC))
             .keep_alive_timeout(Duration::from_secs(ENDPOINT_KEEP_ALIVE_TIMEOUT_SEC))
             .connect_timeout(Duration::from_secs(5))
-            .connect()
+            .monitored_connect("grpc-compactor-proxy-client", Default::default())
             .await
             .expect("Failed to create channel via proxy rpc endpoint.")
     }
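Besides swapping in `monitored_connect`, this hunk turns `connect_to_endpoint` from a method into an associated function that takes the endpoint by value, so `new` can dial before any `Self` exists and `recreate_core` passes the stored endpoint explicitly. A self-contained sketch of the pattern (types simplified; `u32` stands in for tonic's `Channel`):

```rust
struct ProxyClient {
    endpoint: String,
    channel: u32, // stand-in for tonic::transport::Channel
}

impl ProxyClient {
    async fn new(endpoint: String) -> Self {
        // Associated function: callable before `Self` is constructed.
        let channel = Self::connect_to_endpoint(endpoint.clone()).await;
        Self { endpoint, channel }
    }

    async fn reconnect(&mut self) {
        // No `&self` borrow is held across the dial; only the endpoint is cloned.
        self.channel = Self::connect_to_endpoint(self.endpoint.clone()).await;
    }

    async fn connect_to_endpoint(_endpoint: String) -> u32 {
        0 // pretend we dialed and obtained a channel
    }
}
```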

src/rpc_client/src/compute_client.rs

+1 −1
@@ -73,7 +73,7 @@ impl ComputeClient {
                 "grpc-compute-client",
                 TcpConfig {
                     tcp_nodelay: true,
-                    keepalive_duration: None,
+                    ..Default::default()
                 },
             )
             .await?;
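With `TcpConfig: Default` available, the call site only names the field that deviates from the default; the functional-update syntax `..Default::default()` fills in the rest. A self-contained sketch with a local stand-in type:

```rust
use std::time::Duration;

#[derive(Debug, Default)]
struct TcpConfig {
    tcp_nodelay: bool,
    keepalive_duration: Option<Duration>,
}

fn main() {
    let config = TcpConfig {
        tcp_nodelay: true,
        ..Default::default() // keepalive_duration falls back to None
    };
    assert!(config.tcp_nodelay);
    assert!(config.keepalive_duration.is_none());
}
```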

src/rpc_client/src/connector_client.rs

+3 −254
@@ -12,40 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::BTreeMap;
-use std::fmt::Debug;
-use std::time::Duration;
-
-use anyhow::{anyhow, Context};
-use futures::TryStreamExt;
-use risingwave_common::config::{MAX_CONNECTION_WINDOW_SIZE, STREAM_WINDOW_SIZE};
-use risingwave_common::monitor::{EndpointExt, TcpConfig};
-use risingwave_pb::connector_service::connector_service_client::ConnectorServiceClient;
-use risingwave_pb::connector_service::sink_coordinator_stream_request::{
-    CommitMetadata, StartCoordinator,
-};
+use anyhow::anyhow;
+use risingwave_pb::connector_service::sink_coordinator_stream_request::CommitMetadata;
 use risingwave_pb::connector_service::sink_writer_stream_request::write_batch::Payload;
 use risingwave_pb::connector_service::sink_writer_stream_request::{
-    Barrier, Request as SinkRequest, StartSink, WriteBatch,
+    Barrier, Request as SinkRequest, WriteBatch,
 };
 use risingwave_pb::connector_service::sink_writer_stream_response::CommitResponse;
 use risingwave_pb::connector_service::*;
-use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn;
-use thiserror_ext::AsReport;
-use tokio_stream::wrappers::ReceiverStream;
-use tonic::transport::{Channel, Endpoint};
-use tonic::Streaming;
-use tracing::error;
 
 use crate::error::{Result, RpcError};
 use crate::{BidiStreamHandle, BidiStreamReceiver, BidiStreamSender};
 
-#[derive(Clone, Debug)]
-pub struct ConnectorClient {
-    rpc_client: ConnectorServiceClient<Channel>,
-    endpoint: String,
-}
-
 pub type SinkWriterRequestSender<REQ = SinkWriterStreamRequest> = BidiStreamSender<REQ>;
 pub type SinkWriterResponseReceiver = BidiStreamReceiver<SinkWriterStreamResponse>;
 
@@ -143,232 +121,3 @@ impl SinkCoordinatorStreamHandle {
         }
     }
 }
-
-impl ConnectorClient {
-    pub async fn try_new(connector_endpoint: Option<&String>) -> Option<Self> {
-        match connector_endpoint {
-            None => None,
-            Some(connector_endpoint) => match ConnectorClient::new(connector_endpoint).await {
-                Ok(client) => Some(client),
-                Err(e) => {
-                    error!(
-                        endpoint = connector_endpoint,
-                        error = %e.as_report(),
-                        "invalid connector endpoint",
-                    );
-                    None
-                }
-            },
-        }
-    }
-
-    #[allow(clippy::unused_async)]
-    pub async fn new(connector_endpoint: &String) -> Result<Self> {
-        let endpoint = Endpoint::from_shared(format!("http://{}", connector_endpoint))
-            .with_context(|| format!("invalid connector endpoint `{}`", connector_endpoint))?
-            .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE)
-            .initial_stream_window_size(STREAM_WINDOW_SIZE)
-            .connect_timeout(Duration::from_secs(5));
-
-        let channel = {
-            #[cfg(madsim)]
-            {
-                endpoint.connect().await?
-            }
-            #[cfg(not(madsim))]
-            {
-                endpoint.monitored_connect_lazy(
-                    "grpc-connector-client",
-                    TcpConfig {
-                        tcp_nodelay: true,
-                        keepalive_duration: None,
-                    },
-                )
-            }
-        };
-        Ok(Self {
-            rpc_client: ConnectorServiceClient::new(channel).max_decoding_message_size(usize::MAX),
-            endpoint: connector_endpoint.to_string(),
-        })
-    }
-
-    pub fn endpoint(&self) -> &String {
-        &self.endpoint
-    }
-
-    /// Get source event stream
-    pub async fn start_source_stream(
-        &self,
-        source_id: u64,
-        source_type: SourceType,
-        start_offset: Option<String>,
-        properties: BTreeMap<String, String>,
-        snapshot_done: bool,
-        is_source_job: bool,
-    ) -> Result<Streaming<GetEventStreamResponse>> {
-        Ok(self
-            .rpc_client
-            .clone()
-            .get_event_stream(GetEventStreamRequest {
-                source_id,
-                source_type: source_type as _,
-                start_offset: start_offset.unwrap_or_default(),
-                properties,
-                snapshot_done,
-                is_source_job,
-            })
-            .await
-            .inspect_err(|err| {
-                tracing::error!(
-                    "failed to start stream for CDC source {}: {}",
-                    source_id,
-                    err.message()
-                )
-            })
-            .map_err(RpcError::from_connector_status)?
-            .into_inner())
-    }
-
-    /// Validate source properties
-    pub async fn validate_source_properties(
-        &self,
-        source_id: u64,
-        source_type: SourceType,
-        properties: BTreeMap<String, String>,
-        table_schema: Option<TableSchema>,
-        is_source_job: bool,
-        is_backfill_table: bool,
-    ) -> Result<()> {
-        let table_schema = table_schema.map(|mut table_schema| {
-            table_schema.columns.retain(|c| {
-                !matches!(
-                    c.generated_or_default_column,
-                    Some(GeneratedOrDefaultColumn::GeneratedColumn(_))
-                )
-            });
-            table_schema
-        });
-        let response = self
-            .rpc_client
-            .clone()
-            .validate_source(ValidateSourceRequest {
-                source_id,
-                source_type: source_type as _,
-                properties,
-                table_schema,
-                is_source_job,
-                is_backfill_table,
-            })
-            .await
-            .inspect_err(|err| {
-                tracing::error!("failed to validate source#{}: {}", source_id, err.message())
-            })
-            .map_err(RpcError::from_connector_status)?
-            .into_inner();
-
-        response.error.map_or(Ok(()), |err| {
-            Err(RpcError::Internal(anyhow!(format!(
-                "source cannot pass validation: {}",
-                err.error_message
-            ))))
-        })
-    }
-
-    pub async fn start_sink_writer_stream(
-        &self,
-        payload_schema: Option<TableSchema>,
-        sink_proto: PbSinkParam,
-    ) -> Result<SinkWriterStreamHandle> {
-        let mut rpc_client = self.rpc_client.clone();
-        let (handle, first_rsp) = SinkWriterStreamHandle::initialize(
-            SinkWriterStreamRequest {
-                request: Some(SinkRequest::Start(StartSink {
-                    payload_schema,
-                    sink_param: Some(sink_proto),
-                })),
-            },
-            |rx| async move {
-                rpc_client
-                    .sink_writer_stream(ReceiverStream::new(rx))
-                    .await
-                    .map(|response| {
-                        response
-                            .into_inner()
-                            .map_err(RpcError::from_connector_status)
-                    })
-                    .map_err(RpcError::from_connector_status)
-            },
-        )
-        .await?;
-
-        match first_rsp {
-            SinkWriterStreamResponse {
-                response: Some(sink_writer_stream_response::Response::Start(_)),
-            } => Ok(handle),
-            msg => Err(RpcError::Internal(anyhow!(
-                "should get start response but get {:?}",
-                msg
-            ))),
-        }
-    }
-
-    pub async fn start_sink_coordinator_stream(
-        &self,
-        param: SinkParam,
-    ) -> Result<SinkCoordinatorStreamHandle> {
-        let mut rpc_client = self.rpc_client.clone();
-        let (handle, first_rsp) = SinkCoordinatorStreamHandle::initialize(
-            SinkCoordinatorStreamRequest {
-                request: Some(sink_coordinator_stream_request::Request::Start(
-                    StartCoordinator { param: Some(param) },
-                )),
-            },
-            |rx| async move {
-                rpc_client
-                    .sink_coordinator_stream(ReceiverStream::new(rx))
-                    .await
-                    .map(|response| {
-                        response
-                            .into_inner()
-                            .map_err(RpcError::from_connector_status)
-                    })
-                    .map_err(RpcError::from_connector_status)
-            },
-        )
-        .await?;
-
-        match first_rsp {
-            SinkCoordinatorStreamResponse {
-                response: Some(sink_coordinator_stream_response::Response::Start(_)),
-            } => Ok(handle),
-            msg => Err(RpcError::Internal(anyhow!(
-                "should get start response but get {:?}",
-                msg
-            ))),
-        }
-    }
-
-    pub async fn validate_sink_properties(&self, sink_param: SinkParam) -> Result<()> {
-        let response = self
-            .rpc_client
-            .clone()
-            .validate_sink(ValidateSinkRequest {
-                sink_param: Some(sink_param),
-            })
-            .await
-            .inspect_err(|err| {
-                tracing::error!("failed to validate sink properties: {}", err.message())
-            })
-            .map_err(RpcError::from_connector_status)?
-            .into_inner();
-        response.error.map_or_else(
-            || Ok(()), // If there is no error message, return Ok here.
-            |err| {
-                Err(RpcError::Internal(anyhow!(format!(
-                    "sink cannot pass validation: {}",
-                    err.error_message
-                ))))
-            },
-        )
-    }
-}
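The deleted `ConnectorClient` was the last caller that dialed differently per build: plain `connect` under the `madsim` simulation cfg, `monitored_connect_lazy` otherwise. The cfg-gated-block pattern it used is worth noting, since any future reintroduction of the simulation branch would also need an explicit `#[allow(clippy::disallowed_methods)]`. A self-contained sketch of the pattern (the strings are illustrative; only the `madsim` cfg name comes from the deleted code):

```rust
fn dialing_strategy() -> &'static str {
    // Exactly one of these blocks survives cfg expansion and becomes
    // the function's tail expression.
    #[cfg(madsim)]
    {
        "plain connect (simulation build)"
    }
    #[cfg(not(madsim))]
    {
        "monitored connect (production build)"
    }
}

fn main() {
    println!("{}", dialing_strategy());
}
```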

src/rpc_client/src/frontend_client.rs

+1 −1
@@ -45,7 +45,7 @@ impl FrontendClient {
                 "grpc-frontend-client",
                 TcpConfig {
                     tcp_nodelay: true,
-                    keepalive_duration: None,
+                    ..Default::default()
                 },
             )
             .await?

src/rpc_client/src/lib.rs

+1 −1
@@ -65,7 +65,7 @@ mod tracing;
 
 pub use compactor_client::{CompactorClient, GrpcCompactorProxyClient};
 pub use compute_client::{ComputeClient, ComputeClientPool, ComputeClientPoolRef};
-pub use connector_client::{ConnectorClient, SinkCoordinatorStreamHandle, SinkWriterStreamHandle};
+pub use connector_client::{SinkCoordinatorStreamHandle, SinkWriterStreamHandle};
 pub use frontend_client::{FrontendClientPool, FrontendClientPoolRef};
 pub use hummock_meta_client::{CompactionEventItem, HummockMetaClient};
 pub use meta_client::{MetaClient, SinkCoordinationRpcClient};
