From 0bc9601a0bf9a840c1d16551bc796bcf1f15ea75 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 1 Oct 2024 18:15:04 +0900 Subject: [PATCH 01/23] add storage write --- bigquery/Cargo.toml | 3 + bigquery/src/client.rs | 14 +- bigquery/src/grpc/apiv1/bigquery_client.rs | 148 +++++++++++++++++++++ bigquery/src/grpc/apiv1/conn_pool.rs | 48 ++----- 4 files changed, 168 insertions(+), 45 deletions(-) diff --git a/bigquery/Cargo.toml b/bigquery/Cargo.toml index b351740a..cb727db3 100644 --- a/bigquery/Cargo.toml +++ b/bigquery/Cargo.toml @@ -40,6 +40,9 @@ ctor = "0.1.26" tokio-util = {version ="0.7", features = ["codec"] } google-cloud-auth = { path = "../foundation/auth", default-features=false } base64-serde = "0.7" +async-stream = "0.3" +prost = "0.13" +prost-types = "0.13" [features] default = ["default-tls", "auth"] diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 966c96f5..1acd7f39 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -12,7 +12,7 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::{ }; use google_cloud_token::TokenSourceProvider; -use crate::grpc::apiv1::conn_pool::{ReadConnectionManager, DOMAIN}; +use crate::grpc::apiv1::conn_pool::{ConnectionManager, DOMAIN}; use crate::http::bigquery_client::BigqueryClient; use crate::http::bigquery_dataset_client::BigqueryDatasetClient; use crate::http::bigquery_job_client::BigqueryJobClient; @@ -94,8 +94,10 @@ impl ClientConfig { use crate::http::job::get::GetJobRequest; use crate::http::job::list::ListJobsRequest; +use crate::grpc::apiv1::bigquery_client::StreamingReadClient; #[cfg(feature = "auth")] pub use google_cloud_auth; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; #[cfg(feature = "auth")] impl ClientConfig { @@ -163,7 +165,7 @@ pub struct Client { routine_client: BigqueryRoutineClient, row_access_policy_client: BigqueryRowAccessPolicyClient, model_client: BigqueryModelClient, - streaming_read_client_conn_pool: Arc, + streaming_client_conn_pool: Arc, } impl Client { @@ -183,8 +185,8 @@ impl Client { connect_timeout: read_config.connect_timeout, }; - let streaming_read_client_conn_pool = - ReadConnectionManager::new(read_config.num_channels, &config.environment, DOMAIN, &conn_options).await?; + let streaming_client_conn_pool = + ConnectionManager::new(read_config.num_channels, &config.environment, &conn_options).await?; Ok(Self { dataset_client: BigqueryDatasetClient::new(client.clone()), table_client: BigqueryTableClient::new(client.clone()), @@ -193,7 +195,7 @@ impl Client { routine_client: BigqueryRoutineClient::new(client.clone()), row_access_policy_client: BigqueryRowAccessPolicyClient::new(client.clone()), model_client: BigqueryModelClient::new(client.clone()), - streaming_read_client_conn_pool: Arc::new(streaming_read_client_conn_pool), + streaming_client_conn_pool: Arc::new(streaming_client_conn_pool), }) } @@ -473,7 +475,7 @@ impl Client { { let option = option.unwrap_or_default(); - let mut client = self.streaming_read_client_conn_pool.conn(); + let mut client = StreamingReadClient::new(BigQueryReadClient::new(self.streaming_client_conn_pool.conn())); let read_session = client .create_read_session( CreateReadSessionRequest { diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index f99eeb00..19edd1ab 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -212,3 +212,151 @@ impl StreamingWriteClient { .await } } + 
+#[cfg(test)] +mod tests { + use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; + use crate::grpc::apiv1::conn_pool::{ConnectionManager, AUDIENCE, SCOPES}; + use google_cloud_gax::conn::Environment; + use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use google_cloud_gax::grpc::IntoStreamingRequest; + use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; + use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; + use google_cloud_googleapis::cloud::bigquery::storage::v1::table_field_schema::Type; + use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; + use google_cloud_googleapis::cloud::bigquery::storage::v1::{ + AppendRowsRequest, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, + FinalizeWriteStreamRequest, ProtoRows, ProtoSchema, WriteStream, + }; + use prost::Message; + use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; + + #[derive(Clone, PartialEq, ::prost::Message)] + struct TestData { + #[prost(string, tag = "1")] + pub col_string: String, + } + + #[ctor::ctor] + fn init() { + let _ = tracing_subscriber::fmt::try_init(); + } + #[tokio::test] + async fn test_storage_write() { + let config = google_cloud_auth::project::Config::default() + .with_audience(AUDIENCE) + .with_scopes(&SCOPES); + let ts = google_cloud_auth::token::DefaultTokenSourceProvider::new(config) + .await + .unwrap(); + let conn = ConnectionManager::new(4, &Environment::GoogleCloud(Box::new(ts)), &Default::default()) + .await + .unwrap(); + + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); + + let table = "projects/atl-dev1/datasets/gcrbq_storage/tables/write_test".to_string(); + let pending_stream = client + .create_write_stream( + CreateWriteStreamRequest { + parent: table.to_string(), + write_stream: Some(WriteStream { + name: "".to_string(), + r#type: Pending as i32, + create_time: None, + commit_time: None, + table_schema: None, + write_mode: 0, + location: "".to_string(), + }), + }, + None, + ) + .await + .unwrap() + .into_inner(); + + let data1 = TestData { + col_string: "col1".to_string(), + }; + let mut buf = Vec::new(); + data1.encode(&mut buf).unwrap(); + + let row1 = AppendRowsRequest { + write_stream: pending_stream.name.to_string(), + offset: None, + trace_id: "".to_string(), + missing_value_interpretations: Default::default(), + default_missing_value_interpretation: 0, + rows: Some(Rows::ProtoRows(ProtoData { + writer_schema: Some(ProtoSchema { + proto_descriptor: Some(DescriptorProto { + name: Some("TestData".to_string()), + field: vec![FieldDescriptorProto { + name: Some("col_string".to_string()), + number: Some(1), + label: None, + r#type: Some(field_descriptor_proto::Type::String.into()), + type_name: None, + extendee: None, + default_value: None, + oneof_index: None, + json_name: None, + options: None, + proto3_optional: None, + }], + extension: vec![], + nested_type: vec![], + enum_type: vec![], + extension_range: vec![], + oneof_decl: vec![], + options: None, + reserved_range: vec![], + reserved_name: vec![], + }), + }), + rows: Some(ProtoRows { + serialized_rows: vec![buf], + }), + })), + }; + + let request = Box::pin(async_stream::stream! 
{ + for req in [row1] { + yield req; + } + }); + let mut result = client.append_rows(request).await.unwrap().into_inner(); + while let Some(res) = result.next().await { + match res { + Ok(res) => println!("row errors = {:?}", res.row_errors.len()), + Err(err) => println!("err = {:?}", err), + }; + } + + let res = client + .finalize_write_stream( + FinalizeWriteStreamRequest { + name: pending_stream.name.to_string(), + }, + None, + ) + .await + .unwrap() + .into_inner(); + println!("finalized = {:?}", res.row_count); + + let res = client + .batch_commit_write_streams( + BatchCommitWriteStreamsRequest { + parent: table.to_string(), + write_streams: vec![pending_stream.name.to_string()], + }, + None, + ) + .await + .unwrap() + .into_inner(); + println!("commit stream errors = {:?}", res.stream_errors.len()) + } +} diff --git a/bigquery/src/grpc/apiv1/conn_pool.rs b/bigquery/src/grpc/apiv1/conn_pool.rs index fb45ecaf..f862a46a 100644 --- a/bigquery/src/grpc/apiv1/conn_pool.rs +++ b/bigquery/src/grpc/apiv1/conn_pool.rs @@ -1,8 +1,6 @@ -use google_cloud_gax::conn::{ConnectionManager as GRPCConnectionManager, ConnectionOptions, Environment, Error}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; - -use crate::grpc::apiv1::bigquery_client::{StreamingReadClient, StreamingWriteClient}; +use google_cloud_gax::conn::{ + Channel, ConnectionManager as GRPCConnectionManager, ConnectionOptions, Environment, Error, +}; pub const AUDIENCE: &str = "https://bigquerystorage.googleapis.com/"; pub const DOMAIN: &str = "bigquerystorage.googleapis.com"; @@ -12,45 +10,18 @@ pub const SCOPES: [&str; 3] = [ "https://www.googleapis.com/auth/cloud-platform", ]; -pub struct ReadConnectionManager { - inner: GRPCConnectionManager, -} - -impl ReadConnectionManager { - pub async fn new( - pool_size: usize, - environment: &Environment, - domain: &str, - conn_options: &ConnectionOptions, - ) -> Result { - Ok(ReadConnectionManager { - inner: GRPCConnectionManager::new(pool_size, domain, AUDIENCE, environment, conn_options).await?, - }) - } - - pub fn num(&self) -> usize { - self.inner.num() - } - - pub fn conn(&self) -> StreamingReadClient { - let conn = self.inner.conn(); - StreamingReadClient::new(BigQueryReadClient::new(conn)) - } -} - -pub struct WriteConnectionManager { +pub struct ConnectionManager { inner: GRPCConnectionManager, } -impl WriteConnectionManager { +impl ConnectionManager { pub async fn new( pool_size: usize, environment: &Environment, - domain: &str, conn_options: &ConnectionOptions, ) -> Result { - Ok(WriteConnectionManager { - inner: GRPCConnectionManager::new(pool_size, domain, AUDIENCE, environment, conn_options).await?, + Ok(ConnectionManager { + inner: GRPCConnectionManager::new(pool_size, DOMAIN, AUDIENCE, environment, conn_options).await?, }) } @@ -58,8 +29,7 @@ impl WriteConnectionManager { self.inner.num() } - pub fn conn(&self) -> StreamingWriteClient { - let conn = self.inner.conn(); - StreamingWriteClient::new(BigQueryWriteClient::new(conn)) + pub fn conn(&self) -> Channel { + self.inner.conn() } } From d48b2f66f04a20540aa1acecad8fb7363060d10e Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 16:05:17 +0900 Subject: [PATCH 02/23] add test --- bigquery/src/client.rs | 4 +- bigquery/src/grpc/apiv1/bigquery_client.rs | 217 ++++++++++++--------- 2 files changed, 130 insertions(+), 91 deletions(-) diff --git 
a/bigquery/src/client.rs b/bigquery/src/client.rs index 1acd7f39..263f54ee 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -564,7 +564,9 @@ mod tests { #[ctor::ctor] fn init() { - let _ = tracing_subscriber::fmt::try_init(); + let filter = tracing_subscriber::filter::EnvFilter::from_default_env() + .add_directive("google_cloud_bigquery=trace".parse().unwrap()); + let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); } async fn create_client() -> (Client, String) { diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index 19edd1ab..e7ec287e 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -219,7 +219,7 @@ mod tests { use crate::grpc::apiv1::conn_pool::{ConnectionManager, AUDIENCE, SCOPES}; use google_cloud_gax::conn::Environment; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; - use google_cloud_gax::grpc::IntoStreamingRequest; + use google_cloud_gax::grpc::{IntoStreamingRequest, Status}; use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; use google_cloud_googleapis::cloud::bigquery::storage::v1::table_field_schema::Type; @@ -230,6 +230,7 @@ mod tests { }; use prost::Message; use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; + use tokio::task::JoinHandle; #[derive(Clone, PartialEq, ::prost::Message)] struct TestData { @@ -239,9 +240,26 @@ mod tests { #[ctor::ctor] fn init() { - let _ = tracing_subscriber::fmt::try_init(); + let filter = tracing_subscriber::filter::EnvFilter::from_default_env() + .add_directive("google_cloud_bigquery=trace".parse().unwrap()); + let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); } - #[tokio::test] + + fn create_write_stream_request(table: &str) -> CreateWriteStreamRequest { + CreateWriteStreamRequest { + parent: table.to_string(), + write_stream: Some(WriteStream { + name: "".to_string(), + r#type: Pending as i32, + create_time: None, + commit_time: None, + table_schema: None, + write_mode: 0, + location: "".to_string(), + }), + } + } + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_storage_write() { let config = google_cloud_auth::project::Config::default() .with_audience(AUDIENCE) @@ -249,114 +267,133 @@ mod tests { let ts = google_cloud_auth::token::DefaultTokenSourceProvider::new(config) .await .unwrap(); - let conn = ConnectionManager::new(4, &Environment::GoogleCloud(Box::new(ts)), &Default::default()) + let conn = ConnectionManager::new(1, &Environment::GoogleCloud(Box::new(ts)), &Default::default()) .await .unwrap(); let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); let table = "projects/atl-dev1/datasets/gcrbq_storage/tables/write_test".to_string(); - let pending_stream = client - .create_write_stream( - CreateWriteStreamRequest { - parent: table.to_string(), - write_stream: Some(WriteStream { - name: "".to_string(), - r#type: Pending as i32, - create_time: None, - commit_time: None, - table_schema: None, - write_mode: 0, - location: "".to_string(), - }), - }, - None, - ) - .await - .unwrap() - .into_inner(); - let data1 = TestData { - col_string: "col1".to_string(), - }; - let mut buf = Vec::new(); - data1.encode(&mut buf).unwrap(); + // Create Pending Streams + let mut pending_streams = vec![]; + for i in 0..5 { + let pending_stream = 
client + .create_write_stream(create_write_stream_request(&table), None) + .await + .unwrap() + .into_inner(); + tracing::info!("stream = {:?}", pending_stream.name); + pending_streams.push(pending_stream); + } + + let stream_names = pending_streams + .iter() + .map(|s| s.name.to_string()) + .collect::>(); - let row1 = AppendRowsRequest { - write_stream: pending_stream.name.to_string(), - offset: None, - trace_id: "".to_string(), - missing_value_interpretations: Default::default(), - default_missing_value_interpretation: 0, - rows: Some(Rows::ProtoRows(ProtoData { - writer_schema: Some(ProtoSchema { - proto_descriptor: Some(DescriptorProto { - name: Some("TestData".to_string()), - field: vec![FieldDescriptorProto { - name: Some("col_string".to_string()), - number: Some(1), - label: None, - r#type: Some(field_descriptor_proto::Type::String.into()), - type_name: None, - extendee: None, - default_value: None, - oneof_index: None, - json_name: None, - options: None, - proto3_optional: None, - }], - extension: vec![], - nested_type: vec![], - enum_type: vec![], - extension_range: vec![], - oneof_decl: vec![], - options: None, - reserved_range: vec![], - reserved_name: vec![], - }), - }), - rows: Some(ProtoRows { - serialized_rows: vec![buf], - }), - })), - }; + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, pending_stream) in pending_streams.into_iter().enumerate() { + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("stream_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); - let request = Box::pin(async_stream::stream! { - for req in [row1] { - yield req; - } - }); - let mut result = client.append_rows(request).await.unwrap().into_inner(); - while let Some(res) = result.next().await { - match res { - Ok(res) => println!("row errors = {:?}", res.row_errors.len()), - Err(err) => println!("err = {:?}", err), - }; + let row = AppendRowsRequest { + write_stream: pending_stream.name.to_string(), + offset: None, + trace_id: "".to_string(), + missing_value_interpretations: Default::default(), + default_missing_value_interpretation: 0, + rows: Some(Rows::ProtoRows(ProtoData { + writer_schema: Some(ProtoSchema { + proto_descriptor: Some(DescriptorProto { + name: Some("TestData".to_string()), + field: vec![FieldDescriptorProto { + name: Some("col_string".to_string()), + number: Some(1), + label: None, + r#type: Some(field_descriptor_proto::Type::String.into()), + type_name: None, + extendee: None, + default_value: None, + oneof_index: None, + json_name: None, + options: None, + proto3_optional: None, + }], + extension: vec![], + nested_type: vec![], + enum_type: vec![], + extension_range: vec![], + oneof_decl: vec![], + options: None, + reserved_range: vec![], + reserved_name: vec![], + }), + }), + rows: Some(ProtoRows { + serialized_rows: vec![buf], + }), + })), + }; + rows.push(row); + } + + let request = Box::pin(async_stream::stream! 
{ + for req in rows { + yield req; + } + }); + let mut result = client.append_rows(request).await?.into_inner(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + Ok(()) + })); } - let res = client - .finalize_write_stream( - FinalizeWriteStreamRequest { - name: pending_stream.name.to_string(), - }, - None, - ) - .await - .unwrap() - .into_inner(); - println!("finalized = {:?}", res.row_count); + // Wait for append rows + for mut task in tasks { + task.await.unwrap().unwrap(); + } + // Finalize streams + for pending_stream in &stream_names { + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); + let res = client + .finalize_write_stream( + FinalizeWriteStreamRequest { + name: pending_stream.to_string(), + }, + None, + ) + .await + .unwrap() + .into_inner(); + tracing::info!("finalized = {:?}", res.row_count); + } + + // Commit + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); let res = client .batch_commit_write_streams( BatchCommitWriteStreamsRequest { parent: table.to_string(), - write_streams: vec![pending_stream.name.to_string()], + write_streams: stream_names.iter().map(|s| s.to_string()).collect(), }, None, ) .await .unwrap() .into_inner(); - println!("commit stream errors = {:?}", res.stream_errors.len()) + tracing::info!("commit stream errors = {:?}", res.stream_errors.len()) } } From 4a0955cb586e39c5924474e6f13fd5f5ff22701c Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 17:14:15 +0900 Subject: [PATCH 03/23] add batch writer --- bigquery/src/client.rs | 8 +- bigquery/src/grpc/apiv1/bigquery_client.rs | 12 +-- bigquery/src/grpc/apiv1/conn_pool.rs | 1 + bigquery/src/http/bigquery_dataset_client.rs | 2 +- bigquery/src/http/bigquery_job_client.rs | 2 +- bigquery/src/lib.rs | 1 + bigquery/src/storage_batch_write.rs | 92 ++++++++++++++++++++ 7 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 bigquery/src/storage_batch_write.rs diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 263f54ee..38f70b8a 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -12,7 +12,7 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::{ }; use google_cloud_token::TokenSourceProvider; -use crate::grpc::apiv1::conn_pool::{ConnectionManager, DOMAIN}; +use crate::grpc::apiv1::conn_pool::ConnectionManager; use crate::http::bigquery_client::BigqueryClient; use crate::http::bigquery_dataset_client::BigqueryDatasetClient; use crate::http::bigquery_job_client::BigqueryJobClient; @@ -26,8 +26,8 @@ use crate::http::job::query::QueryRequest; use crate::http::job::{is_script, is_select_query, JobConfiguration, JobReference, JobStatistics, JobType}; use crate::http::table::TableReference; use crate::query::{QueryOption, QueryResult}; -use crate::storage; use crate::{http, query}; +use crate::{storage, storage_batch_write}; const JOB_RETRY_REASONS: [&str; 3] = ["backendError", "rateLimitExceeded", "internalError"]; @@ -241,6 +241,10 @@ impl Client { &self.model_client } + pub fn storage_batch_write(&self, table: String) -> storage_batch_write::StorageBatchWriter { + storage_batch_write::StorageBatchWriter::new(table, self.streaming_client_conn_pool.clone()) + } + /// Run query job and get result. 
/// ```rust /// use google_cloud_bigquery::http::job::query::QueryRequest; diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index e7ec287e..bb6cb00a 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -219,14 +219,14 @@ mod tests { use crate::grpc::apiv1::conn_pool::{ConnectionManager, AUDIENCE, SCOPES}; use google_cloud_gax::conn::Environment; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; - use google_cloud_gax::grpc::{IntoStreamingRequest, Status}; + use google_cloud_gax::grpc::Status; use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; - use google_cloud_googleapis::cloud::bigquery::storage::v1::table_field_schema::Type; + use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - AppendRowsRequest, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, - FinalizeWriteStreamRequest, ProtoRows, ProtoSchema, WriteStream, + AppendRowsRequest, BatchCommitWriteStreamsRequest, CreateWriteStreamRequest, FinalizeWriteStreamRequest, + ProtoRows, ProtoSchema, WriteStream, }; use prost::Message; use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; @@ -277,7 +277,7 @@ mod tests { // Create Pending Streams let mut pending_streams = vec![]; - for i in 0..5 { + for i in 0..4 { let pending_stream = client .create_write_stream(create_write_stream_request(&table), None) .await @@ -361,7 +361,7 @@ mod tests { } // Wait for append rows - for mut task in tasks { + for task in tasks { task.await.unwrap().unwrap(); } diff --git a/bigquery/src/grpc/apiv1/conn_pool.rs b/bigquery/src/grpc/apiv1/conn_pool.rs index f862a46a..16a8767f 100644 --- a/bigquery/src/grpc/apiv1/conn_pool.rs +++ b/bigquery/src/grpc/apiv1/conn_pool.rs @@ -10,6 +10,7 @@ pub const SCOPES: [&str; 3] = [ "https://www.googleapis.com/auth/cloud-platform", ]; +#[derive(Debug)] pub struct ConnectionManager { inner: GRPCConnectionManager, } diff --git a/bigquery/src/http/bigquery_dataset_client.rs b/bigquery/src/http/bigquery_dataset_client.rs index 7eaed55a..52191a06 100644 --- a/bigquery/src/http/bigquery_dataset_client.rs +++ b/bigquery/src/http/bigquery_dataset_client.rs @@ -121,7 +121,7 @@ mod test { // minimum dataset let mut ds1 = Dataset::default(); ds1.dataset_reference.dataset_id = dataset_name("crud_empty"); - ds1.dataset_reference.project_id = project.clone(); + ds1.dataset_reference.project_id.clone_from(&project); ds1 = client.create(&ds1).await.unwrap(); // test get diff --git a/bigquery/src/http/bigquery_job_client.rs b/bigquery/src/http/bigquery_job_client.rs index 961dde8c..359c015d 100644 --- a/bigquery/src/http/bigquery_job_client.rs +++ b/bigquery/src/http/bigquery_job_client.rs @@ -335,7 +335,7 @@ mod test { // insert test data let mut table1 = Table::default(); - table1.table_reference.dataset_id = dataset.clone(); + table1.table_reference.dataset_id.clone_from(&dataset); table1.table_reference.project_id = project.to_string(); table1.table_reference.table_id = format!("table_data_{}", OffsetDateTime::now_utc().unix_timestamp()); table1.schema = Some(create_table_schema()); diff --git a/bigquery/src/lib.rs b/bigquery/src/lib.rs index fc72b03e..b490d0b6 100644 --- a/bigquery/src/lib.rs +++ 
b/bigquery/src/lib.rs
@@ -190,3 +190,4 @@ pub mod grpc;
 pub mod http;
 pub mod query;
 pub mod storage;
+pub mod storage_batch_write;
diff --git a/bigquery/src/storage_batch_write.rs b/bigquery/src/storage_batch_write.rs
new file mode 100644
index 00000000..993f07db
--- /dev/null
+++ b/bigquery/src/storage_batch_write.rs
@@ -0,0 +1,92 @@
+use crate::grpc::apiv1::bigquery_client::StreamingWriteClient;
+use crate::grpc::apiv1::conn_pool::ConnectionManager;
+use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{
+    AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
+    CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream,
+};
+use std::sync::Arc;
+
+pub struct StorageBatchWriter {
+    table: String,
+    conn: Arc<ConnectionManager>,
+    streams: Vec<String>,
+}
+
+impl StorageBatchWriter {
+    pub(crate) fn new(table: String, conn: Arc<ConnectionManager>) -> Self {
+        Self {
+            table,
+            conn,
+            streams: Vec::new(),
+        }
+    }
+
+    pub async fn create_write_stream(&mut self) -> Result<PendingStream, Status> {
+        let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn()));
+        let res = client
+            .create_write_stream(
+                CreateWriteStreamRequest {
+                    parent: self.table.to_string(),
+                    write_stream: None,
+                },
+                None,
+            )
+            .await?
+            .into_inner();
+
+        self.streams.push(res.name.clone());
+
+        Ok(PendingStream::new(res, client))
+    }
+
+    pub async fn commit(self) -> Result<BatchCommitWriteStreamsResponse, Status> {
+        let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn()));
+        let result = client
+            .batch_commit_write_streams(
+                BatchCommitWriteStreamsRequest {
+                    parent: self.table.to_string(),
+                    write_streams: self.streams,
+                },
+                None,
+            )
+            .await?
+            .into_inner();
+        Ok(result)
+    }
+}
+
+pub struct PendingStream {
+    inner: WriteStream,
+    client: StreamingWriteClient,
+}
+
+impl PendingStream {
+    pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self {
+        Self { inner, client }
+    }
+
+    //TODO serialize values and get schema
+    pub async fn write(
+        &mut self,
+        req: impl IntoStreamingRequest<Message = AppendRowsRequest>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let response = self.client.append_rows(req).await?.into_inner();
+        Ok(response)
+    }
+
+    pub async fn finalize(mut self) -> Result<i64, Status> {
+        let res = self
+            .client
+            .finalize_write_stream(
+                FinalizeWriteStreamRequest {
+                    name: self.inner.name.to_string(),
+                },
+                None,
+            )
+            .await?
+ .into_inner(); + Ok(res.row_count) + } +} From 04dff38ad1f096628a01efa0c4b1f65f2c9157fb Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 22:15:34 +0900 Subject: [PATCH 04/23] add pending --- bigquery/src/client.rs | 10 ++++-- bigquery/src/grpc/apiv1/bigquery_client.rs | 34 ++++++++++--------- bigquery/src/lib.rs | 2 +- bigquery/src/storage_write/mod.rs | 1 + .../pending.rs} | 26 +++++++------- 5 files changed, 40 insertions(+), 33 deletions(-) create mode 100644 bigquery/src/storage_write/mod.rs rename bigquery/src/{storage_batch_write.rs => storage_write/pending.rs} (80%) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 38f70b8a..c744b742 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -27,7 +27,7 @@ use crate::http::job::{is_script, is_select_query, JobConfiguration, JobReferenc use crate::http::table::TableReference; use crate::query::{QueryOption, QueryResult}; use crate::{http, query}; -use crate::{storage, storage_batch_write}; +use crate::storage; const JOB_RETRY_REASONS: [&str; 3] = ["backendError", "rateLimitExceeded", "internalError"]; @@ -98,6 +98,7 @@ use crate::grpc::apiv1::bigquery_client::StreamingReadClient; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; +use crate::storage_write::pending; #[cfg(feature = "auth")] impl ClientConfig { @@ -241,8 +242,11 @@ impl Client { &self.model_client } - pub fn storage_batch_write(&self, table: String) -> storage_batch_write::StorageBatchWriter { - storage_batch_write::StorageBatchWriter::new(table, self.streaming_client_conn_pool.clone()) + /// Creates a new pending batch writer for the specified table. + /// Returns a `pending::Writer` instance that can be used to write batches of data to the specified table. + /// https://cloud.google.com/bigquery/docs/write-api#pending_type + pub fn pending_batch_writer(&self, table: String) -> pending::Writer { + pending::Writer::new(table, self.streaming_client_conn_pool.clone()) } /// Run query job and get result. 
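[Usage sketch, not part of the patch: a minimal example of the pending-writer API added above. It assumes a `Client` already built from `ClientConfig`, a `rows: Vec<AppendRowsRequest>` prepared the same way as in the tests (protobuf-encoded rows plus their `DescriptorProto` schema), and a `StreamExt` import for `next()`; error handling is elided.]

    let mut writer = client.pending_batch_writer("projects/p/datasets/d/tables/t".to_string());
    let mut stream = writer.create_write_stream().await?;   // CreateWriteStream (PENDING type)
    let mut responses = stream.append_rows(rows).await?;    // AppendRows over a gRPC stream
    while let Some(res) = responses.next().await {
        println!("row errors = {}", res?.row_errors.len());
    }
    stream.finalize().await?;                                // FinalizeWriteStream
    writer.commit().await?;                                  // BatchCommitWriteStreams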
diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index bb6cb00a..8386a296 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -12,6 +12,7 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::{ FlushRowsRequest, FlushRowsResponse, GetWriteStreamRequest, ReadRowsRequest, ReadRowsResponse, ReadSession, SplitReadStreamRequest, SplitReadStreamResponse, WriteStream, }; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type; fn default_setting() -> RetrySetting { RetrySetting { @@ -213,9 +214,24 @@ impl StreamingWriteClient { } } +pub(crate) fn create_write_stream_request(table: &str, write_type: Type) -> CreateWriteStreamRequest { + CreateWriteStreamRequest { + parent: table.to_string(), + write_stream: Some(WriteStream { + name: "".to_string(), + r#type: write_type as i32, + create_time: None, + commit_time: None, + table_schema: None, + write_mode: 0, + location: "".to_string(), + }), + } +} + #[cfg(test)] mod tests { - use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; + use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::{ConnectionManager, AUDIENCE, SCOPES}; use google_cloud_gax::conn::Environment; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; @@ -245,20 +261,6 @@ mod tests { let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); } - fn create_write_stream_request(table: &str) -> CreateWriteStreamRequest { - CreateWriteStreamRequest { - parent: table.to_string(), - write_stream: Some(WriteStream { - name: "".to_string(), - r#type: Pending as i32, - create_time: None, - commit_time: None, - table_schema: None, - write_mode: 0, - location: "".to_string(), - }), - } - } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_storage_write() { let config = google_cloud_auth::project::Config::default() @@ -279,7 +281,7 @@ mod tests { let mut pending_streams = vec![]; for i in 0..4 { let pending_stream = client - .create_write_stream(create_write_stream_request(&table), None) + .create_write_stream(create_write_stream_request(&table, Pending), None) .await .unwrap() .into_inner(); diff --git a/bigquery/src/lib.rs b/bigquery/src/lib.rs index b490d0b6..d06083ac 100644 --- a/bigquery/src/lib.rs +++ b/bigquery/src/lib.rs @@ -190,4 +190,4 @@ pub mod grpc; pub mod http; pub mod query; pub mod storage; -pub mod storage_batch_write; +mod storage_write; diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs new file mode 100644 index 00000000..3e9f3208 --- /dev/null +++ b/bigquery/src/storage_write/mod.rs @@ -0,0 +1 @@ +pub mod pending; \ No newline at end of file diff --git a/bigquery/src/storage_batch_write.rs b/bigquery/src/storage_write/pending.rs similarity index 80% rename from bigquery/src/storage_batch_write.rs rename to bigquery/src/storage_write/pending.rs index 993f07db..f6a118e8 100644 --- a/bigquery/src/storage_batch_write.rs +++ b/bigquery/src/storage_write/pending.rs @@ -1,4 +1,4 @@ -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; +use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; @@ -7,14 +7,15 @@ use 
google_cloud_googleapis::cloud::bigquery::storage::v1::{ CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream, }; use std::sync::Arc; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; -pub struct StorageBatchWriter { +pub struct Writer { table: String, conn: Arc, streams: Vec, } -impl StorageBatchWriter { +impl Writer { pub(crate) fn new(table: String, conn: Arc) -> Self { Self { table, @@ -26,13 +27,7 @@ impl StorageBatchWriter { pub async fn create_write_stream(&mut self) -> Result { let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); let res = client - .create_write_stream( - CreateWriteStreamRequest { - parent: self.table.to_string(), - write_stream: None, - }, - None, - ) + .create_write_stream(create_write_stream_request(&self.table, Pending), None) .await? .into_inner(); @@ -68,11 +63,16 @@ impl PendingStream { } //TODO serialize values and get schema - pub async fn write( + pub async fn append_rows( &mut self, - req: impl IntoStreamingRequest, + rows: Vec, ) -> Result, Status> { - let response = self.client.append_rows(req).await?.into_inner(); + let request = Box::pin(async_stream::stream! { + for row in rows { + yield row; + } + }); + let response = self.client.append_rows(request).await?.into_inner(); Ok(response) } From 1d34ad7dcaba9dbb374caa3ce9f3ae80f297304d Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 22:33:47 +0900 Subject: [PATCH 05/23] add default stream --- bigquery/src/grpc/apiv1/bigquery_client.rs | 100 +++++++++++++-------- bigquery/src/storage_write/default.rs | 35 ++++++++ bigquery/src/storage_write/mod.rs | 3 +- 3 files changed, 98 insertions(+), 40 deletions(-) create mode 100644 bigquery/src/storage_write/default.rs diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index 8386a296..af088cb6 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -261,6 +261,47 @@ mod tests { let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); } + fn create_append_rows_request(name: &str, buf: Vec) -> AppendRowsRequest { + AppendRowsRequest { + write_stream: name.to_string(), + offset: None, + trace_id: "".to_string(), + missing_value_interpretations: Default::default(), + default_missing_value_interpretation: 0, + rows: Some(Rows::ProtoRows(ProtoData { + writer_schema: Some(ProtoSchema { + proto_descriptor: Some(DescriptorProto { + name: Some("TestData".to_string()), + field: vec![FieldDescriptorProto { + name: Some("col_string".to_string()), + number: Some(1), + label: None, + r#type: Some(field_descriptor_proto::Type::String.into()), + type_name: None, + extendee: None, + default_value: None, + oneof_index: None, + json_name: None, + options: None, + proto3_optional: None, + }], + extension: vec![], + nested_type: vec![], + enum_type: vec![], + extension_range: vec![], + oneof_decl: vec![], + options: None, + reserved_range: vec![], + reserved_name: vec![], + }), + }), + rows: Some(ProtoRows { + serialized_rows: vec![buf], + }), + })), + } + } + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_storage_write() { let config = google_cloud_auth::project::Config::default() @@ -307,44 +348,7 @@ mod tests { let mut buf = Vec::new(); data.encode(&mut buf).unwrap(); - let row = AppendRowsRequest { - write_stream: pending_stream.name.to_string(), - offset: None, - trace_id: "".to_string(), - missing_value_interpretations: Default::default(), 
- default_missing_value_interpretation: 0, - rows: Some(Rows::ProtoRows(ProtoData { - writer_schema: Some(ProtoSchema { - proto_descriptor: Some(DescriptorProto { - name: Some("TestData".to_string()), - field: vec![FieldDescriptorProto { - name: Some("col_string".to_string()), - number: Some(1), - label: None, - r#type: Some(field_descriptor_proto::Type::String.into()), - type_name: None, - extendee: None, - default_value: None, - oneof_index: None, - json_name: None, - options: None, - proto3_optional: None, - }], - extension: vec![], - nested_type: vec![], - enum_type: vec![], - extension_range: vec![], - oneof_decl: vec![], - options: None, - reserved_range: vec![], - reserved_name: vec![], - }), - }), - rows: Some(ProtoRows { - serialized_rows: vec![buf], - }), - })), - }; + let row = create_append_rows_request(&pending_stream.name, buf); rows.push(row); } @@ -396,6 +400,24 @@ mod tests { .await .unwrap() .into_inner(); - tracing::info!("commit stream errors = {:?}", res.stream_errors.len()) + tracing::info!("commit stream errors = {:?}", res.stream_errors.len()); + + // Write via default stream + let data = TestData { + col_string: format!("default_stream"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + let row = create_append_rows_request(&format!("{table}/streams/_default"), buf); + let request = Box::pin(async_stream::stream! { + for req in [row]{ + yield req; + } + }); + let mut response = client.append_rows(request).await.unwrap().into_inner(); + while let Some(res) = response.next().await { + let res = res.unwrap(); + tracing::info!("default append row errors = {:?}", res.row_errors.len()); + } } } diff --git a/bigquery/src/storage_write/default.rs b/bigquery/src/storage_write/default.rs new file mode 100644 index 00000000..c905a780 --- /dev/null +++ b/bigquery/src/storage_write/default.rs @@ -0,0 +1,35 @@ +use crate::grpc::apiv1::bigquery_client::{StreamingWriteClient}; +use crate::grpc::apiv1::conn_pool::ConnectionManager; +use google_cloud_gax::grpc::{Status, Streaming}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{ + AppendRowsRequest, AppendRowsResponse, +}; +use std::sync::Arc; + +pub struct Writer { + conn: Arc, +} + +impl Writer { + pub(crate) fn new( conn: Arc) -> Self { + Self { + conn, + } + } + + //TODO use default stream name + pub async fn append_rows( + &mut self, + rows: Vec, + ) -> Result, Status> { + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); + let request = Box::pin(async_stream::stream! 
{ + for row in rows { + yield row; + } + }); + let response = client.append_rows(request).await?.into_inner(); + Ok(response) + } +} diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs index 3e9f3208..69f1ee24 100644 --- a/bigquery/src/storage_write/mod.rs +++ b/bigquery/src/storage_write/mod.rs @@ -1 +1,2 @@ -pub mod pending; \ No newline at end of file +pub mod pending; +pub mod default; From c39795a50f790b247a3faea8d2f082b73dd9b7c5 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 22:36:45 +0900 Subject: [PATCH 06/23] add default stream --- bigquery/src/client.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index c744b742..851d727c 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -98,7 +98,7 @@ use crate::grpc::apiv1::bigquery_client::StreamingReadClient; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; -use crate::storage_write::pending; +use crate::storage_write::{default, pending}; #[cfg(feature = "auth")] impl ClientConfig { @@ -249,6 +249,13 @@ impl Client { pending::Writer::new(table, self.streaming_client_conn_pool.clone()) } + /// Creates a new default batch writer. + /// Returns a `default::Writer` instance that can be used to write data to the specified table. + /// https://cloud.google.com/bigquery/docs/write-api#default_stream + pub fn default_batch_writer(&self) -> default::Writer { + default::Writer::new(self.streaming_client_conn_pool.clone()) + } + /// Run query job and get result. /// ```rust /// use google_cloud_bigquery::http::job::query::QueryRequest; From d17a8451276bde28fa5302a1580792bcaef4ece6 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Sat, 5 Oct 2024 22:58:19 +0900 Subject: [PATCH 07/23] modify --- bigquery/src/client.rs | 4 +- bigquery/src/grpc/apiv1/bigquery_client.rs | 2 +- bigquery/src/storage_write/default.rs | 17 ++---- bigquery/src/storage_write/mod.rs | 67 +++++++++++++++++++++- bigquery/src/storage_write/pending.rs | 7 +-- 5 files changed, 76 insertions(+), 21 deletions(-) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 851d727c..f0e964dd 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -26,8 +26,8 @@ use crate::http::job::query::QueryRequest; use crate::http::job::{is_script, is_select_query, JobConfiguration, JobReference, JobStatistics, JobType}; use crate::http::table::TableReference; use crate::query::{QueryOption, QueryResult}; -use crate::{http, query}; use crate::storage; +use crate::{http, query}; const JOB_RETRY_REASONS: [&str; 3] = ["backendError", "rateLimitExceeded", "internalError"]; @@ -95,10 +95,10 @@ use crate::http::job::get::GetJobRequest; use crate::http::job::list::ListJobsRequest; use crate::grpc::apiv1::bigquery_client::StreamingReadClient; +use crate::storage_write::{default, pending}; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; -use crate::storage_write::{default, pending}; #[cfg(feature = "auth")] impl ClientConfig { diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index af088cb6..b64b3b30 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -6,13 +6,13 @@ use google_cloud_gax::grpc::{Code, IntoStreamingRequest, Response, Status, Strea 
 use google_cloud_gax::retry::{invoke_fn, RetrySetting};
 use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient;
 use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type;
 use google_cloud_googleapis::cloud::bigquery::storage::v1::{
     AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
     CreateReadSessionRequest, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FinalizeWriteStreamResponse,
     FlushRowsRequest, FlushRowsResponse, GetWriteStreamRequest, ReadRowsRequest, ReadRowsResponse, ReadSession,
     SplitReadStreamRequest, SplitReadStreamResponse, WriteStream,
 };
-use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type;
 
 fn default_setting() -> RetrySetting {
     RetrySetting {
diff --git a/bigquery/src/storage_write/default.rs b/bigquery/src/storage_write/default.rs
index c905a780..c5e3c37d 100644
--- a/bigquery/src/storage_write/default.rs
+++ b/bigquery/src/storage_write/default.rs
@@ -1,10 +1,8 @@
-use crate::grpc::apiv1::bigquery_client::{StreamingWriteClient};
+use crate::grpc::apiv1::bigquery_client::StreamingWriteClient;
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
 use google_cloud_gax::grpc::{Status, Streaming};
 use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
-use google_cloud_googleapis::cloud::bigquery::storage::v1::{
-    AppendRowsRequest, AppendRowsResponse,
-};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse};
 use std::sync::Arc;
 
 pub struct Writer {
@@ -12,17 +10,12 @@ pub struct Writer {
 }
 
 impl Writer {
-    pub(crate) fn new( conn: Arc<ConnectionManager>) -> Self {
-        Self {
-            conn,
-        }
+    pub(crate) fn new(conn: Arc<ConnectionManager>) -> Self {
+        Self { conn }
     }
 
     //TODO use default stream name
-    pub async fn append_rows(
-        &mut self,
-        rows: Vec<AppendRowsRequest>,
-    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+    pub async fn append_rows(&mut self, rows: Vec<AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
         let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn()));
         let request = Box::pin(async_stream::stream! {
             for row in rows {
                 yield row;
             }
         });
         let response = client.append_rows(request).await?.into_inner();
         Ok(response)
     }
 }
diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs
index 69f1ee24..58868505 100644
--- a/bigquery/src/storage_write/mod.rs
+++ b/bigquery/src/storage_write/mod.rs
@@ -1,2 +1,67 @@
-pub mod pending;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, ProtoRows, ProtoSchema};
+use prost_types::DescriptorProto;
+use std::collections::HashMap;
+
 pub mod default;
+pub mod pending;
+
+pub struct AppendRowsRequestBuilder {
+    offset: Option<i64>,
+    trace_id: Option<String>,
+    missing_value_interpretations: Option<HashMap<String, i32>>,
+    default_missing_value_interpretation: Option<i32>,
+    data: Vec<Vec<u8>>,
+    schema: DescriptorProto,
+}
+
+impl AppendRowsRequestBuilder {
+    pub fn new(schema: DescriptorProto, data: Vec<Vec<u8>>) -> Self {
+        Self {
+            offset: None,
+            trace_id: None,
+            missing_value_interpretations: None,
+            default_missing_value_interpretation: None,
+            data,
+            schema,
+        }
+    }
+
+    pub fn with_offset(mut self, offset: i64) -> Self {
+        self.offset = Some(offset);
+        self
+    }
+
+    pub fn with_trace_id(mut self, trace_id: String) -> Self {
+        self.trace_id = Some(trace_id);
+        self
+    }
+
+    pub fn with_missing_value_interpretations(mut self, missing_value_interpretations: HashMap<String, i32>) -> Self {
+        self.missing_value_interpretations = Some(missing_value_interpretations);
+        self
+    }
+
+    pub fn with_default_missing_value_interpretation(mut self, default_missing_value_interpretation: i32) -> Self {
+        self.default_missing_value_interpretation = Some(default_missing_value_interpretation);
+        self
+    }
+
+    pub(crate) fn build(self, stream: &str) -> AppendRowsRequest {
+        AppendRowsRequest {
+            write_stream: stream.to_string(),
+            offset: self.offset,
+            trace_id: self.trace_id.unwrap_or_default(),
+            missing_value_interpretations: self.missing_value_interpretations.unwrap_or_default(),
+            default_missing_value_interpretation: self.default_missing_value_interpretation.unwrap_or(0),
+            rows: Some(Rows::ProtoRows(ProtoData {
+                writer_schema: Some(ProtoSchema {
+                    proto_descriptor: Some(self.schema),
+                }),
+                rows: Some(ProtoRows {
+                    serialized_rows: self.data,
+                }),
+            })),
+        }
+    }
+}
diff --git a/bigquery/src/storage_write/pending.rs b/bigquery/src/storage_write/pending.rs
index f6a118e8..79ae430c 100644
--- a/bigquery/src/storage_write/pending.rs
+++ b/bigquery/src/storage_write/pending.rs
@@ -2,12 +2,12 @@ use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, Streaming
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
 use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
 use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending;
 use google_cloud_googleapis::cloud::bigquery::storage::v1::{
     AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
     CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream,
 };
 use std::sync::Arc;
-use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending;
 
 pub struct Writer {
     table: String,
@@ -63,10 +63,7 @@ impl PendingStream {
     }
 
     //TODO serialize values and get schema
-    pub async fn append_rows(
-        &mut self,
-        rows: Vec<AppendRowsRequest>,
-    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+    pub async fn append_rows(&mut self, rows: Vec<AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
         let request = Box::pin(async_stream::stream!
{ for row in rows { yield row; From 9b655e4370d165b201af2ee86f1714bf49e8cd0c Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 8 Oct 2024 15:06:31 +0900 Subject: [PATCH 08/23] add commited and buffered --- bigquery/src/client.rs | 24 +++++-- bigquery/src/storage_write/buffered.rs | 87 +++++++++++++++++++++++++ bigquery/src/storage_write/committed.rs | 75 +++++++++++++++++++++ bigquery/src/storage_write/mod.rs | 2 + 4 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 bigquery/src/storage_write/buffered.rs create mode 100644 bigquery/src/storage_write/committed.rs diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index f0e964dd..88aeb6fe 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -95,7 +95,7 @@ use crate::http::job::get::GetJobRequest; use crate::http::job::list::ListJobsRequest; use crate::grpc::apiv1::bigquery_client::StreamingReadClient; -use crate::storage_write::{default, pending}; +use crate::storage_write::{buffered, committed, default, pending}; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; @@ -242,20 +242,30 @@ impl Client { &self.model_client } - /// Creates a new pending batch writer for the specified table. - /// Returns a `pending::Writer` instance that can be used to write batches of data to the specified table. + /// Creates a new pending type storage writer for the specified table. /// https://cloud.google.com/bigquery/docs/write-api#pending_type - pub fn pending_batch_writer(&self, table: String) -> pending::Writer { + pub fn pending_storage_writer(&self, table: String) -> pending::Writer { pending::Writer::new(table, self.streaming_client_conn_pool.clone()) } - /// Creates a new default batch writer. - /// Returns a `default::Writer` instance that can be used to write data to the specified table. + /// Creates a new default type storage writer. /// https://cloud.google.com/bigquery/docs/write-api#default_stream - pub fn default_batch_writer(&self) -> default::Writer { + pub fn default_storage_writer(&self) -> default::Writer { default::Writer::new(self.streaming_client_conn_pool.clone()) } + /// Creates a new committed type storage writer. + /// https://cloud.google.com/bigquery/docs/write-api#committed_type + pub fn committed_storage_writer(&self, table: String) -> committed::Writer { + committed::Writer::new(table, self.streaming_client_conn_pool.clone()) + } + + /// Creates a new buffered type storage writer. + /// https://cloud.google.com/bigquery/docs/write-api#buffered_type + pub fn buffered_storage_writer(&self, table: String) -> buffered::Writer { + buffered::Writer::new(table, self.streaming_client_conn_pool.clone()) + } + /// Run query job and get result. 
 /// ```rust
 /// use google_cloud_bigquery::http::job::query::QueryRequest;
diff --git a/bigquery/src/storage_write/buffered.rs b/bigquery/src/storage_write/buffered.rs
new file mode 100644
index 00000000..9543c20f
--- /dev/null
+++ b/bigquery/src/storage_write/buffered.rs
@@ -0,0 +1,87 @@
+use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient};
+use crate::grpc::apiv1::conn_pool::ConnectionManager;
+use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream};
+use std::sync::Arc;
+
+pub struct Writer {
+    table: String,
+    conn: Arc<ConnectionManager>,
+    streams: Vec<String>,
+}
+
+impl Writer {
+    pub(crate) fn new(table: String, conn: Arc<ConnectionManager>) -> Self {
+        Self {
+            table,
+            conn,
+            streams: Vec::new(),
+        }
+    }
+
+    pub async fn create_write_stream(&mut self) -> Result<PendingStream, Status> {
+        let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn()));
+        let res = client
+            .create_write_stream(create_write_stream_request(&self.table, Buffered), None)
+            .await?
+            .into_inner();
+
+        self.streams.push(res.name.clone());
+
+        Ok(PendingStream::new(res, client))
+    }
+
+}
+
+pub struct PendingStream {
+    inner: WriteStream,
+    client: StreamingWriteClient,
+}
+
+impl PendingStream {
+    pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self {
+        Self { inner, client }
+    }
+
+    //TODO serialize values and get schema
+    pub async fn append_rows(&mut self, rows: Vec<AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let request = Box::pin(async_stream::stream! {
+            for row in rows {
+                yield row;
+            }
+        });
+        let response = self.client.append_rows(request).await?.into_inner();
+        Ok(response)
+    }
+
+    pub async fn flush_rows(mut self) -> Result<i64, Status> {
+        let res = self
+            .client
+            .flush_rows(
+                FlushRowsRequest{
+                    write_stream: self.inner.name.to_string(),
+                    offset: None,
+                },
+                None,
+            )
+            .await?
+            .into_inner();
+        Ok(res.offset)
+    }
+
+    pub async fn finalize(mut self) -> Result<i64, Status> {
+        let res = self
+            .client
+            .finalize_write_stream(
+                FinalizeWriteStreamRequest {
+                    name: self.inner.name.to_string(),
+                },
+                None,
+            )
+            .await?
+            .into_inner();
+        Ok(res.row_count)
+    }
+}
diff --git a/bigquery/src/storage_write/committed.rs b/bigquery/src/storage_write/committed.rs
new file mode 100644
index 00000000..271d5d05
--- /dev/null
+++ b/bigquery/src/storage_write/committed.rs
@@ -0,0 +1,75 @@
+use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient};
+use crate::grpc::apiv1::conn_pool::ConnectionManager;
+use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{
+    AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
+    CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream,
+};
+use std::sync::Arc;
+
+pub struct Writer {
+    table: String,
+    conn: Arc<ConnectionManager>,
+    streams: Vec<String>,
+}
+
+impl Writer {
+    pub(crate) fn new(table: String, conn: Arc<ConnectionManager>) -> Self {
+        Self {
+            table,
+            conn,
+            streams: Vec::new(),
+        }
+    }
+
+    pub async fn create_write_stream(&mut self) -> Result<PendingStream, Status> {
+        let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn()));
+        let res = client
+            .create_write_stream(create_write_stream_request(&self.table, Committed), None)
+            .await?
+            .into_inner();
+
+        self.streams.push(res.name.clone());
+
+        Ok(PendingStream::new(res, client))
+    }
+
+}
+
+pub struct PendingStream {
+    inner: WriteStream,
+    client: StreamingWriteClient,
+}
+
+impl PendingStream {
+    pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self {
+        Self { inner, client }
+    }
+
+    //TODO serialize values and get schema
+    pub async fn append_rows(&mut self, rows: Vec<AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let request = Box::pin(async_stream::stream! {
+            for row in rows {
+                yield row;
+            }
+        });
+        let response = self.client.append_rows(request).await?.into_inner();
+        Ok(response)
+    }
+
+    pub async fn finalize(mut self) -> Result<i64, Status> {
+        let res = self
+            .client
+            .finalize_write_stream(
+                FinalizeWriteStreamRequest {
+                    name: self.inner.name.to_string(),
+                },
+                None,
+            )
+            .await?
+            .into_inner();
+        Ok(res.row_count)
+    }
+}
diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs
index 58868505..c5629f1a 100644
--- a/bigquery/src/storage_write/mod.rs
+++ b/bigquery/src/storage_write/mod.rs
@@ -5,6 +5,8 @@ use std::collections::HashMap;
 
 pub mod default;
 pub mod pending;
+pub mod committed;
+pub mod buffered;
From 89ec5fb4919e1f43b3b5224eb07213c8c838602f Mon Sep 17 00:00:00 2001
From: yoshidan
Date: Wed, 9 Oct 2024 16:33:57 +0900
Subject: [PATCH 09/23] add flow_controller

---
 bigquery/src/storage_write/flow_controller.rs | 77 +++++++++++++++++++
 bigquery/src/storage_write/mod.rs             |  1 +
 2 files changed, 78 insertions(+)
 create mode 100644 bigquery/src/storage_write/flow_controller.rs

diff --git a/bigquery/src/storage_write/flow_controller.rs b/bigquery/src/storage_write/flow_controller.rs
new file mode 100644
index 00000000..479713fd
--- /dev/null
+++ b/bigquery/src/storage_write/flow_controller.rs
@@ -0,0 +1,77 @@
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+use tokio::sync::{Semaphore, SemaphorePermit};
+
+pub struct FlowController {
+    sem_insert_count: Semaphore
+    //TODO support sem_insert_bytes
+}
+
+impl FlowController {
+
+    pub fn new(max_insert_count: usize) -> Self {
+        FlowController {
+            sem_insert_count: Semaphore::new(max_insert_count)
+        }
+    }
+    pub async fn acquire(&self) -> SemaphorePermit {
+        self.sem_insert_count.acquire().await.unwrap()
+    }
+
+}
+
+pub enum Router {
+    /// key is writer.id
+    Simplex(Arc<Mutex<HashMap<String, Arc<FlowController>>>>),
+}
+
+impl Router {
+    pub fn new_simplex() -> Self {
+        Router::Simplex(Arc::new(Mutex::new(HashMap::new())))
+    }
+
+    pub fn attach_writer(&self, writer_id: String, max_insert_count: usize) {
+        match self {
+            Router::Simplex(map) => {
+                let fc = Arc::new(FlowController::new(max_insert_count));
+                let mut map = map.lock().unwrap();
+                map.insert(writer_id, fc);
+            }
+        }
+    }
+
+    pub fn pick(&self, writer_id: &str) -> Option<Arc<FlowController>> {
+        match self {
+            Router::Simplex(map) => {
+                let map = map.lock().unwrap();
+                map.get(writer_id).map(|c| c.clone())
+            }
+        }
+    }
+}
+
+pub struct Pool {
+    pub location: String,
+    pub router: Router,
+    pub max_insert_count: usize
+}
+
+impl Pool {
+    pub fn new(location: String, max_insert_count: usize) -> Self {
+        Pool {
+            location,
+            //TODO support shared router
+            router: Router::new_simplex(),
+            max_insert_count
+        }
+    }
+
+    pub fn attach_writer(&self, writer_id: String) {
+        self.router.attach_writer(writer_id, self.max_insert_count);
+    }
+
+    pub fn pick(&self, writer_id: &str) -> Option<Arc<FlowController>> {
+        self.router.pick(writer_id)
+    }
+
+}
\ No newline at end of file
diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs
index c5629f1a..903fc9bd 100644
--- a/bigquery/src/storage_write/mod.rs
+++ b/bigquery/src/storage_write/mod.rs
@@ -7,6 +7,7 @@ pub mod default;
 pub mod pending;
 pub mod committed;
 pub mod buffered;
+mod flow_controller;
From 68740e4c1fcee668bfa751e275e004de3b22c4ad Mon Sep 17 00:00:00 2001
From: yoshidan
Date: Wed, 9 Oct 2024 16:53:23 +0900
Subject: [PATCH 10/23] add pool management

---
 bigquery/src/storage_write/flow_controller.rs | 63 +++++++++++++----
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/bigquery/src/storage_write/flow_controller.rs b/bigquery/src/storage_write/flow_controller.rs
index 479713fd..1cab3932 100644
--- a/bigquery/src/storage_write/flow_controller.rs
+++ b/bigquery/src/storage_write/flow_controller.rs
+1,11 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tokio::sync::{Semaphore, SemaphorePermit}; +use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; +use crate::grpc::apiv1::conn_pool::ConnectionManager; pub struct FlowController { sem_insert_count: Semaphore @@ -20,9 +25,32 @@ impl FlowController { } +pub struct Connection { + fc: FlowController, + grpc_conn_pool: Arc +} + +impl Connection { + pub fn new(fc: FlowController, grpc_conn_pool: Arc) -> Self { + Connection { + fc, + grpc_conn_pool + } + } + + pub async fn locking_append(&self, req: impl IntoStreamingRequest) -> Result, Status> { + let permit = self.fc.acquire().await; + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.grpc_conn_pool.conn())); + let result = client.append_rows(req).await?.into_inner(); + drop(permit); + Ok(result) + } +} + pub enum Router { /// key is writer.id - Simplex(Arc>>>), + Simplex(Arc>>>), + //TODO support shared router } impl Router { @@ -30,17 +58,27 @@ impl Router { Router::Simplex(Arc::new(Mutex::new(HashMap::new()))) } - pub fn attach_writer(&self, writer_id: String, max_insert_count: usize) { + pub fn attach_writer(&self, writer_id: String, max_insert_count: usize, grpc_conn_pool: Arc) { + match self { + Router::Simplex(map) => { + let fc = FlowController::new(max_insert_count); + let conn = Arc::new(Connection::new(fc, grpc_conn_pool)); + let mut map = map.lock().unwrap(); + map.insert(writer_id, conn); + } + } + } + + pub fn remove_writer(&self, writer_id: &str) { match self { Router::Simplex(map) => { - let fc = Arc::new(FlowController::new(max_insert_count)); let mut map = map.lock().unwrap(); - map.insert(writer_id, fc); + map.remove(writer_id); } } } - pub fn pick(&self, writer_id: &str) -> Option> { + pub fn pick(&self, writer_id: &str) -> Option> { match self { Router::Simplex(map) => { let map = map.lock().unwrap(); @@ -51,26 +89,25 @@ impl Router { } pub struct Pool { - pub location: String, pub router: Router, - pub max_insert_count: usize + pub max_insert_count: usize, + pub conn_pool: Arc } impl Pool { - pub fn new(location: String, max_insert_count: usize) -> Self { + pub fn new(max_insert_count: usize, conn_pool: Arc) -> Self { Pool { - location, - //TODO support shared router router: Router::new_simplex(), - max_insert_count + max_insert_count, + conn_pool } } pub fn attach_writer(&self, writer_id: String) { - self.router.attach_writer(writer_id, self.max_insert_count); + self.router.attach_writer(writer_id, self.max_insert_count, self.conn_pool.clone()); } - pub fn pick(&self, writer_id: &str) -> Option> { + pub fn pick(&self, writer_id: &str) -> Option> { self.router.pick(writer_id) } From 159ecb70d8103c085ad59fbc3b6888df0618e310 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Thu, 10 Oct 2024 22:31:31 +0900 Subject: [PATCH 11/23] refactor --- bigquery/src/client.rs | 4 +- bigquery/src/storage_write/committed.rs | 75 ------------------- bigquery/src/storage_write/connection.rs | 31 ++++++++ bigquery/src/storage_write/flow.rs | 19 +++++ bigquery/src/storage_write/mod.rs | 21 ++++-- .../{flow_controller.rs => pool.rs} | 57 ++------------ .../storage_write/{ => stream}/buffered.rs | 53 +++++-------- .../src/storage_write/stream/committed.rs | 55 
++++++++++++++ .../src/storage_write/{ => stream}/default.rs | 0 bigquery/src/storage_write/stream/mod.rs | 49 ++++++++++++ .../src/storage_write/{ => stream}/pending.rs | 41 +++------- 11 files changed, 210 insertions(+), 195 deletions(-) delete mode 100644 bigquery/src/storage_write/committed.rs create mode 100644 bigquery/src/storage_write/connection.rs create mode 100644 bigquery/src/storage_write/flow.rs rename bigquery/src/storage_write/{flow_controller.rs => pool.rs} (51%) rename bigquery/src/storage_write/{ => stream}/buffered.rs (56%) create mode 100644 bigquery/src/storage_write/stream/committed.rs rename bigquery/src/storage_write/{ => stream}/default.rs (100%) create mode 100644 bigquery/src/storage_write/stream/mod.rs rename bigquery/src/storage_write/{ => stream}/pending.rs (63%) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 88aeb6fe..c191f154 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use google_cloud_gax::conn::{ConnectionOptions, Environment}; use google_cloud_gax::retry::RetrySetting; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - read_session, CreateReadSessionRequest, DataFormat, ReadSession, + CreateReadSessionRequest, DataFormat, read_session, ReadSession, }; use google_cloud_token::TokenSourceProvider; @@ -95,10 +95,10 @@ use crate::http::job::get::GetJobRequest; use crate::http::job::list::ListJobsRequest; use crate::grpc::apiv1::bigquery_client::StreamingReadClient; -use crate::storage_write::{buffered, committed, default, pending}; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; +use crate::storage_write::stream::{buffered, committed, default, pending}; #[cfg(feature = "auth")] impl ClientConfig { diff --git a/bigquery/src/storage_write/committed.rs b/bigquery/src/storage_write/committed.rs deleted file mode 100644 index 271d5d05..00000000 --- a/bigquery/src/storage_write/committed.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; -use crate::grpc::apiv1::conn_pool::ConnectionManager; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, - CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream, -}; -use std::sync::Arc; - -pub struct Writer { - table: String, - conn: Arc, - streams: Vec, -} - -impl Writer { - pub(crate) fn new(table: String, conn: Arc) -> Self { - Self { - table, - conn, - streams: Vec::new(), - } - } - - pub async fn create_write_stream(&mut self) -> Result { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let res = client - .create_write_stream(create_write_stream_request(&self.table, Committed), None) - .await? 
- .into_inner(); - - self.streams.push(res.name.clone()); - - Ok(PendingStream::new(res, client)) - } - -} - -pub struct PendingStream { - inner: WriteStream, - client: StreamingWriteClient, -} - -impl PendingStream { - pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self { - Self { inner, client } - } - - //TODO serialize values and get schema - pub async fn append_rows(&mut self, rows: Vec) -> Result, Status> { - let request = Box::pin(async_stream::stream! { - for row in rows { - yield row; - } - }); - let response = self.client.append_rows(request).await?.into_inner(); - Ok(response) - } - - pub async fn finalize(mut self) -> Result { - let res = self - .client - .finalize_write_stream( - FinalizeWriteStreamRequest { - name: self.inner.name.to_string(), - }, - None, - ) - .await? - .into_inner(); - Ok(res.row_count) - } -} diff --git a/bigquery/src/storage_write/connection.rs b/bigquery/src/storage_write/connection.rs new file mode 100644 index 00000000..9087c0a5 --- /dev/null +++ b/bigquery/src/storage_write/connection.rs @@ -0,0 +1,31 @@ +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use tokio::sync::{Semaphore, SemaphorePermit}; +use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; +use crate::grpc::apiv1::conn_pool::ConnectionManager; +use crate::storage_write::flow::FlowController; + +pub struct Connection { + fc: FlowController, + grpc_conn_pool: Arc +} + +impl Connection { + pub fn new(fc: FlowController, grpc_conn_pool: Arc) -> Self { + Connection { + fc, + grpc_conn_pool + } + } + + pub async fn locking_append(&self, req: impl IntoStreamingRequest) -> Result, Status> { + let permit = self.fc.acquire().await; + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.grpc_conn_pool.conn())); + let result = client.append_rows(req).await?.into_inner(); + drop(permit); + Ok(result) + } +} \ No newline at end of file diff --git a/bigquery/src/storage_write/flow.rs b/bigquery/src/storage_write/flow.rs new file mode 100644 index 00000000..716c52e6 --- /dev/null +++ b/bigquery/src/storage_write/flow.rs @@ -0,0 +1,19 @@ +use tokio::sync::{Semaphore, SemaphorePermit}; + +pub struct FlowController { + sem_insert_count: Semaphore + //TODO support sem_insert_bytes +} + +impl FlowController { + + pub fn new(max_insert_count: usize) -> Self { + FlowController { + sem_insert_count: Semaphore::new(max_insert_count) + } + } + pub async fn acquire(&self) -> SemaphorePermit { + self.sem_insert_count.acquire().await.unwrap() + } + +} \ No newline at end of file diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs index 903fc9bd..7d5b7445 100644 --- a/bigquery/src/storage_write/mod.rs +++ b/bigquery/src/storage_write/mod.rs @@ -2,12 +2,15 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request:: use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, ProtoRows, ProtoSchema}; use prost_types::DescriptorProto; use std::collections::HashMap; +use google_cloud_gax::grpc::codegen::tokio_stream::Stream; -pub mod default; -pub mod pending; -pub mod committed; -pub mod buffered; -mod flow_controller; +mod pool; + +pub mod connection; + +mod flow; + +pub mod stream; pub struct 
AppendRowsRequestBuilder { offset: Option, @@ -68,3 +71,11 @@ impl AppendRowsRequestBuilder { } } } + +pub fn into_streaming_request(rows: Vec) -> impl Stream{ + async_stream::stream! { + for row in rows { + yield row; + } + } +} \ No newline at end of file diff --git a/bigquery/src/storage_write/flow_controller.rs b/bigquery/src/storage_write/pool.rs similarity index 51% rename from bigquery/src/storage_write/flow_controller.rs rename to bigquery/src/storage_write/pool.rs index 1cab3932..188b2311 100644 --- a/bigquery/src/storage_write/flow_controller.rs +++ b/bigquery/src/storage_write/pool.rs @@ -1,55 +1,12 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; -use tokio::sync::{Semaphore, SemaphorePermit}; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; use crate::grpc::apiv1::conn_pool::ConnectionManager; +use crate::storage_write::connection::Connection; +use crate::storage_write::flow::FlowController; -pub struct FlowController { - sem_insert_count: Semaphore - //TODO support sem_insert_bytes -} - -impl FlowController { - - pub fn new(max_insert_count: usize) -> Self { - FlowController { - sem_insert_count: Semaphore::new(max_insert_count) - } - } - pub async fn acquire(&self) -> SemaphorePermit { - self.sem_insert_count.acquire().await.unwrap() - } - -} - -pub struct Connection { - fc: FlowController, - grpc_conn_pool: Arc -} - -impl Connection { - pub fn new(fc: FlowController, grpc_conn_pool: Arc) -> Self { - Connection { - fc, - grpc_conn_pool - } - } - - pub async fn locking_append(&self, req: impl IntoStreamingRequest) -> Result, Status> { - let permit = self.fc.acquire().await; - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.grpc_conn_pool.conn())); - let result = client.append_rows(req).await?.into_inner(); - drop(permit); - Ok(result) - } -} - -pub enum Router { +enum Router { /// key is writer.id - Simplex(Arc>>>), + Simplex(Arc>>>) //TODO support shared router } @@ -89,9 +46,9 @@ impl Router { } pub struct Pool { - pub router: Router, - pub max_insert_count: usize, - pub conn_pool: Arc + router: Router, + max_insert_count: usize, + conn_pool: Arc } impl Pool { diff --git a/bigquery/src/storage_write/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs similarity index 56% rename from bigquery/src/storage_write/buffered.rs rename to bigquery/src/storage_write/stream/buffered.rs index 9543c20f..fed215ab 100644 --- a/bigquery/src/storage_write/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -5,6 +5,7 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_clien use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream}; use std::sync::Arc; +use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { table: String, @@ -21,7 +22,7 @@ impl Writer { } } - pub async fn create_write_stream(&mut self) -> Result { + pub async fn create_write_stream(&mut self) -> Result { let mut 
client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); let res = client .create_write_stream(create_write_stream_request(&self.table, Buffered), None) @@ -30,38 +31,37 @@ impl Writer { self.streams.push(res.name.clone()); - Ok(PendingStream::new(res, client)) + Ok(BufferedStream::new(Stream::new(res, client))) } } -pub struct PendingStream { - inner: WriteStream, - client: StreamingWriteClient, +pub struct BufferedStream { + inner: Stream } -impl PendingStream { - pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self { - Self { inner, client } +impl BufferedStream { + pub(crate) fn new(inner: Stream) -> Self { + Self { inner } } +} - //TODO serialize values and get schema - pub async fn append_rows(&mut self, rows: Vec) -> Result, Status> { - let request = Box::pin(async_stream::stream! { - for row in rows { - yield row; - } - }); - let response = self.client.append_rows(request).await?.into_inner(); - Ok(response) +impl AsStream for BufferedStream { + fn as_mut(&mut self) -> &mut Stream { + &mut self.inner } +} +impl ManagedStream for BufferedStream {} +impl DisposableStream for BufferedStream {} + +impl BufferedStream { pub async fn flush_rows(mut self) -> Result { - let res = self - .client + let stream = self.as_mut(); + let res = stream.client .flush_rows( FlushRowsRequest{ - write_stream: self.inner.name.to_string(), + write_stream: stream.inner.name.to_string(), offset: None, }, None, @@ -71,17 +71,4 @@ impl PendingStream { Ok(res.offset) } - pub async fn finalize(mut self) -> Result { - let res = self - .client - .finalize_write_stream( - FinalizeWriteStreamRequest { - name: self.inner.name.to_string(), - }, - None, - ) - .await? - .into_inner(); - Ok(res.row_count) - } } diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs new file mode 100644 index 00000000..e7823f45 --- /dev/null +++ b/bigquery/src/storage_write/stream/committed.rs @@ -0,0 +1,55 @@ +use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; +use crate::grpc::apiv1::conn_pool::ConnectionManager; +use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed; +use std::sync::Arc; +use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; + +pub struct Writer { + table: String, + conn: Arc, + streams: Vec, +} + +impl Writer { + pub(crate) fn new(table: String, conn: Arc) -> Self { + Self { + table, + conn, + streams: Vec::new(), + } + } + + pub async fn create_write_stream(&mut self) -> Result { + let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); + let res = client + .create_write_stream(create_write_stream_request(&self.table, Committed), None) + .await? 
+            .into_inner();
+
+        self.streams.push(res.name.clone());
+
+        Ok(CommittedStream::new(Stream::new(res, client)))
+    }
+
+}
+
+pub struct CommittedStream {
+    inner: Stream
+}
+
+impl CommittedStream {
+    pub(crate) fn new(inner: Stream) -> Self {
+        Self { inner }
+    }
+
+}
+
+impl AsStream for CommittedStream {
+    fn as_mut(&mut self) -> &mut Stream {
+        &mut self.inner
+    }
+}
+impl ManagedStream for CommittedStream {}
+impl DisposableStream for CommittedStream {}
diff --git a/bigquery/src/storage_write/default.rs b/bigquery/src/storage_write/stream/default.rs
similarity index 100%
rename from bigquery/src/storage_write/default.rs
rename to bigquery/src/storage_write/stream/default.rs
diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs
new file mode 100644
index 00000000..425260e7
--- /dev/null
+++ b/bigquery/src/storage_write/stream/mod.rs
@@ -0,0 +1,49 @@
+use google_cloud_gax::grpc::{Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, FinalizeWriteStreamRequest, WriteStream};
+use crate::grpc::apiv1::bigquery_client::StreamingWriteClient;
+use crate::storage_write::into_streaming_request;
+
+pub mod default;
+pub mod pending;
+pub mod committed;
+pub mod buffered;
+
+pub(crate) struct Stream {
+    pub(crate) inner: WriteStream,
+    pub(crate) client: StreamingWriteClient,
+}
+
+impl Stream {
+    pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self {
+        Self { inner, client }
+    }
+}
+
+pub(crate) trait AsStream : Sized {
+    fn as_mut(&mut self) -> &mut Stream;
+}
+
+pub trait ManagedStream : AsStream {
+    async fn append_rows(&mut self, rows: Vec<AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let response = self.as_mut().client.append_rows(into_streaming_request(rows)).await?.into_inner();
+        Ok(response)
+    }
+
+}
+
+pub trait DisposableStream : ManagedStream {
+    async fn finalize(mut self) -> Result<i64, Status> {
+        let stream = self.as_mut();
+        let res = stream
+            .client
+            .finalize_write_stream(
+                FinalizeWriteStreamRequest {
+                    name: stream.inner.name.to_string(),
+                },
+                None,
+            )
+            .await?
+ .into_inner(); + Ok(res.row_count) + } +} \ No newline at end of file diff --git a/bigquery/src/storage_write/pending.rs b/bigquery/src/storage_write/stream/pending.rs similarity index 63% rename from bigquery/src/storage_write/pending.rs rename to bigquery/src/storage_write/stream/pending.rs index 79ae430c..eab95c4c 100644 --- a/bigquery/src/storage_write/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -5,9 +5,9 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_clien use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, - CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream, }; use std::sync::Arc; +use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { table: String, @@ -33,7 +33,7 @@ impl Writer { self.streams.push(res.name.clone()); - Ok(PendingStream::new(res, client)) + Ok(PendingStream::new(Stream::new(res, client))) } pub async fn commit(self) -> Result { @@ -51,39 +51,20 @@ impl Writer { Ok(result) } } - pub struct PendingStream { - inner: WriteStream, - client: StreamingWriteClient, + inner: Stream } impl PendingStream { - pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self { - Self { inner, client } - } - - //TODO serialize values and get schema - pub async fn append_rows(&mut self, rows: Vec) -> Result, Status> { - let request = Box::pin(async_stream::stream! { - for row in rows { - yield row; - } - }); - let response = self.client.append_rows(request).await?.into_inner(); - Ok(response) + pub(crate) fn new(inner: Stream) -> Self { + Self { inner } } +} - pub async fn finalize(mut self) -> Result { - let res = self - .client - .finalize_write_stream( - FinalizeWriteStreamRequest { - name: self.inner.name.to_string(), - }, - None, - ) - .await? 
- .into_inner(); - Ok(res.row_count) +impl AsStream for PendingStream { + fn as_mut(&mut self) -> &mut Stream { + &mut self.inner } } +impl ManagedStream for PendingStream {} +impl DisposableStream for PendingStream {} \ No newline at end of file From 8721f17f663401de695ec719fef2975c06a912c8 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Fri, 11 Oct 2024 20:29:06 +0900 Subject: [PATCH 12/23] refactor --- bigquery/src/storage_write/pool.rs | 67 +++++++++++++++++-- bigquery/src/storage_write/stream/buffered.rs | 29 +++----- .../src/storage_write/stream/committed.rs | 31 ++++----- bigquery/src/storage_write/stream/default.rs | 47 +++++++++---- bigquery/src/storage_write/stream/mod.rs | 26 ++++--- bigquery/src/storage_write/stream/pending.rs | 30 ++++----- 6 files changed, 149 insertions(+), 81 deletions(-) diff --git a/bigquery/src/storage_write/pool.rs b/bigquery/src/storage_write/pool.rs index 188b2311..42066b70 100644 --- a/bigquery/src/storage_write/pool.rs +++ b/bigquery/src/storage_write/pool.rs @@ -1,5 +1,10 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; +use google_cloud_gax::grpc::Status; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::{Type, WriteMode}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{GetWriteStreamRequest, WriteStream}; +use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; use crate::storage_write::connection::Connection; use crate::storage_write::flow::FlowController; @@ -21,7 +26,9 @@ impl Router { let fc = FlowController::new(max_insert_count); let conn = Arc::new(Connection::new(fc, grpc_conn_pool)); let mut map = map.lock().unwrap(); - map.insert(writer_id, conn); + if !map.contains_key(&writer_id) { + map.insert(writer_id, conn); + } } } } @@ -45,15 +52,15 @@ impl Router { } } -pub struct Pool { +pub struct Connections { router: Router, max_insert_count: usize, conn_pool: Arc } -impl Pool { +impl Connections { pub fn new(max_insert_count: usize, conn_pool: Arc) -> Self { - Pool { + Self { router: Router::new_simplex(), max_insert_count, conn_pool @@ -68,4 +75,56 @@ impl Pool { self.router.pick(writer_id) } +} + +pub(crate) struct Pool { + /// key = location + cons: HashMap, + max_insert_count: usize, + p_cons: Arc +} + +impl Pool { + pub fn new(max_insert_count: usize, p_cons: Arc) -> Self { + Self { + cons: HashMap::new(), + max_insert_count, + p_cons + } + } + + pub async fn create_stream(&mut self, table: &str, mode: Type) -> Result { + let mut client = self.client(); + let req = create_write_stream_request(table, mode); + let stream = client.create_write_stream(req, None).await?.into_inner(); + self.regional(&stream.location).attach_writer(stream.name.clone()); + Ok(stream) + } + + pub async fn get_stream(&mut self, name: &str) -> Result { + let mut client = self.client(); + let req = GetWriteStreamRequest { + name: name.to_string(), + view: 0, + }; + let stream = client.get_write_stream(req, None).await?.into_inner(); + self.regional(&stream.location).attach_writer(stream.name.clone()); + Ok(stream) + } + + pub fn client(&self) -> StreamingWriteClient { + StreamingWriteClient::new(BigQueryWriteClient::new(self.p_cons.conn())) + } + + pub fn regional(&mut self, location: &str) -> &Connections { + let cons = self.cons.get(location); + match cons { + Some(pool) => pool, + None => { + let cons = 
Connections::new(self.max_insert_count, self.p_cons.clone()); + self.cons.insert(location.to_string(), cons); + self.cons.get(location).unwrap() + } + } + } } \ No newline at end of file diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index fed215ab..191b925b 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -1,37 +1,28 @@ use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream}; use std::sync::Arc; +use crate::storage_write::pool::{Pool}; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { - table: String, - conn: Arc, - streams: Vec, + cons: Arc, + p_cons: Arc, } impl Writer { - pub(crate) fn new(table: String, conn: Arc) -> Self { + pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { Self { - table, - conn, - streams: Vec::new(), + cons, + p_cons, } } - pub async fn create_write_stream(&mut self) -> Result { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let res = client - .create_write_stream(create_write_stream_request(&self.table, Buffered), None) - .await? 
- .into_inner(); - - self.streams.push(res.name.clone()); - - Ok(BufferedStream::new(Stream::new(res, client))) + pub async fn create_write_stream(&mut self, table: &str) -> Result { + let stream = self.cons.create_stream(table, Buffered).await?; + Ok(BufferedStream::new(Stream::new(stream, self.cons.clone()))) } } @@ -58,7 +49,7 @@ impl BufferedStream { pub async fn flush_rows(mut self) -> Result { let stream = self.as_mut(); - let res = stream.client + let res = stream.cons.client() .flush_rows( FlushRowsRequest{ write_stream: stream.inner.name.to_string(), diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index e7823f45..9ec8b05a 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -2,35 +2,28 @@ use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, Streaming use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; use std::sync::Arc; -use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; +use crate::storage_write::pool::Pool; +use crate::storage_write::stream::{create_write_stream, AsStream, DisposableStream, ManagedStream, Stream}; +use crate::storage_write::stream::buffered::BufferedStream; pub struct Writer { - table: String, - conn: Arc, - streams: Vec, + cons: Arc, + p_cons: Arc, } impl Writer { - pub(crate) fn new(table: String, conn: Arc) -> Self { + pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { Self { - table, - conn, - streams: Vec::new(), + cons, + p_cons, } } - pub async fn create_write_stream(&mut self) -> Result { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let res = client - .create_write_stream(create_write_stream_request(&self.table, Committed), None) - .await? 
- .into_inner(); - - self.streams.push(res.name.clone()); - - Ok(CommittedStream::new(Stream::new(res, client))) + pub async fn create_write_stream(&mut self, table: &str) -> Result { + let stream = self.cons.create_stream(table, Committed).await?; + Ok(CommittedStream::new(Stream::new(stream, self.cons.clone()))) } } diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index c5e3c37d..6fb090e3 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -4,25 +4,46 @@ use google_cloud_gax::grpc::{Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; use std::sync::Arc; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; +use crate::storage_write::pool::Pool; +use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; +use crate::storage_write::stream::buffered::BufferedStream; pub struct Writer { - conn: Arc, + cons: Arc, + p_cons: Arc, } impl Writer { - pub(crate) fn new(conn: Arc) -> Self { - Self { conn } + pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { + Self { + cons, + p_cons, + } } - //TODO use default stream name - pub async fn append_rows(&mut self, rows: Vec) -> Result, Status> { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let request = Box::pin(async_stream::stream! { - for row in rows { - yield row; - } - }); - let response = client.append_rows(request).await?.into_inner(); - Ok(response) + pub async fn create_write_stream(&mut self, table: &str) -> Result { + let stream = self.cons.get_stream(&format!("{table}/streams/_default")).await?; + Ok(DefaultStream::new(Stream::new(stream, self.cons.clone()))) } } + + +pub struct DefaultStream { + inner: Stream +} + +impl DefaultStream { + pub(crate) fn new(inner: Stream) -> Self { + Self { inner } + } + +} + +impl AsStream for DefaultStream { + fn as_mut(&mut self) -> &mut Stream { + &mut self.inner + } +} +impl ManagedStream for DefaultStream {} + diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 425260e7..7ba88c37 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -1,7 +1,13 @@ +use std::sync::Arc; use google_cloud_gax::grpc::{Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, FinalizeWriteStreamRequest, WriteStream}; -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; +use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; +use crate::grpc::apiv1::conn_pool::ConnectionManager; use crate::storage_write::into_streaming_request; +use crate::storage_write::pool::{Pool}; +use crate::storage_write::stream::buffered::BufferedStream; pub mod default; pub mod pending; @@ -10,12 +16,12 @@ pub mod buffered; pub(crate) struct Stream { pub(crate) inner: WriteStream, - pub(crate) 
client: StreamingWriteClient, + pub(crate) cons: Arc, } impl Stream { - pub(crate) fn new(inner: WriteStream, client: StreamingWriteClient) -> Self { - Self { inner, client } + pub(crate) fn new(inner: WriteStream, cons: Arc) -> Self { + Self { inner, cons } } } @@ -25,8 +31,10 @@ pub(crate) trait AsStream : Sized { pub trait ManagedStream : AsStream { async fn append_rows(&mut self, rows: Vec) -> Result, Status> { - let response = self.as_mut().client.append_rows(into_streaming_request(rows)).await?.into_inner(); - Ok(response) + let stream = self.as_mut(); + let cons = stream.cons.regional(&stream.inner.location); + let con = cons.pick(&stream.inner.name).unwrap(); + con.locking_append(into_streaming_request(rows)).await } } @@ -35,7 +43,7 @@ pub trait DisposableStream : ManagedStream { async fn finalize(mut self) -> Result { let stream = self.as_mut(); let res = stream - .client + .cons.client() .finalize_write_stream( FinalizeWriteStreamRequest { name: stream.inner.name.to_string(), @@ -46,4 +54,4 @@ pub trait DisposableStream : ManagedStream { .into_inner(); Ok(res.row_count) } -} \ No newline at end of file +} diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index eab95c4c..7c498929 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -2,43 +2,39 @@ use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, Streaming use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Committed, Pending}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, }; use std::sync::Arc; +use crate::storage_write::pool::Pool; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { - table: String, - conn: Arc, + cons: Arc, + p_cons: Arc, streams: Vec, + table: String } impl Writer { - pub(crate) fn new(table: String, conn: Arc) -> Self { + pub(crate) fn new(cons: Arc, p_cons: Arc, table: String) -> Self { Self { + cons, + p_cons, table, - conn, - streams: Vec::new(), + streams: Vec::new() } } pub async fn create_write_stream(&mut self) -> Result { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let res = client - .create_write_stream(create_write_stream_request(&self.table, Pending), None) - .await? 
- .into_inner(); - - self.streams.push(res.name.clone()); - - Ok(PendingStream::new(Stream::new(res, client))) + let stream = self.cons.create_stream(&self.table, Pending).await?; + self.streams.push(stream.name.clone()); + Ok(PendingStream::new(Stream::new(stream, self.cons.clone()))) } pub async fn commit(self) -> Result { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.conn.conn())); - let result = client + let result = self.cons.client() .batch_commit_write_streams( BatchCommitWriteStreamsRequest { parent: self.table.to_string(), From b8ec18ec627b1c47bf7f61581c26326345b8adf8 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Fri, 11 Oct 2024 20:45:26 +0900 Subject: [PATCH 13/23] refactor --- bigquery/src/grpc/apiv1/conn_pool.rs | 6 + bigquery/src/storage_write/connection.rs | 31 ----- bigquery/src/storage_write/pool.rs | 130 ------------------ bigquery/src/storage_write/stream/buffered.rs | 20 +-- .../src/storage_write/stream/committed.rs | 20 ++- bigquery/src/storage_write/stream/default.rs | 28 ++-- bigquery/src/storage_write/stream/mod.rs | 34 ++--- bigquery/src/storage_write/stream/pending.rs | 23 ++-- 8 files changed, 68 insertions(+), 224 deletions(-) delete mode 100644 bigquery/src/storage_write/connection.rs delete mode 100644 bigquery/src/storage_write/pool.rs diff --git a/bigquery/src/grpc/apiv1/conn_pool.rs b/bigquery/src/grpc/apiv1/conn_pool.rs index 16a8767f..c76b2d14 100644 --- a/bigquery/src/grpc/apiv1/conn_pool.rs +++ b/bigquery/src/grpc/apiv1/conn_pool.rs @@ -1,6 +1,8 @@ use google_cloud_gax::conn::{ Channel, ConnectionManager as GRPCConnectionManager, ConnectionOptions, Environment, Error, }; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; pub const AUDIENCE: &str = "https://bigquerystorage.googleapis.com/"; pub const DOMAIN: &str = "bigquerystorage.googleapis.com"; @@ -33,4 +35,8 @@ impl ConnectionManager { pub fn conn(&self) -> Channel { self.inner.conn() } + + pub fn writer(&self) -> StreamingWriteClient { + StreamingWriteClient::new(BigQueryWriteClient::new(self.conn())) + } } diff --git a/bigquery/src/storage_write/connection.rs b/bigquery/src/storage_write/connection.rs deleted file mode 100644 index 9087c0a5..00000000 --- a/bigquery/src/storage_write/connection.rs +++ /dev/null @@ -1,31 +0,0 @@ -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -use tokio::sync::{Semaphore, SemaphorePermit}; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; -use crate::grpc::apiv1::conn_pool::ConnectionManager; -use crate::storage_write::flow::FlowController; - -pub struct Connection { - fc: FlowController, - grpc_conn_pool: Arc -} - -impl Connection { - pub fn new(fc: FlowController, grpc_conn_pool: Arc) -> Self { - Connection { - fc, - grpc_conn_pool - } - } - - pub async fn locking_append(&self, req: impl IntoStreamingRequest) -> Result, Status> { - let permit = self.fc.acquire().await; - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(self.grpc_conn_pool.conn())); - let result = client.append_rows(req).await?.into_inner(); - drop(permit); - Ok(result) - } -} \ No newline at end of file diff --git 
a/bigquery/src/storage_write/pool.rs b/bigquery/src/storage_write/pool.rs deleted file mode 100644 index 42066b70..00000000 --- a/bigquery/src/storage_write/pool.rs +++ /dev/null @@ -1,130 +0,0 @@ -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -use google_cloud_gax::grpc::Status; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::{Type, WriteMode}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{GetWriteStreamRequest, WriteStream}; -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; -use crate::grpc::apiv1::conn_pool::ConnectionManager; -use crate::storage_write::connection::Connection; -use crate::storage_write::flow::FlowController; - -enum Router { - /// key is writer.id - Simplex(Arc>>>) - //TODO support shared router -} - -impl Router { - pub fn new_simplex() -> Self { - Router::Simplex(Arc::new(Mutex::new(HashMap::new()))) - } - - pub fn attach_writer(&self, writer_id: String, max_insert_count: usize, grpc_conn_pool: Arc) { - match self { - Router::Simplex(map) => { - let fc = FlowController::new(max_insert_count); - let conn = Arc::new(Connection::new(fc, grpc_conn_pool)); - let mut map = map.lock().unwrap(); - if !map.contains_key(&writer_id) { - map.insert(writer_id, conn); - } - } - } - } - - pub fn remove_writer(&self, writer_id: &str) { - match self { - Router::Simplex(map) => { - let mut map = map.lock().unwrap(); - map.remove(writer_id); - } - } - } - - pub fn pick(&self, writer_id: &str) -> Option> { - match self { - Router::Simplex(map) => { - let map = map.lock().unwrap(); - map.get(writer_id).map(|c| c.clone()) - } - } - } -} - -pub struct Connections { - router: Router, - max_insert_count: usize, - conn_pool: Arc -} - -impl Connections { - pub fn new(max_insert_count: usize, conn_pool: Arc) -> Self { - Self { - router: Router::new_simplex(), - max_insert_count, - conn_pool - } - } - - pub fn attach_writer(&self, writer_id: String) { - self.router.attach_writer(writer_id, self.max_insert_count, self.conn_pool.clone()); - } - - pub fn pick(&self, writer_id: &str) -> Option> { - self.router.pick(writer_id) - } - -} - -pub(crate) struct Pool { - /// key = location - cons: HashMap, - max_insert_count: usize, - p_cons: Arc -} - -impl Pool { - pub fn new(max_insert_count: usize, p_cons: Arc) -> Self { - Self { - cons: HashMap::new(), - max_insert_count, - p_cons - } - } - - pub async fn create_stream(&mut self, table: &str, mode: Type) -> Result { - let mut client = self.client(); - let req = create_write_stream_request(table, mode); - let stream = client.create_write_stream(req, None).await?.into_inner(); - self.regional(&stream.location).attach_writer(stream.name.clone()); - Ok(stream) - } - - pub async fn get_stream(&mut self, name: &str) -> Result { - let mut client = self.client(); - let req = GetWriteStreamRequest { - name: name.to_string(), - view: 0, - }; - let stream = client.get_write_stream(req, None).await?.into_inner(); - self.regional(&stream.location).attach_writer(stream.name.clone()); - Ok(stream) - } - - pub fn client(&self) -> StreamingWriteClient { - StreamingWriteClient::new(BigQueryWriteClient::new(self.p_cons.conn())) - } - - pub fn regional(&mut self, location: &str) -> &Connections { - let cons = self.cons.get(location); - match cons { - Some(pool) => pool, - None => { - let cons = Connections::new(self.max_insert_count, self.p_cons.clone()); - 
self.cons.insert(location.to_string(), cons); - self.cons.get(location).unwrap() - } - } - } -} \ No newline at end of file diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index 191b925b..8a9d6c80 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -1,28 +1,28 @@ -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream}; use std::sync::Arc; -use crate::storage_write::pool::{Pool}; +use crate::grpc::apiv1::bigquery_client::create_write_stream_request; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { - cons: Arc, - p_cons: Arc, + max_insert_count: usize, + cm: Arc, } impl Writer { - pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { + pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { Self { - cons, - p_cons, + max_insert_count, + cm, } } pub async fn create_write_stream(&mut self, table: &str) -> Result { - let stream = self.cons.create_stream(table, Buffered).await?; - Ok(BufferedStream::new(Stream::new(stream, self.cons.clone()))) + let req = create_write_stream_request(table, Buffered); + let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); + Ok(BufferedStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } } @@ -49,7 +49,7 @@ impl BufferedStream { pub async fn flush_rows(mut self) -> Result { let stream = self.as_mut(); - let res = stream.cons.client() + let res = stream.cons.writer() .flush_rows( FlushRowsRequest{ write_stream: stream.inner.name.to_string(), diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 9ec8b05a..8232c72d 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -1,29 +1,27 @@ use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; use std::sync::Arc; -use crate::storage_write::pool::Pool; -use crate::storage_write::stream::{create_write_stream, AsStream, DisposableStream, ManagedStream, Stream}; -use crate::storage_write::stream::buffered::BufferedStream; +use crate::storage_write::stream::{ AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { - cons: Arc, - p_cons: Arc, + max_insert_count: usize, + cm: Arc, } impl Writer { - pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { + pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { Self { - cons, - p_cons, + max_insert_count, + cm, } } pub async fn create_write_stream(&mut self, table: &str) -> Result { - let stream = self.cons.create_stream(table, 
Committed).await?; - Ok(CommittedStream::new(Stream::new(stream, self.cons.clone()))) + let req = create_write_stream_request(table, Committed); + let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); + Ok(CommittedStream::new(Stream::new(stream, self.cm.clone(),self.max_insert_count))) } } diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index 6fb090e3..8f722800 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -1,30 +1,28 @@ -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; use crate::grpc::apiv1::conn_pool::ConnectionManager; -use google_cloud_gax::grpc::{Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse}; +use google_cloud_gax::grpc::{Status, }; use std::sync::Arc; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; -use crate::storage_write::pool::Pool; -use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; -use crate::storage_write::stream::buffered::BufferedStream; +use google_cloud_googleapis::cloud::bigquery::storage::v1::GetWriteStreamRequest; +use crate::storage_write::stream::{AsStream, ManagedStream, Stream}; pub struct Writer { - cons: Arc, - p_cons: Arc, + max_insert_count: usize, + cm: Arc, } impl Writer { - pub(crate) fn new(cons: Arc, p_cons: Arc) -> Self { + pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { Self { - cons, - p_cons, + max_insert_count, + cm, } } pub async fn create_write_stream(&mut self, table: &str) -> Result { - let stream = self.cons.get_stream(&format!("{table}/streams/_default")).await?; - Ok(DefaultStream::new(Stream::new(stream, self.cons.clone()))) + let stream = self.cm.writer().get_write_stream(GetWriteStreamRequest { + name: format!("{table}/streams/_default"), + ..Default::default() + }, None).await?.into_inner(); + Ok(DefaultStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } } diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 7ba88c37..aff37f7b 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -1,13 +1,8 @@ use std::sync::Arc; -use google_cloud_gax::grpc::{Status, Streaming}; +use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::grpc::apiv1::conn_pool::ConnectionManager; -use crate::storage_write::into_streaming_request; -use crate::storage_write::pool::{Pool}; -use crate::storage_write::stream::buffered::BufferedStream; +use crate::storage_write::flow::FlowController; pub mod default; pub mod pending; @@ -15,13 +10,18 @@ pub mod committed; pub mod buffered; pub(crate) struct Stream { - pub(crate) inner: WriteStream, - pub(crate) cons: Arc, + inner: WriteStream, + cons: Arc, + fc: FlowController } impl Stream { - pub(crate) fn 
new(inner: WriteStream, cons: Arc) -> Self { - Self { inner, cons } + pub(crate) fn new(inner: WriteStream, cons: Arc, max_insert_count: usize) -> Self { + Self { + inner, + cons , + fc: FlowController::new(max_insert_count) + } } } @@ -30,11 +30,13 @@ pub(crate) trait AsStream : Sized { } pub trait ManagedStream : AsStream { - async fn append_rows(&mut self, rows: Vec) -> Result, Status> { + async fn append_rows(&mut self, req: impl IntoStreamingRequest) -> Result, Status> { let stream = self.as_mut(); - let cons = stream.cons.regional(&stream.inner.location); - let con = cons.pick(&stream.inner.name).unwrap(); - con.locking_append(into_streaming_request(rows)).await + let permit = stream.fc.acquire().await; + let mut client = stream.cons.writer(); + let result = client.append_rows(req).await?.into_inner(); + drop(permit); + Ok(result) } } @@ -43,7 +45,7 @@ pub trait DisposableStream : ManagedStream { async fn finalize(mut self) -> Result { let stream = self.as_mut(); let res = stream - .cons.client() + .cons.writer() .finalize_write_stream( FinalizeWriteStreamRequest { name: stream.inner.name.to_string(), diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 7c498929..51972b80 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -11,30 +11,31 @@ use crate::storage_write::pool::Pool; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { - cons: Arc, - p_cons: Arc, - streams: Vec, - table: String + max_insert_count: usize, + cm: Arc, + table: String, + streams: Vec } impl Writer { - pub(crate) fn new(cons: Arc, p_cons: Arc, table: String) -> Self { + pub(crate) fn new(max_insert_count: usize, cm: Arc, table: String) -> Self { Self { - cons, - p_cons, + max_insert_count, + cm, table, streams: Vec::new() } } pub async fn create_write_stream(&mut self) -> Result { - let stream = self.cons.create_stream(&self.table, Pending).await?; - self.streams.push(stream.name.clone()); - Ok(PendingStream::new(Stream::new(stream, self.cons.clone()))) + let req = create_write_stream_request(&self.table, Pending); + let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); + self.streams.push(stream.name.to_string()); + Ok(PendingStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } pub async fn commit(self) -> Result { - let result = self.cons.client() + let result = self.cm.writer() .batch_commit_write_streams( BatchCommitWriteStreamsRequest { parent: self.table.to_string(), From 4e8bfd08e78a24944b609b2a7044cab744de8909 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Fri, 18 Oct 2024 11:07:50 +0900 Subject: [PATCH 14/23] make fc optional --- bigquery/src/client.rs | 61 ++++++++++++++------ bigquery/src/storage_write/mod.rs | 5 -- bigquery/src/storage_write/stream/mod.rs | 26 ++++++--- bigquery/src/storage_write/stream/pending.rs | 1 - 4 files changed, 63 insertions(+), 30 deletions(-) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index c191f154..201468e6 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -38,9 +38,26 @@ pub struct ClientConfig { token_source_provider: Box, environment: Environment, streaming_read_config: ChannelConfig, + streaming_write_config: StreamingWriteConfig, debug: bool, } +#[derive(Clone, Debug)] +pub struct StreamingWriteConfig { + pub channel_config: ChannelConfig, + pub max_insert_count: usize +} + +impl Default for StreamingWriteConfig 
{ + fn default() -> Self { + Self { + channel_config: ChannelConfig::default(), + max_insert_count: 1000 + } + } + +} + #[derive(Clone, Debug)] pub struct ChannelConfig { /// num_channels is the number of gRPC channels. @@ -49,6 +66,15 @@ pub struct ChannelConfig { pub timeout: Option, } +impl Into for ChannelConfig { + fn into(self) -> ConnectionManager { + ConnectionManager::new(self.num_channels, &Environment::GoogleCloud, &ConnectionOptions { + timeout: self.timeout, + connect_timeout: self.connect_timeout, + }) + } +} + impl Default for ChannelConfig { fn default() -> Self { Self { @@ -70,6 +96,7 @@ impl ClientConfig { token_source_provider: http_token_source_provider, environment: Environment::GoogleCloud(grpc_token_source_provider), streaming_read_config: ChannelConfig::default(), + streaming_write_config: StreamingWriteConfig::default(), debug: false, } } @@ -81,6 +108,10 @@ impl ClientConfig { self.streaming_read_config = value; self } + pub fn with_streaming_write_config(mut self, value: StreamingWriteConfig) -> Self { + self.streaming_write_config = value; + self + } pub fn with_http_client(mut self, value: reqwest_middleware::ClientWithMiddleware) -> Self { self.http = value; self @@ -166,7 +197,9 @@ pub struct Client { routine_client: BigqueryRoutineClient, row_access_policy_client: BigqueryRowAccessPolicyClient, model_client: BigqueryModelClient, - streaming_client_conn_pool: Arc, + streaming_read_conn_pool: Arc, + streaming_write_conn_pool: Arc, + stereaming_write_max_insert_count: usize, } impl Client { @@ -180,14 +213,6 @@ impl Client { config.debug, )); - let read_config = config.streaming_read_config; - let conn_options = ConnectionOptions { - timeout: read_config.timeout, - connect_timeout: read_config.connect_timeout, - }; - - let streaming_client_conn_pool = - ConnectionManager::new(read_config.num_channels, &config.environment, &conn_options).await?; Ok(Self { dataset_client: BigqueryDatasetClient::new(client.clone()), table_client: BigqueryTableClient::new(client.clone()), @@ -196,7 +221,9 @@ impl Client { routine_client: BigqueryRoutineClient::new(client.clone()), row_access_policy_client: BigqueryRowAccessPolicyClient::new(client.clone()), model_client: BigqueryModelClient::new(client.clone()), - streaming_client_conn_pool: Arc::new(streaming_client_conn_pool), + streaming_read_conn_pool: Arc::new(config.streaming_read_config.into()), + streaming_write_conn_pool: Arc::new(config.streaming_write_config.channel_config.into()), + stereaming_write_max_insert_count: config.streaming_write_config.max_insert_count }) } @@ -245,25 +272,25 @@ impl Client { /// Creates a new pending type storage writer for the specified table. /// https://cloud.google.com/bigquery/docs/write-api#pending_type pub fn pending_storage_writer(&self, table: String) -> pending::Writer { - pending::Writer::new(table, self.streaming_client_conn_pool.clone()) + pending::Writer::new(1, self.streaming_write_conn_pool.clone(), table) } /// Creates a new default type storage writer. /// https://cloud.google.com/bigquery/docs/write-api#default_stream pub fn default_storage_writer(&self) -> default::Writer { - default::Writer::new(self.streaming_client_conn_pool.clone()) + default::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new committed type storage writer. 
/// https://cloud.google.com/bigquery/docs/write-api#committed_type - pub fn committed_storage_writer(&self, table: String) -> committed::Writer { - committed::Writer::new(table, self.streaming_client_conn_pool.clone()) + pub fn committed_storage_writer(&self) -> committed::Writer { + committed::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new buffered type storage writer. /// https://cloud.google.com/bigquery/docs/write-api#buffered_type - pub fn buffered_storage_writer(&self, table: String) -> buffered::Writer { - buffered::Writer::new(table, self.streaming_client_conn_pool.clone()) + pub fn buffered_storage_writer(&self) -> buffered::Writer { + buffered::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Run query job and get result. @@ -500,7 +527,7 @@ impl Client { { let option = option.unwrap_or_default(); - let mut client = StreamingReadClient::new(BigQueryReadClient::new(self.streaming_client_conn_pool.conn())); + let mut client = StreamingReadClient::new(BigQueryReadClient::new(self.streaming_read_conn_pool.conn())); let read_session = client .create_read_session( CreateReadSessionRequest { diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs index 7d5b7445..fbe77088 100644 --- a/bigquery/src/storage_write/mod.rs +++ b/bigquery/src/storage_write/mod.rs @@ -4,12 +4,7 @@ use prost_types::DescriptorProto; use std::collections::HashMap; use google_cloud_gax::grpc::codegen::tokio_stream::Stream; -mod pool; - -pub mod connection; - mod flow; - pub mod stream; pub struct AppendRowsRequestBuilder { diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index aff37f7b..158e5dd4 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -12,7 +12,7 @@ pub mod buffered; pub(crate) struct Stream { inner: WriteStream, cons: Arc, - fc: FlowController + fc: Option } impl Stream { @@ -20,7 +20,11 @@ impl Stream { Self { inner, cons , - fc: FlowController::new(max_insert_count) + fc: if max_insert_count > 0 { + Some(FlowController::new(max_insert_count)) + }else { + None + } } } } @@ -32,11 +36,19 @@ pub(crate) trait AsStream : Sized { pub trait ManagedStream : AsStream { async fn append_rows(&mut self, req: impl IntoStreamingRequest) -> Result, Status> { let stream = self.as_mut(); - let permit = stream.fc.acquire().await; - let mut client = stream.cons.writer(); - let result = client.append_rows(req).await?.into_inner(); - drop(permit); - Ok(result) + match &stream.fc { + None => { + let mut client = stream.cons.writer(); + Ok(client.append_rows(req).await?.into_inner()) + }, + Some(fc) => { + let permit = fc.acquire().await; + let mut client = stream.cons.writer(); + let result = client.append_rows(req).await?.into_inner(); + drop(permit); + Ok(result) + } + } } } diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 51972b80..12904836 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -7,7 +7,6 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::{ AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, }; use std::sync::Arc; -use crate::storage_write::pool::Pool; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { From 
f77b421c77b9fcebb734480c2c081228b8925c5c Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 22 Oct 2024 11:56:16 +0900 Subject: [PATCH 15/23] refactor --- bigquery/src/client.rs | 46 +++++++--- bigquery/src/storage_write/flow.rs | 1 + bigquery/src/storage_write/mod.rs | 11 ++- bigquery/src/storage_write/stream/buffered.rs | 90 +++++++++++++++++-- .../src/storage_write/stream/committed.rs | 3 + bigquery/src/storage_write/stream/default.rs | 3 + bigquery/src/storage_write/stream/mod.rs | 64 ++++++++++++- bigquery/src/storage_write/stream/pending.rs | 4 + 8 files changed, 201 insertions(+), 21 deletions(-) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 201468e6..5d23ea81 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -44,8 +44,19 @@ pub struct ClientConfig { #[derive(Clone, Debug)] pub struct StreamingWriteConfig { - pub channel_config: ChannelConfig, - pub max_insert_count: usize + channel_config: ChannelConfig, + max_insert_count: usize +} + +impl StreamingWriteConfig { + pub fn with_channel_config(mut self, value: ChannelConfig) -> Self { + self.channel_config = value; + self + } + pub fn with_max_insert_count(mut self, value: usize) -> Self { + self.max_insert_count = value; + self + } } impl Default for StreamingWriteConfig { @@ -61,17 +72,30 @@ impl Default for StreamingWriteConfig { #[derive(Clone, Debug)] pub struct ChannelConfig { /// num_channels is the number of gRPC channels. - pub num_channels: usize, - pub connect_timeout: Option, - pub timeout: Option, + num_channels: usize, + connect_timeout: Option, + timeout: Option, } -impl Into for ChannelConfig { - fn into(self) -> ConnectionManager { - ConnectionManager::new(self.num_channels, &Environment::GoogleCloud, &ConnectionOptions { +impl ChannelConfig { + pub fn with_num_channels(mut self, value: usize) -> Self { + self.num_channels = value; + self + } + pub fn with_connect_timeout(mut self, value: Duration) -> Self { + self.connect_timeout = Some(value); + self + } + pub fn with_timeout(mut self, value: Duration) -> Self { + self.timeout = Some(value); + self + } + + async fn into_connection_manager(self, environment: &Environment) -> Result { + ConnectionManager::new(self.num_channels, environment, &ConnectionOptions { timeout: self.timeout, connect_timeout: self.connect_timeout, - }) + }).await } } @@ -221,8 +245,8 @@ impl Client { routine_client: BigqueryRoutineClient::new(client.clone()), row_access_policy_client: BigqueryRowAccessPolicyClient::new(client.clone()), model_client: BigqueryModelClient::new(client.clone()), - streaming_read_conn_pool: Arc::new(config.streaming_read_config.into()), - streaming_write_conn_pool: Arc::new(config.streaming_write_config.channel_config.into()), + streaming_read_conn_pool: Arc::new(config.streaming_read_config.into_connection_manager(&config.environment).await?), + streaming_write_conn_pool: Arc::new(config.streaming_write_config.channel_config.into_connection_manager(&config.environment).await?), stereaming_write_max_insert_count: config.streaming_write_config.max_insert_count }) } diff --git a/bigquery/src/storage_write/flow.rs b/bigquery/src/storage_write/flow.rs index 716c52e6..dfe1c489 100644 --- a/bigquery/src/storage_write/flow.rs +++ b/bigquery/src/storage_write/flow.rs @@ -1,5 +1,6 @@ use tokio::sync::{Semaphore, SemaphorePermit}; +#[derive(Debug)] pub struct FlowController { sem_insert_count: Semaphore //TODO support sem_insert_bytes diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs index 
fbe77088..6d0d16fd 100644 --- a/bigquery/src/storage_write/mod.rs +++ b/bigquery/src/storage_write/mod.rs @@ -67,10 +67,19 @@ impl AppendRowsRequestBuilder { } } +pub fn build_streaming_request(name: &str, rows: Vec) -> impl Stream{ + let name = name.to_string(); + async_stream::stream! { + for row in rows { + yield row.build(&name); + } + } +} + pub fn into_streaming_request(rows: Vec) -> impl Stream{ async_stream::stream! { for row in rows { yield row; } } -} \ No newline at end of file +} diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index 8a9d6c80..f5616ad8 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -1,9 +1,12 @@ -use crate::grpc::apiv1::conn_pool::ConnectionManager; +use crate::grpc::apiv1::conn_pool::{AUDIENCE, ConnectionManager, SCOPES}; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed, Pending}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream}; use std::sync::Arc; -use crate::grpc::apiv1::bigquery_client::create_write_stream_request; +use tokio::task::JoinHandle; +use google_cloud_gax::conn::Environment; +use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; +use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { @@ -19,14 +22,13 @@ impl Writer { } } - pub async fn create_write_stream(&mut self, table: &str) -> Result { + pub async fn create_write_stream(&self, table: &str) -> Result { let req = create_write_stream_request(table, Buffered); let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); Ok(BufferedStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } } - pub struct BufferedStream { inner: Stream } @@ -41,14 +43,17 @@ impl AsStream for BufferedStream { fn as_mut(&mut self) -> &mut Stream { &mut self.inner } + fn as_ref(&self) -> &Stream { + &self.inner + } } impl ManagedStream for BufferedStream {} impl DisposableStream for BufferedStream {} impl BufferedStream { - pub async fn flush_rows(mut self) -> Result { - let stream = self.as_mut(); + pub async fn flush_rows(&self) -> Result { + let stream = self.as_ref(); let res = stream.cons.writer() .flush_rows( FlushRowsRequest{ @@ -63,3 +68,74 @@ impl BufferedStream { } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use tokio::task::JoinHandle; + use google_cloud_gax::create_request; + use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use google_cloud_gax::grpc::Status; + use prost::Message; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::build_streaming_request; + use crate::storage_write::stream::{AsStream, ManagedStream}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + + #[ctor::ctor] + fn init() { + crate::storage_write::stream::tests::init(); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_storage_write() { + + let 
(config,project_id) = ClientConfig::new_with_auth().await.unwrap(); + let client = Client::new(config).await.unwrap(); + let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test_buffered", project_id.unwrap()).to_string(); + let writer = client.buffered_storage_writer(); + + // Create Pending Streams + let mut streams = vec![]; + for i in 0..2 { + let stream = Arc::new(writer + .create_write_stream(&table) + .await + .unwrap()); + streams.push(stream.clone()); + streams.push(stream); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("stream_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf])); + } + + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + let result = stream.flush_rows().await.unwrap(); + tracing::info!("flush rows count = {:?}", result); + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + + } +} \ No newline at end of file diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 8232c72d..59bd7c59 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -41,6 +41,9 @@ impl AsStream for CommittedStream { fn as_mut(&mut self) -> &mut Stream { &mut self.inner } + fn as_ref(&self) -> &Stream { + &self.inner + } } impl ManagedStream for CommittedStream {} impl DisposableStream for CommittedStream {} diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index 8f722800..0e7105ca 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -42,6 +42,9 @@ impl AsStream for DefaultStream { fn as_mut(&mut self) -> &mut Stream { &mut self.inner } + fn as_ref(&self) -> &Stream { + &self.inner + } } impl ManagedStream for DefaultStream {} diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 158e5dd4..043ad892 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -2,7 +2,9 @@ use std::sync::Arc; use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream}; use crate::grpc::apiv1::conn_pool::ConnectionManager; +use crate::storage_write::AppendRowsRequestBuilder; use crate::storage_write::flow::FlowController; +use google_cloud_gax::grpc::codegen::tokio_stream::Stream as FuturesStream; pub mod default; pub mod pending; @@ -31,11 +33,16 @@ impl Stream { pub(crate) trait AsStream : Sized { fn as_mut(&mut self) -> &mut Stream; + fn as_ref(&self) -> &Stream; + + fn name(&self) -> &str { + &self.as_ref().inner.name + } } pub trait ManagedStream : AsStream { - async fn append_rows(&mut self, req: impl IntoStreamingRequest) -> Result, Status> { - let stream = self.as_mut(); + async fn append_rows(&self, req: impl IntoStreamingRequest) -> Result, Status> { + let stream = 
self.as_ref(); match &stream.fc { None => { let mut client = stream.cons.writer(); @@ -69,3 +76,56 @@ pub trait DisposableStream : ManagedStream { Ok(res.row_count) } } + + +#[cfg(test)] +mod tests { + use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; + + use google_cloud_googleapis::cloud::bigquery::storage::v1::{ + AppendRowsRequest, BatchCommitWriteStreamsRequest, CreateWriteStreamRequest, FinalizeWriteStreamRequest, + ProtoRows, ProtoSchema, WriteStream, + }; + use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; + use crate::storage_write::AppendRowsRequestBuilder; + + #[derive(Clone, PartialEq, ::prost::Message)] + pub(crate) struct TestData { + #[prost(string, tag = "1")] + pub col_string: String, + } + + pub(crate) fn init() { + let filter = tracing_subscriber::filter::EnvFilter::from_default_env() + .add_directive("google_cloud_bigquery=trace".parse().unwrap()); + let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); + } + + pub(crate) fn create_append_rows_request(buf: Vec>) -> AppendRowsRequestBuilder { + let proto = DescriptorProto { + name: Some("TestData".to_string()), + field: vec![FieldDescriptorProto { + name: Some("col_string".to_string()), + number: Some(1), + label: None, + r#type: Some(field_descriptor_proto::Type::String.into()), + type_name: None, + extendee: None, + default_value: None, + oneof_index: None, + json_name: None, + options: None, + proto3_optional: None, + }], + extension: vec![], + nested_type: vec![], + enum_type: vec![], + extension_range: vec![], + oneof_decl: vec![], + options: None, + reserved_range: vec![], + reserved_name: vec![], + }; + return AppendRowsRequestBuilder::new(proto, buf) + } +} diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 12904836..2eef4915 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -61,6 +61,10 @@ impl AsStream for PendingStream { fn as_mut(&mut self) -> &mut Stream { &mut self.inner } + + fn as_ref(&self) -> &Stream { + &self.inner + } } impl ManagedStream for PendingStream {} impl DisposableStream for PendingStream {} \ No newline at end of file From acc5807d9d8f55add276dd5bcfd742f6fd1fc744 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Wed, 23 Oct 2024 15:51:03 +0900 Subject: [PATCH 16/23] add buffered test --- bigquery/src/grpc/apiv1/bigquery_client.rs | 6 ----- bigquery/src/storage_write/stream/buffered.rs | 27 ++++++++++++------- bigquery/src/storage_write/stream/mod.rs | 10 ++----- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index b64b3b30..310fc999 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -254,12 +254,6 @@ mod tests { pub col_string: String, } - #[ctor::ctor] - fn init() { - let filter = tracing_subscriber::filter::EnvFilter::from_default_env() - .add_directive("google_cloud_bigquery=trace".parse().unwrap()); - let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); - } fn create_append_rows_request(name: &str, buf: Vec) -> AppendRowsRequest { AppendRowsRequest { diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index f5616ad8..47cc040d 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -52,13 
+52,13 @@ impl DisposableStream for BufferedStream {} impl BufferedStream { - pub async fn flush_rows(&self) -> Result { + pub async fn flush_rows(&self, offset: Option) -> Result { let stream = self.as_ref(); let res = stream.cons.writer() .flush_rows( FlushRowsRequest{ write_stream: stream.inner.name.to_string(), - offset: None, + offset, }, None, ) @@ -79,7 +79,7 @@ mod tests { use prost::Message; use crate::client::{Client, ClientConfig}; use crate::storage_write::build_streaming_request; - use crate::storage_write::stream::{AsStream, ManagedStream}; + use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; #[ctor::ctor] @@ -91,18 +91,22 @@ mod tests { async fn test_storage_write() { let (config,project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); - let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test_buffered", project_id.unwrap()).to_string(); + let tables= [ + "write_test", + "write_test_1" + ]; let writer = client.buffered_storage_writer(); - // Create Pending Streams + // Create Streams let mut streams = vec![]; for i in 0..2 { - let stream = Arc::new(writer + let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); + let stream = writer .create_write_stream(&table) .await - .unwrap()); - streams.push(stream.clone()); + .unwrap(); streams.push(stream); } @@ -119,15 +123,18 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf])); } - + let size = rows.len() as i64; let request = build_streaming_request(stream.name(), rows); let mut result = stream.append_rows(request).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); } - let result = stream.flush_rows().await.unwrap(); + let result = stream.flush_rows(Some(0)).await.unwrap(); tracing::info!("flush rows count = {:?}", result); + + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); Ok(()) })); } diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 043ad892..99b6e296 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -61,8 +61,8 @@ pub trait ManagedStream : AsStream { } pub trait DisposableStream : ManagedStream { - async fn finalize(mut self) -> Result { - let stream = self.as_mut(); + async fn finalize(&self) -> Result { + let stream = self.as_ref(); let res = stream .cons.writer() .finalize_write_stream( @@ -80,12 +80,6 @@ pub trait DisposableStream : ManagedStream { #[cfg(test)] mod tests { - use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; - - use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - AppendRowsRequest, BatchCommitWriteStreamsRequest, CreateWriteStreamRequest, FinalizeWriteStreamRequest, - ProtoRows, ProtoSchema, WriteStream, - }; use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; use crate::storage_write::AppendRowsRequestBuilder; From 4047a3726977f95f979f77f6d659cf6fa51355a0 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Wed, 23 Oct 2024 16:20:27 +0900 Subject: [PATCH 17/23] add commited test --- bigquery/src/storage_write/stream/buffered.rs | 5 +- 
.../src/storage_write/stream/committed.rs | 73 ++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index 47cc040d..ea36490c 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -117,13 +117,12 @@ mod tests { let mut rows = vec![]; for j in 0..5 { let data = TestData { - col_string: format!("stream_{i}_{j}"), + col_string: format!("buffered_{i}_{j}"), }; let mut buf = Vec::new(); data.encode(&mut buf).unwrap(); - rows.push(create_append_rows_request(vec![buf])); + rows.push(create_append_rows_request(vec![buf.clone(), buf])); } - let size = rows.len() as i64; let request = build_streaming_request(stream.name(), rows); let mut result = stream.append_rows(request).await.unwrap(); while let Some(res) = result.next().await { diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 59bd7c59..9095210b 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -18,7 +18,7 @@ impl Writer { } } - pub async fn create_write_stream(&mut self, table: &str) -> Result { + pub async fn create_write_stream(&self, table: &str) -> Result { let req = create_write_stream_request(table, Committed); let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); Ok(CommittedStream::new(Stream::new(stream, self.cm.clone(),self.max_insert_count))) @@ -47,3 +47,74 @@ impl AsStream for CommittedStream { } impl ManagedStream for CommittedStream {} impl DisposableStream for CommittedStream {} + + +#[cfg(test)] +mod tests { + use tokio::task::JoinHandle; + use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use google_cloud_gax::grpc::Status; + use prost::Message; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::build_streaming_request; + use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + + #[ctor::ctor] + fn init() { + crate::storage_write::stream::tests::init(); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_storage_write() { + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + let tables = [ + "write_test", + "write_test_1" + ]; + let writer = client.committed_storage_writer(); + + // Create Streams + let mut streams = vec![]; + for i in 0..2 { + let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); + let stream = writer + .create_write_stream(&table) + .await + .unwrap(); + streams.push(stream); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("committed_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", 
res.row_errors.len()); + } + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + } +} From 1d82d9f9e7a2a1c9f144d63ecf119d8b2dfc5381 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 29 Oct 2024 10:34:49 +0900 Subject: [PATCH 18/23] add test --- bigquery/src/storage_write/stream/buffered.rs | 55 ++++++++++++++++++- .../src/storage_write/stream/committed.rs | 52 +++++++++++++++++- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index ea36490c..d662b8f7 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -87,7 +87,7 @@ mod tests { crate::storage_write::stream::tests::init(); } - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + #[tokio::test] async fn test_storage_write() { let (config,project_id) = ClientConfig::new_with_auth().await.unwrap(); @@ -144,4 +144,57 @@ mod tests { } } + + #[serial_test::serial] + #[tokio::test] + async fn test_storage_write_single_stream() { + + let (config,project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + let writer = client.buffered_storage_writer(); + + // Create Streams + let mut streams = vec![]; + let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string(); + let stream = Arc::new(writer.create_write_stream(&table).await.unwrap()); + for i in 0..2 { + streams.push(stream.clone()); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("buffered_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + + let result = stream.flush_rows(Some(0)).await.unwrap(); + tracing::info!("flush rows count = {:?}", result); + + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); + + } } \ No newline at end of file diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 9095210b..4c9f7051 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -51,6 +51,7 @@ impl DisposableStream for CommittedStream {} #[cfg(test)] mod tests { + use std::sync::Arc; use tokio::task::JoinHandle; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; @@ -65,7 +66,8 @@ mod tests { crate::storage_write::stream::tests::init(); } - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + #[serial_test::serial] + #[tokio::test] async fn test_storage_write() { let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let project_id = project_id.unwrap(); @@ 
-117,4 +119,52 @@ mod tests { task.await.unwrap().unwrap(); } } + + #[serial_test::serial] + #[tokio::test] + async fn test_storage_write_single_stream() { + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + let writer = client.committed_storage_writer(); + + // Create Streams + let mut streams = vec![]; + let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string(); + let stream = Arc::new(writer.create_write_stream(&table).await.unwrap()); + for i in 0..2 { + streams.push(stream.clone()); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("committed_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); + } } From c710134fae92b4749bffa6df395cf5173c2c5fa5 Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 29 Oct 2024 10:56:56 +0900 Subject: [PATCH 19/23] add test --- bigquery/src/storage_write/stream/default.rs | 70 +++++++++- bigquery/src/storage_write/stream/pending.rs | 137 ++++++++++++++++++- 2 files changed, 203 insertions(+), 4 deletions(-) diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index 0e7105ca..bce9ad82 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -17,7 +17,7 @@ impl Writer { } } - pub async fn create_write_stream(&mut self, table: &str) -> Result { + pub async fn create_write_stream(&self, table: &str) -> Result { let stream = self.cm.writer().get_write_stream(GetWriteStreamRequest { name: format!("{table}/streams/_default"), ..Default::default() @@ -48,3 +48,71 @@ impl AsStream for DefaultStream { } impl ManagedStream for DefaultStream {} +#[cfg(test)] +mod tests { + use tokio::task::JoinHandle; + use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use google_cloud_gax::grpc::Status; + use prost::Message; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::build_streaming_request; + use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + + #[ctor::ctor] + fn init() { + crate::storage_write::stream::tests::init(); + } + + #[serial_test::serial] + #[tokio::test] + async fn test_storage_write() { + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + let tables = [ + "write_test", + "write_test_1" + ]; + let writer = client.default_storage_writer(); + + // Create Streams + let mut streams = vec![]; + for i in 0..2 { + let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, 
tables[i % tables.len()]).to_string(); + let stream = writer + .create_write_stream(&table) + .await + .unwrap(); + streams.push(stream); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("default_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + } +} diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 2eef4915..fd289499 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -33,12 +33,12 @@ impl Writer { Ok(PendingStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } - pub async fn commit(self) -> Result { + pub async fn commit(&self) -> Result { let result = self.cm.writer() .batch_commit_write_streams( BatchCommitWriteStreamsRequest { parent: self.table.to_string(), - write_streams: self.streams, + write_streams: self.streams.clone(), }, None, ) @@ -67,4 +67,135 @@ impl AsStream for PendingStream { } } impl ManagedStream for PendingStream {} -impl DisposableStream for PendingStream {} \ No newline at end of file +impl DisposableStream for PendingStream {} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use tokio::task::JoinHandle; + use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use google_cloud_gax::grpc::Status; + use prost::Message; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::build_streaming_request; + use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + + #[ctor::ctor] + fn init() { + crate::storage_write::stream::tests::init(); + } + + #[serial_test::serial] + #[tokio::test] + async fn test_storage_write() { + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + let tables = [ + "write_test", + "write_test_1" + ]; + + // Create Writers + let mut writers = vec![]; + for i in 0..2 { + let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); + let writer = client.pending_storage_writer(table); + writers.push(writer); + } + + // Create Streams + let mut streams = vec![]; + for writer in writers.iter_mut() { + let stream = writer.create_write_stream().await.unwrap(); + streams.push(stream); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("pending_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut 
result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + + for writer in writers.iter_mut() { + let result = writer.commit().await.unwrap(); + tracing::info!("committed error count = {:?}", result.stream_errors.len()); + } + } + + #[serial_test::serial] + #[tokio::test] + async fn test_storage_write_single_stream() { + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); + let project_id = project_id.unwrap(); + let client = Client::new(config).await.unwrap(); + + // Create Streams + let mut streams = vec![]; + let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string(); + let mut writer = client.pending_storage_writer(table); + let stream = Arc::new(writer.create_write_stream().await.unwrap()); + for i in 0..2 { + streams.push(stream.clone()); + } + + // Append Rows + let mut tasks: Vec>> = vec![]; + for (i, stream) in streams.into_iter().enumerate() { + tasks.push(tokio::spawn(async move { + let mut rows = vec![]; + for j in 0..5 { + let data = TestData { + col_string: format!("pending_{i}_{j}"), + }; + let mut buf = Vec::new(); + data.encode(&mut buf).unwrap(); + rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); + } + let request = build_streaming_request(stream.name(), rows); + let mut result = stream.append_rows(request).await.unwrap(); + while let Some(res) = result.next().await { + let res = res?; + tracing::info!("append row errors = {:?}", res.row_errors.len()); + } + Ok(()) + })); + } + + // Wait for append rows + for task in tasks { + task.await.unwrap().unwrap(); + } + + let result = stream.finalize().await.unwrap(); + tracing::info!("finalized row count = {:?}", result); + + let result = writer.commit().await.unwrap(); + tracing::info!("commit error count = {:?}", result.stream_errors.len()); + } +} From fb2236d96cf230a881ade795156a104094672bce Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 29 Oct 2024 17:47:55 +0900 Subject: [PATCH 20/23] remove unused --- bigquery/Cargo.toml | 4 +- bigquery/src/grpc/apiv1/bigquery_client.rs | 120 ------------------ bigquery/src/storage_write/stream/buffered.rs | 3 - .../src/storage_write/stream/committed.rs | 3 - bigquery/src/storage_write/stream/default.rs | 3 - bigquery/src/storage_write/stream/mod.rs | 1 - bigquery/src/storage_write/stream/pending.rs | 4 - 7 files changed, 2 insertions(+), 136 deletions(-) diff --git a/bigquery/Cargo.toml b/bigquery/Cargo.toml index cb727db3..7b0d521a 100644 --- a/bigquery/Cargo.toml +++ b/bigquery/Cargo.toml @@ -29,6 +29,8 @@ num-bigint = "0.4" backon = "0.4" reqwest-middleware = { version = "0.3", features = ["json", "multipart"] } anyhow = "1.0" +async-stream = "0.3" +prost-types = "0.13" google-cloud-auth = { optional = true, version = "0.17", path="../foundation/auth", default-features=false } @@ -40,9 +42,7 @@ ctor = "0.1.26" tokio-util = {version ="0.7", features = ["codec"] } google-cloud-auth = { path = "../foundation/auth", default-features=false } base64-serde = "0.7" -async-stream = "0.3" prost = "0.13" -prost-types = "0.13" [features] default = ["default-tls", "auth"] diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs 
b/bigquery/src/grpc/apiv1/bigquery_client.rs index 310fc999..11165954 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -254,7 +254,6 @@ mod tests { pub col_string: String, } - fn create_append_rows_request(name: &str, buf: Vec) -> AppendRowsRequest { AppendRowsRequest { write_stream: name.to_string(), @@ -295,123 +294,4 @@ mod tests { })), } } - - #[tokio::test(flavor = "multi_thread", worker_threads = 4)] - async fn test_storage_write() { - let config = google_cloud_auth::project::Config::default() - .with_audience(AUDIENCE) - .with_scopes(&SCOPES); - let ts = google_cloud_auth::token::DefaultTokenSourceProvider::new(config) - .await - .unwrap(); - let conn = ConnectionManager::new(1, &Environment::GoogleCloud(Box::new(ts)), &Default::default()) - .await - .unwrap(); - - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); - - let table = "projects/atl-dev1/datasets/gcrbq_storage/tables/write_test".to_string(); - - // Create Pending Streams - let mut pending_streams = vec![]; - for i in 0..4 { - let pending_stream = client - .create_write_stream(create_write_stream_request(&table, Pending), None) - .await - .unwrap() - .into_inner(); - tracing::info!("stream = {:?}", pending_stream.name); - pending_streams.push(pending_stream); - } - - let stream_names = pending_streams - .iter() - .map(|s| s.name.to_string()) - .collect::>(); - - // Append Rows - let mut tasks: Vec>> = vec![]; - for (i, pending_stream) in pending_streams.into_iter().enumerate() { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); - tasks.push(tokio::spawn(async move { - let mut rows = vec![]; - for j in 0..5 { - let data = TestData { - col_string: format!("stream_{i}_{j}"), - }; - let mut buf = Vec::new(); - data.encode(&mut buf).unwrap(); - - let row = create_append_rows_request(&pending_stream.name, buf); - rows.push(row); - } - - let request = Box::pin(async_stream::stream! { - for req in rows { - yield req; - } - }); - let mut result = client.append_rows(request).await?.into_inner(); - while let Some(res) = result.next().await { - let res = res?; - tracing::info!("append row errors = {:?}", res.row_errors.len()); - } - Ok(()) - })); - } - - // Wait for append rows - for task in tasks { - task.await.unwrap().unwrap(); - } - - // Finalize streams - for pending_stream in &stream_names { - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); - let res = client - .finalize_write_stream( - FinalizeWriteStreamRequest { - name: pending_stream.to_string(), - }, - None, - ) - .await - .unwrap() - .into_inner(); - tracing::info!("finalized = {:?}", res.row_count); - } - - // Commit - let mut client = StreamingWriteClient::new(BigQueryWriteClient::new(conn.conn())); - let res = client - .batch_commit_write_streams( - BatchCommitWriteStreamsRequest { - parent: table.to_string(), - write_streams: stream_names.iter().map(|s| s.to_string()).collect(), - }, - None, - ) - .await - .unwrap() - .into_inner(); - tracing::info!("commit stream errors = {:?}", res.stream_errors.len()); - - // Write via default stream - let data = TestData { - col_string: format!("default_stream"), - }; - let mut buf = Vec::new(); - data.encode(&mut buf).unwrap(); - let row = create_append_rows_request(&format!("{table}/streams/_default"), buf); - let request = Box::pin(async_stream::stream! 
{ - for req in [row]{ - yield req; - } - }); - let mut response = client.append_rows(request).await.unwrap().into_inner(); - while let Some(res) = response.next().await { - let res = res.unwrap(); - tracing::info!("default append row errors = {:?}", res.row_errors.len()); - } - } } diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index d662b8f7..1e50345c 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -40,9 +40,6 @@ impl BufferedStream { } impl AsStream for BufferedStream { - fn as_mut(&mut self) -> &mut Stream { - &mut self.inner - } fn as_ref(&self) -> &Stream { &self.inner } diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 4c9f7051..fe4cca21 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -38,9 +38,6 @@ impl CommittedStream { } impl AsStream for CommittedStream { - fn as_mut(&mut self) -> &mut Stream { - &mut self.inner - } fn as_ref(&self) -> &Stream { &self.inner } diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index bce9ad82..0b2551f9 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -39,9 +39,6 @@ impl DefaultStream { } impl AsStream for DefaultStream { - fn as_mut(&mut self) -> &mut Stream { - &mut self.inner - } fn as_ref(&self) -> &Stream { &self.inner } diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 99b6e296..99c6cdb0 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -32,7 +32,6 @@ impl Stream { } pub(crate) trait AsStream : Sized { - fn as_mut(&mut self) -> &mut Stream; fn as_ref(&self) -> &Stream; fn name(&self) -> &str { diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index fd289499..5320c5fa 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -58,10 +58,6 @@ impl PendingStream { } impl AsStream for PendingStream { - fn as_mut(&mut self) -> &mut Stream { - &mut self.inner - } - fn as_ref(&self) -> &Stream { &self.inner } From 0dd8beaeb5fa5c88c74c3e92d536360dc32fe6ba Mon Sep 17 00:00:00 2001 From: yoshidan Date: Tue, 29 Oct 2024 21:40:36 +0900 Subject: [PATCH 21/23] refactor --- bigquery/src/client.rs | 44 +++++++---- bigquery/src/grpc/apiv1/bigquery_client.rs | 14 +--- bigquery/src/grpc/apiv1/conn_pool.rs | 2 +- bigquery/src/storage_write/flow.rs | 9 +-- bigquery/src/storage_write/mod.rs | 13 +--- bigquery/src/storage_write/stream/buffered.rs | 73 ++++++++----------- .../src/storage_write/stream/committed.rs | 58 +++++++-------- bigquery/src/storage_write/stream/default.rs | 60 +++++++-------- bigquery/src/storage_write/stream/mod.rs | 67 +++++++++++------ bigquery/src/storage_write/stream/pending.rs | 52 ++++++------- 10 files changed, 196 insertions(+), 196 deletions(-) diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index 5d23ea81..8b35cde1 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use google_cloud_gax::conn::{ConnectionOptions, Environment}; use google_cloud_gax::retry::RetrySetting; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - CreateReadSessionRequest, DataFormat, read_session, 
ReadSession, + read_session, CreateReadSessionRequest, DataFormat, ReadSession, }; use google_cloud_token::TokenSourceProvider; @@ -45,7 +45,7 @@ pub struct ClientConfig { #[derive(Clone, Debug)] pub struct StreamingWriteConfig { channel_config: ChannelConfig, - max_insert_count: usize + max_insert_count: usize, } impl StreamingWriteConfig { @@ -63,10 +63,9 @@ impl Default for StreamingWriteConfig { fn default() -> Self { Self { channel_config: ChannelConfig::default(), - max_insert_count: 1000 + max_insert_count: 1000, } } - } #[derive(Clone, Debug)] @@ -91,11 +90,19 @@ impl ChannelConfig { self } - async fn into_connection_manager(self, environment: &Environment) -> Result { - ConnectionManager::new(self.num_channels, environment, &ConnectionOptions { - timeout: self.timeout, - connect_timeout: self.connect_timeout, - }).await + async fn into_connection_manager( + self, + environment: &Environment, + ) -> Result { + ConnectionManager::new( + self.num_channels, + environment, + &ConnectionOptions { + timeout: self.timeout, + connect_timeout: self.connect_timeout, + }, + ) + .await } } @@ -150,10 +157,10 @@ use crate::http::job::get::GetJobRequest; use crate::http::job::list::ListJobsRequest; use crate::grpc::apiv1::bigquery_client::StreamingReadClient; +use crate::storage_write::stream::{buffered, committed, default, pending}; #[cfg(feature = "auth")] pub use google_cloud_auth; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_read_client::BigQueryReadClient; -use crate::storage_write::stream::{buffered, committed, default, pending}; #[cfg(feature = "auth")] impl ClientConfig { @@ -245,9 +252,20 @@ impl Client { routine_client: BigqueryRoutineClient::new(client.clone()), row_access_policy_client: BigqueryRowAccessPolicyClient::new(client.clone()), model_client: BigqueryModelClient::new(client.clone()), - streaming_read_conn_pool: Arc::new(config.streaming_read_config.into_connection_manager(&config.environment).await?), - streaming_write_conn_pool: Arc::new(config.streaming_write_config.channel_config.into_connection_manager(&config.environment).await?), - stereaming_write_max_insert_count: config.streaming_write_config.max_insert_count + streaming_read_conn_pool: Arc::new( + config + .streaming_read_config + .into_connection_manager(&config.environment) + .await?, + ), + streaming_write_conn_pool: Arc::new( + config + .streaming_write_config + .channel_config + .into_connection_manager(&config.environment) + .await?, + ), + stereaming_write_max_insert_count: config.streaming_write_config.max_insert_count, }) } diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index 11165954..bdb12018 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -231,22 +231,12 @@ pub(crate) fn create_write_stream_request(table: &str, write_type: Type) -> Crea #[cfg(test)] mod tests { - use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; - use crate::grpc::apiv1::conn_pool::{ConnectionManager, AUDIENCE, SCOPES}; - use google_cloud_gax::conn::Environment; - use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; - use google_cloud_gax::grpc::Status; + use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; - use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; - use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; - 
use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - AppendRowsRequest, BatchCommitWriteStreamsRequest, CreateWriteStreamRequest, FinalizeWriteStreamRequest, - ProtoRows, ProtoSchema, WriteStream, - }; + use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, ProtoRows, ProtoSchema}; use prost::Message; use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; - use tokio::task::JoinHandle; #[derive(Clone, PartialEq, ::prost::Message)] struct TestData { diff --git a/bigquery/src/grpc/apiv1/conn_pool.rs b/bigquery/src/grpc/apiv1/conn_pool.rs index c76b2d14..6faeb8e4 100644 --- a/bigquery/src/grpc/apiv1/conn_pool.rs +++ b/bigquery/src/grpc/apiv1/conn_pool.rs @@ -1,8 +1,8 @@ +use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; use google_cloud_gax::conn::{ Channel, ConnectionManager as GRPCConnectionManager, ConnectionOptions, Environment, Error, }; use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use crate::grpc::apiv1::bigquery_client::StreamingWriteClient; pub const AUDIENCE: &str = "https://bigquerystorage.googleapis.com/"; pub const DOMAIN: &str = "bigquerystorage.googleapis.com"; diff --git a/bigquery/src/storage_write/flow.rs b/bigquery/src/storage_write/flow.rs index dfe1c489..a9b39bd5 100644 --- a/bigquery/src/storage_write/flow.rs +++ b/bigquery/src/storage_write/flow.rs @@ -2,19 +2,16 @@ use tokio::sync::{Semaphore, SemaphorePermit}; #[derive(Debug)] pub struct FlowController { - sem_insert_count: Semaphore - //TODO support sem_insert_bytes + sem_insert_count: Semaphore, //TODO support sem_insert_bytes } impl FlowController { - pub fn new(max_insert_count: usize) -> Self { FlowController { - sem_insert_count: Semaphore::new(max_insert_count) + sem_insert_count: Semaphore::new(max_insert_count), } } pub async fn acquire(&self) -> SemaphorePermit { self.sem_insert_count.acquire().await.unwrap() } - -} \ No newline at end of file +} diff --git a/bigquery/src/storage_write/mod.rs b/bigquery/src/storage_write/mod.rs index 6d0d16fd..1f218f32 100644 --- a/bigquery/src/storage_write/mod.rs +++ b/bigquery/src/storage_write/mod.rs @@ -1,8 +1,8 @@ +use google_cloud_gax::grpc::codegen::tokio_stream::Stream; use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, ProtoRows, ProtoSchema}; use prost_types::DescriptorProto; use std::collections::HashMap; -use google_cloud_gax::grpc::codegen::tokio_stream::Stream; mod flow; pub mod stream; @@ -67,16 +67,7 @@ impl AppendRowsRequestBuilder { } } -pub fn build_streaming_request(name: &str, rows: Vec) -> impl Stream{ - let name = name.to_string(); - async_stream::stream! { - for row in rows { - yield row.build(&name); - } - } -} - -pub fn into_streaming_request(rows: Vec) -> impl Stream{ +pub fn into_streaming_request(rows: Vec) -> impl Stream { async_stream::stream! 
{ for row in rows { yield row; diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index 1e50345c..db0ab621 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -1,13 +1,10 @@ -use crate::grpc::apiv1::conn_pool::{AUDIENCE, ConnectionManager, SCOPES}; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed, Pending}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, FlushRowsRequest, WriteStream}; -use std::sync::Arc; -use tokio::task::JoinHandle; -use google_cloud_gax::conn::Environment; -use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient; -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; +use crate::grpc::apiv1::bigquery_client::create_write_stream_request; +use crate::grpc::apiv1::conn_pool::ConnectionManager; use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; +use google_cloud_gax::grpc::Status; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; +use google_cloud_googleapis::cloud::bigquery::storage::v1::FlushRowsRequest; +use std::sync::Arc; pub struct Writer { max_insert_count: usize, @@ -16,10 +13,7 @@ pub struct Writer { impl Writer { pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { - Self { - max_insert_count, - cm, - } + Self { max_insert_count, cm } } pub async fn create_write_stream(&self, table: &str) -> Result { @@ -27,10 +21,9 @@ impl Writer { let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); Ok(BufferedStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } - } pub struct BufferedStream { - inner: Stream + inner: Stream, } impl BufferedStream { @@ -48,12 +41,13 @@ impl ManagedStream for BufferedStream {} impl DisposableStream for BufferedStream {} impl BufferedStream { - pub async fn flush_rows(&self, offset: Option) -> Result { let stream = self.as_ref(); - let res = stream.cons.writer() + let res = stream + .cons + .writer() .flush_rows( - FlushRowsRequest{ + FlushRowsRequest { write_stream: stream.inner.name.to_string(), offset, }, @@ -63,21 +57,19 @@ impl BufferedStream { .into_inner(); Ok(res.offset) } - } #[cfg(test)] mod tests { use std::sync::Arc; use tokio::task::JoinHandle; - use google_cloud_gax::create_request; + + use crate::client::{Client, ClientConfig}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use crate::storage_write::stream::{DisposableStream, ManagedStream}; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; - use crate::client::{Client, ClientConfig}; - use crate::storage_write::build_streaming_request; - use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; - use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; #[ctor::ctor] fn init() { @@ -86,24 +78,22 @@ mod tests { #[tokio::test] async fn test_storage_write() { - - let (config,project_id) = ClientConfig::new_with_auth().await.unwrap(); + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let 
project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); - let tables= [ - "write_test", - "write_test_1" - ]; + let tables = ["write_test", "write_test_1"]; let writer = client.buffered_storage_writer(); // Create Streams let mut streams = vec![]; for i in 0..2 { - let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); - let stream = writer - .create_write_stream(&table) - .await - .unwrap(); + let table = format!( + "projects/{}/datasets/gcrbq_storage/tables/{}", + &project_id, + tables[i % tables.len()] + ) + .to_string(); + let stream = writer.create_write_stream(&table).await.unwrap(); streams.push(stream); } @@ -120,8 +110,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); @@ -139,14 +128,12 @@ mod tests { for task in tasks { task.await.unwrap().unwrap(); } - } #[serial_test::serial] #[tokio::test] async fn test_storage_write_single_stream() { - - let (config,project_id) = ClientConfig::new_with_auth().await.unwrap(); + let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); let writer = client.buffered_storage_writer(); @@ -172,8 +159,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); @@ -192,6 +178,5 @@ mod tests { let result = stream.finalize().await.unwrap(); tracing::info!("finalized row count = {:?}", result); - } -} \ No newline at end of file +} diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index fe4cca21..0bc21b63 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -1,9 +1,9 @@ -use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient}; +use crate::grpc::apiv1::bigquery_client::create_write_stream_request; use crate::grpc::apiv1::conn_pool::ConnectionManager; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Buffered, Committed}; +use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; +use google_cloud_gax::grpc::Status; +use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed; use std::sync::Arc; -use crate::storage_write::stream::{ AsStream, DisposableStream, ManagedStream, Stream}; pub struct Writer { max_insert_count: usize, @@ -12,29 +12,28 @@ pub struct Writer { impl Writer { pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { - Self { - max_insert_count, - cm, - } + Self { max_insert_count, cm } } pub async fn create_write_stream(&self, table: &str) -> Result { let req = create_write_stream_request(table, 
Committed); let stream = self.cm.writer().create_write_stream(req, None).await?.into_inner(); - Ok(CommittedStream::new(Stream::new(stream, self.cm.clone(),self.max_insert_count))) + Ok(CommittedStream::new(Stream::new( + stream, + self.cm.clone(), + self.max_insert_count, + ))) } - } pub struct CommittedStream { - inner: Stream + inner: Stream, } impl CommittedStream { pub(crate) fn new(inner: Stream) -> Self { Self { inner } } - } impl AsStream for CommittedStream { @@ -45,18 +44,16 @@ impl AsStream for CommittedStream { impl ManagedStream for CommittedStream {} impl DisposableStream for CommittedStream {} - #[cfg(test)] mod tests { - use std::sync::Arc; - use tokio::task::JoinHandle; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use crate::storage_write::stream::{DisposableStream, ManagedStream}; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; - use crate::client::{Client, ClientConfig}; - use crate::storage_write::build_streaming_request; - use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; - use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use std::sync::Arc; + use tokio::task::JoinHandle; #[ctor::ctor] fn init() { @@ -69,20 +66,19 @@ mod tests { let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); - let tables = [ - "write_test", - "write_test_1" - ]; + let tables = ["write_test", "write_test_1"]; let writer = client.committed_storage_writer(); // Create Streams let mut streams = vec![]; for i in 0..2 { - let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); - let stream = writer - .create_write_stream(&table) - .await - .unwrap(); + let table = format!( + "projects/{}/datasets/gcrbq_storage/tables/{}", + &project_id, + tables[i % tables.len()] + ) + .to_string(); + let stream = writer.create_write_stream(&table).await.unwrap(); streams.push(stream); } @@ -99,8 +95,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); @@ -146,8 +141,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index 0b2551f9..bac78e79 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -1,8 +1,8 @@ use crate::grpc::apiv1::conn_pool::ConnectionManager; -use google_cloud_gax::grpc::{Status, }; -use std::sync::Arc; -use google_cloud_googleapis::cloud::bigquery::storage::v1::GetWriteStreamRequest; use 
crate::storage_write::stream::{AsStream, ManagedStream, Stream}; +use google_cloud_gax::grpc::Status; +use google_cloud_googleapis::cloud::bigquery::storage::v1::GetWriteStreamRequest; +use std::sync::Arc; pub struct Writer { max_insert_count: usize, @@ -11,31 +11,34 @@ pub struct Writer { impl Writer { pub(crate) fn new(max_insert_count: usize, cm: Arc) -> Self { - Self { - max_insert_count, - cm, - } + Self { max_insert_count, cm } } pub async fn create_write_stream(&self, table: &str) -> Result { - let stream = self.cm.writer().get_write_stream(GetWriteStreamRequest { - name: format!("{table}/streams/_default"), - ..Default::default() - }, None).await?.into_inner(); + let stream = self + .cm + .writer() + .get_write_stream( + GetWriteStreamRequest { + name: format!("{table}/streams/_default"), + ..Default::default() + }, + None, + ) + .await? + .into_inner(); Ok(DefaultStream::new(Stream::new(stream, self.cm.clone(), self.max_insert_count))) } } - pub struct DefaultStream { - inner: Stream + inner: Stream, } impl DefaultStream { pub(crate) fn new(inner: Stream) -> Self { Self { inner } } - } impl AsStream for DefaultStream { @@ -47,14 +50,13 @@ impl ManagedStream for DefaultStream {} #[cfg(test)] mod tests { - use tokio::task::JoinHandle; + use crate::client::{Client, ClientConfig}; + use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use crate::storage_write::stream::ManagedStream; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; - use crate::client::{Client, ClientConfig}; - use crate::storage_write::build_streaming_request; - use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; - use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use tokio::task::JoinHandle; #[ctor::ctor] fn init() { @@ -67,20 +69,19 @@ mod tests { let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); - let tables = [ - "write_test", - "write_test_1" - ]; + let tables = ["write_test", "write_test_1"]; let writer = client.default_storage_writer(); // Create Streams let mut streams = vec![]; for i in 0..2 { - let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); - let stream = writer - .create_write_stream(&table) - .await - .unwrap(); + let table = format!( + "projects/{}/datasets/gcrbq_storage/tables/{}", + &project_id, + tables[i % tables.len()] + ) + .to_string(); + let stream = writer.create_write_stream(&table).await.unwrap(); streams.push(stream); } @@ -97,8 +98,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs index 99c6cdb0..290ceac8 100644 --- a/bigquery/src/storage_write/stream/mod.rs +++ b/bigquery/src/storage_write/stream/mod.rs @@ -1,52 +1,78 @@ -use std::sync::Arc; -use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming}; -use 
google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, AppendRowsResponse, CreateWriteStreamRequest, FinalizeWriteStreamRequest, WriteStream};
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
-use crate::storage_write::AppendRowsRequestBuilder;
 use crate::storage_write::flow::FlowController;
-use google_cloud_gax::grpc::codegen::tokio_stream::Stream as FuturesStream;
+use crate::storage_write::AppendRowsRequestBuilder;
+use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{
+    AppendRowsRequest, AppendRowsResponse, FinalizeWriteStreamRequest, WriteStream,
+};
+use std::sync::Arc;
+pub mod buffered;
+pub mod committed;
 pub mod default;
 pub mod pending;
-pub mod committed;
-pub mod buffered;
 pub(crate) struct Stream {
     inner: WriteStream,
     cons: Arc<ConnectionManager>,
-    fc: Option<FlowController>
+    fc: Option<FlowController>,
 }
 impl Stream {
     pub(crate) fn new(inner: WriteStream, cons: Arc<ConnectionManager>, max_insert_count: usize) -> Self {
         Self {
             inner,
-            cons ,
+            cons,
             fc: if max_insert_count > 0 {
                 Some(FlowController::new(max_insert_count))
-            }else {
+            } else {
                 None
-            }
+            },
         }
     }
 }
-pub(crate) trait AsStream : Sized {
+pub(crate) trait AsStream: Sized {
     fn as_ref(&self) -> &Stream;
     fn name(&self) -> &str {
         &self.as_ref().inner.name
     }
+
+    fn create_streaming_request(
+        &self,
+        rows: Vec<AppendRowsRequestBuilder>,
+    ) -> impl google_cloud_gax::grpc::codegen::tokio_stream::Stream<Item = AppendRowsRequest> {
+        let name = self.name().to_string();
+        async_stream::stream! {
+            for row in rows {
+                yield row.build(&name);
+            }
+        }
+    }
 }
-pub trait ManagedStream : AsStream {
-    async fn append_rows(&self, req: impl IntoStreamingRequest<Message = AppendRowsRequest>) -> Result<Streaming<AppendRowsResponse>, Status> {
+pub trait ManagedStream: AsStream {
+    async fn append_rows(&self, rows: Vec<AppendRowsRequestBuilder>) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let name = self.name().to_string();
+        let stream = async_stream::stream!
{
+            for row in rows {
+                yield row.build(&name);
+            }
+        };
+        self.append_streaming_request(stream).await
+    }
+
+    async fn append_streaming_request(
+        &self,
+        req: impl IntoStreamingRequest<Message = AppendRowsRequest>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
         let stream = self.as_ref();
         match &stream.fc {
             None => {
                 let mut client = stream.cons.writer();
                 Ok(client.append_rows(req).await?.into_inner())
-            },
+            }
             Some(fc) => {
                 let permit = fc.acquire().await;
                 let mut client = stream.cons.writer();
@@ -56,14 +82,14 @@ pub trait ManagedStream : AsStream {
             }
         }
     }
-
 }
-pub trait DisposableStream : ManagedStream {
+pub trait DisposableStream: ManagedStream {
     async fn finalize(&self) -> Result<i64, Status> {
         let stream = self.as_ref();
         let res = stream
-            .cons.writer()
+            .cons
+            .writer()
             .finalize_write_stream(
                 FinalizeWriteStreamRequest {
                     name: stream.inner.name.to_string(),
@@ -76,11 +102,10 @@
     }
 }
-
 #[cfg(test)]
 mod tests {
-    use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto};
     use crate::storage_write::AppendRowsRequestBuilder;
+    use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto};
     #[derive(Clone, PartialEq, ::prost::Message)]
     pub(crate) struct TestData {
@@ -119,6 +144,6 @@
             reserved_range: vec![],
             reserved_name: vec![],
         };
-        return AppendRowsRequestBuilder::new(proto, buf)
+        AppendRowsRequestBuilder::new(proto, buf)
     }
 }
diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs
index 5320c5fa..0c3aa9f2 100644
--- a/bigquery/src/storage_write/stream/pending.rs
+++ b/bigquery/src/storage_write/stream/pending.rs
@@ -1,19 +1,18 @@
-use crate::grpc::apiv1::bigquery_client::{create_write_stream_request, StreamingWriteClient};
+use crate::grpc::apiv1::bigquery_client::create_write_stream_request;
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
-use google_cloud_gax::grpc::{IntoStreamingRequest, Status, Streaming};
-use google_cloud_googleapis::cloud::bigquery::storage::v1::big_query_write_client::BigQueryWriteClient;
-use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::{Committed, Pending};
+use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream};
+use google_cloud_gax::grpc::Status;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending;
 use google_cloud_googleapis::cloud::bigquery::storage::v1::{
-    AppendRowsRequest, AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
+    BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse,
 };
 use std::sync::Arc;
-use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream};
 pub struct Writer {
     max_insert_count: usize,
     cm: Arc<ConnectionManager>,
     table: String,
-    streams: Vec<String>
+    streams: Vec<String>,
 }
 impl Writer {
@@ -22,7 +21,7 @@
             max_insert_count,
             cm,
             table,
-            streams: Vec::new()
+            streams: Vec::new(),
         }
     }
@@ -34,7 +33,9 @@
     }
     pub async fn commit(&self) -> Result<BatchCommitWriteStreamsResponse, Status> {
-        let result = self.cm.writer()
+        let result = self
+            .cm
+            .writer()
             .batch_commit_write_streams(
                 BatchCommitWriteStreamsRequest {
                     parent: self.table.to_string(),
@@ -48,7 +49,7 @@
     }
 }
 pub struct PendingStream {
-    inner: Stream
+    inner: Stream,
 }
 impl PendingStream {
@@ -67,15 +68,14 @@
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-    use tokio::task::JoinHandle;
+    use crate::client::{Client, ClientConfig};
+    use crate::storage_write::stream::tests::{create_append_rows_request,
TestData}; + use crate::storage_write::stream::{DisposableStream, ManagedStream}; use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; - use crate::client::{Client, ClientConfig}; - use crate::storage_write::build_streaming_request; - use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream}; - use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; + use std::sync::Arc; + use tokio::task::JoinHandle; #[ctor::ctor] fn init() { @@ -88,15 +88,17 @@ mod tests { let (config, project_id) = ClientConfig::new_with_auth().await.unwrap(); let project_id = project_id.unwrap(); let client = Client::new(config).await.unwrap(); - let tables = [ - "write_test", - "write_test_1" - ]; + let tables = ["write_test", "write_test_1"]; // Create Writers let mut writers = vec![]; for i in 0..2 { - let table = format!("projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()]).to_string(); + let table = format!( + "projects/{}/datasets/gcrbq_storage/tables/{}", + &project_id, + tables[i % tables.len()] + ) + .to_string(); let writer = client.pending_storage_writer(table); writers.push(writer); } @@ -121,8 +123,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); @@ -139,7 +140,7 @@ mod tests { } for writer in writers.iter_mut() { - let result = writer.commit().await.unwrap(); + let result = writer.commit().await.unwrap(); tracing::info!("committed error count = {:?}", result.stream_errors.len()); } } @@ -173,8 +174,7 @@ mod tests { data.encode(&mut buf).unwrap(); rows.push(create_append_rows_request(vec![buf.clone(), buf.clone(), buf])); } - let request = build_streaming_request(stream.name(), rows); - let mut result = stream.append_rows(request).await.unwrap(); + let mut result = stream.append_rows(rows).await.unwrap(); while let Some(res) = result.next().await { let res = res?; tracing::info!("append row errors = {:?}", res.row_errors.len()); From 51e7000722f856c48b8096b87330aa3b8f30308e Mon Sep 17 00:00:00 2001 From: yoshidan Date: Wed, 6 Nov 2024 08:56:07 +0900 Subject: [PATCH 22/23] add doc --- bigquery/Cargo.toml | 1 + bigquery/src/client.rs | 123 +++++++++++++++++- bigquery/src/lib.rs | 2 +- bigquery/src/storage_write/stream/buffered.rs | 2 +- .../src/storage_write/stream/committed.rs | 2 +- bigquery/src/storage_write/stream/default.rs | 2 +- bigquery/src/storage_write/stream/pending.rs | 11 +- 7 files changed, 131 insertions(+), 12 deletions(-) diff --git a/bigquery/Cargo.toml b/bigquery/Cargo.toml index 8f08a352..89699d67 100644 --- a/bigquery/Cargo.toml +++ b/bigquery/Cargo.toml @@ -43,6 +43,7 @@ tokio-util = {version ="0.7", features = ["codec"] } google-cloud-auth = { path = "../foundation/auth", default-features=false } base64-serde = "0.7" prost = "0.13" +futures-util = "0.3" [features] default = ["default-tls", "auth"] diff --git a/bigquery/src/client.rs b/bigquery/src/client.rs index fdad7d89..4dbfc37a 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -313,24 +313,143 @@ impl Client { /// Creates a new pending type storage writer for the specified table. 
/// https://cloud.google.com/bigquery/docs/write-api#pending_type - pub fn pending_storage_writer(&self, table: String) -> pending::Writer { - pending::Writer::new(1, self.streaming_write_conn_pool.clone(), table) + /// ``` + /// use prost_types::DescriptorProto; + /// use google_cloud_bigquery::client::Client; + /// use google_cloud_gax::grpc::Status; + /// use prost::Message; + /// use tokio::sync::futures; + /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; + /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; + /// use futures_util::stream::StreamExt; + /// + /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) + /// -> Result<(), Status> { + /// let mut writer = client.pending_storage_writer(table); + /// let stream = writer.create_write_stream().await?; + /// + /// let mut data= vec![]; + /// for row in rows { + /// let mut buf = Vec::new(); + /// row.encode(&mut buf).unwrap(); + /// data.push(buf); + /// } + /// let mut result = stream.append_rows(vec![AppendRowsRequestBuilder::new(schema, data)]).await.unwrap(); + /// while let Some(Ok(res)) = result.next().await { + /// tracing::info!("append row errors = {:?}", res.row_errors.len()); + /// } + /// + /// let _ = stream.finalize().await?; + /// let _ = writer.commit().await?; + /// Ok(()) + /// } + /// ``` + pub fn pending_storage_writer(&self, table: &str) -> pending::Writer { + pending::Writer::new(1, self.streaming_write_conn_pool.clone(), table.to_string()) } /// Creates a new default type storage writer. /// https://cloud.google.com/bigquery/docs/write-api#default_stream + /// ``` + /// use prost_types::DescriptorProto; + /// use google_cloud_bigquery::client::Client; + /// use google_cloud_gax::grpc::Status; + /// use prost::Message; + /// use tokio::sync::futures; + /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; + /// use google_cloud_bigquery::storage_write::stream::ManagedStream; + /// use futures_util::stream::StreamExt; + /// + /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) + /// -> Result<(), Status> { + /// let writer = client.default_storage_writer(); + /// let stream = writer.create_write_stream(table).await?; + /// + /// let mut data= vec![]; + /// for row in rows { + /// let mut buf = Vec::new(); + /// row.encode(&mut buf).unwrap(); + /// data.push(buf); + /// } + /// let mut result = stream.append_rows(vec![AppendRowsRequestBuilder::new(schema, data)]).await.unwrap(); + /// while let Some(Ok(res)) = result.next().await { + /// tracing::info!("append row errors = {:?}", res.row_errors.len()); + /// } + /// Ok(()) + /// } + /// ``` pub fn default_storage_writer(&self) -> default::Writer { default::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new committed type storage writer. 
/// https://cloud.google.com/bigquery/docs/write-api#committed_type + /// ``` + /// use prost_types::DescriptorProto; + /// use google_cloud_bigquery::client::Client; + /// use google_cloud_gax::grpc::Status; + /// use prost::Message; + /// use tokio::sync::futures; + /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; + /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; + /// use futures_util::stream::StreamExt; + /// + /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) + /// -> Result<(), Status> { + /// let writer = client.committed_storage_writer(); + /// let stream = writer.create_write_stream(table).await?; + /// + /// let mut data= vec![]; + /// for row in rows { + /// let mut buf = Vec::new(); + /// row.encode(&mut buf).unwrap(); + /// data.push(buf); + /// } + /// let mut result = stream.append_rows(vec![AppendRowsRequestBuilder::new(schema, data)]).await.unwrap(); + /// while let Some(Ok(res)) = result.next().await { + /// tracing::info!("append row errors = {:?}", res.row_errors.len()); + /// } + /// + /// let _ = stream.finalize().await?; + /// Ok(()) + /// } + /// ``` pub fn committed_storage_writer(&self) -> committed::Writer { committed::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new buffered type storage writer. /// https://cloud.google.com/bigquery/docs/write-api#buffered_type + /// ``` + /// use prost_types::DescriptorProto; + /// use google_cloud_bigquery::client::Client; + /// use prost::Message; + /// use tokio::sync::futures; + /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; + /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; + /// use futures_util::stream::StreamExt; + /// use google_cloud_gax::grpc::Status; + /// + /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) + /// -> Result<(), Status> { + /// let writer = client.buffered_storage_writer(); + /// let stream = writer.create_write_stream(table).await?; + /// + /// let mut data= vec![]; + /// for row in rows { + /// let mut buf = Vec::new(); + /// row.encode(&mut buf).unwrap(); + /// data.push(buf); + /// } + /// let mut result = stream.append_rows(vec![AppendRowsRequestBuilder::new(schema, data)]).await.unwrap(); + /// while let Some(Ok(res)) = result.next().await { + /// tracing::info!("append row errors = {:?}", res.row_errors.len()); + /// } + /// let _ = stream.flush_rows(Some(0)).await?; + /// let _ = stream.finalize().await?; + /// Ok(()) + /// } + /// ``` pub fn buffered_storage_writer(&self) -> buffered::Writer { buffered::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } diff --git a/bigquery/src/lib.rs b/bigquery/src/lib.rs index d06083ac..9b9c1a2a 100644 --- a/bigquery/src/lib.rs +++ b/bigquery/src/lib.rs @@ -190,4 +190,4 @@ pub mod grpc; pub mod http; pub mod query; pub mod storage; -mod storage_write; +pub mod storage_write; diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index db0ab621..e31150b2 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -61,13 +61,13 @@ impl BufferedStream { #[cfg(test)] mod tests { + use futures_util::StreamExt; use std::sync::Arc; use tokio::task::JoinHandle; use crate::client::{Client, ClientConfig}; use 
crate::storage_write::stream::tests::{create_append_rows_request, TestData}; use crate::storage_write::stream::{DisposableStream, ManagedStream}; - use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs index 0bc21b63..1170c6ec 100644 --- a/bigquery/src/storage_write/stream/committed.rs +++ b/bigquery/src/storage_write/stream/committed.rs @@ -49,7 +49,7 @@ mod tests { use crate::client::{Client, ClientConfig}; use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; use crate::storage_write::stream::{DisposableStream, ManagedStream}; - use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use futures_util::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; use std::sync::Arc; diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs index bac78e79..742a87b7 100644 --- a/bigquery/src/storage_write/stream/default.rs +++ b/bigquery/src/storage_write/stream/default.rs @@ -53,7 +53,7 @@ mod tests { use crate::client::{Client, ClientConfig}; use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; use crate::storage_write::stream::ManagedStream; - use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use futures_util::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; use tokio::task::JoinHandle; diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 0c3aa9f2..378efeed 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -71,7 +71,7 @@ mod tests { use crate::client::{Client, ClientConfig}; use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; use crate::storage_write::stream::{DisposableStream, ManagedStream}; - use google_cloud_gax::grpc::codegen::tokio_stream::StreamExt; + use futures_util::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; use std::sync::Arc; @@ -97,9 +97,8 @@ mod tests { "projects/{}/datasets/gcrbq_storage/tables/{}", &project_id, tables[i % tables.len()] - ) - .to_string(); - let writer = client.pending_storage_writer(table); + ); + let writer = client.pending_storage_writer(&table); writers.push(writer); } @@ -154,8 +153,8 @@ mod tests { // Create Streams let mut streams = vec![]; - let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string(); - let mut writer = client.pending_storage_writer(table); + let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id); + let mut writer = client.pending_storage_writer(&table); let stream = Arc::new(writer.create_write_stream().await.unwrap()); for i in 0..2 { streams.push(stream.clone()); From 1697ff5463bd1f691f08c335f813c00ddc76389f Mon Sep 17 00:00:00 2001 From: yoshidan Date: Wed, 6 Nov 2024 09:33:14 +0900 Subject: [PATCH 23/23] fix lint --- bigquery/src/client.rs | 25 ++------ bigquery/src/grpc/apiv1/bigquery_client.rs | 57 ------------------- bigquery/src/storage_write/stream/buffered.rs | 35 +++++++----- .../src/storage_write/stream/committed.rs | 23 +++++--- bigquery/src/storage_write/stream/default.rs | 15 +++-- bigquery/src/storage_write/stream/mod.rs | 28 +++++---- bigquery/src/storage_write/stream/pending.rs | 24 +++++--- 7 files changed, 85 insertions(+), 122 deletions(-) diff --git 
a/bigquery/src/client.rs b/bigquery/src/client.rs index 4dbfc37a..2f5d7959 100644 --- a/bigquery/src/client.rs +++ b/bigquery/src/client.rs @@ -42,7 +42,7 @@ pub struct ClientConfig { debug: bool, } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct StreamingWriteConfig { channel_config: ChannelConfig, max_insert_count: usize, @@ -59,15 +59,6 @@ impl StreamingWriteConfig { } } -impl Default for StreamingWriteConfig { - fn default() -> Self { - Self { - channel_config: ChannelConfig::default(), - max_insert_count: 1000, - } - } -} - #[derive(Clone, Debug)] pub struct ChannelConfig { /// num_channels is the number of gRPC channels. @@ -230,7 +221,7 @@ pub struct Client { model_client: BigqueryModelClient, streaming_read_conn_pool: Arc, streaming_write_conn_pool: Arc, - stereaming_write_max_insert_count: usize, + streaming_write_max_insert_count: usize, } impl Client { @@ -265,7 +256,7 @@ impl Client { .into_connection_manager(&config.environment) .await?, ), - stereaming_write_max_insert_count: config.streaming_write_config.max_insert_count, + streaming_write_max_insert_count: config.streaming_write_config.max_insert_count, }) } @@ -320,7 +311,6 @@ impl Client { /// use prost::Message; /// use tokio::sync::futures; /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; - /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; /// use futures_util::stream::StreamExt; /// /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) @@ -357,7 +347,6 @@ impl Client { /// use prost::Message; /// use tokio::sync::futures; /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; - /// use google_cloud_bigquery::storage_write::stream::ManagedStream; /// use futures_util::stream::StreamExt; /// /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) @@ -379,7 +368,7 @@ impl Client { /// } /// ``` pub fn default_storage_writer(&self) -> default::Writer { - default::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) + default::Writer::new(self.streaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new committed type storage writer. @@ -391,7 +380,6 @@ impl Client { /// use prost::Message; /// use tokio::sync::futures; /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; - /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; /// use futures_util::stream::StreamExt; /// /// pub async fn run(client: &Client, table: &str, rows: Vec, schema: DescriptorProto) @@ -415,7 +403,7 @@ impl Client { /// } /// ``` pub fn committed_storage_writer(&self) -> committed::Writer { - committed::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) + committed::Writer::new(self.streaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Creates a new buffered type storage writer. 
@@ -426,7 +414,6 @@ impl Client { /// use prost::Message; /// use tokio::sync::futures; /// use google_cloud_bigquery::storage_write::AppendRowsRequestBuilder; - /// use google_cloud_bigquery::storage_write::stream::{DisposableStream, ManagedStream}; /// use futures_util::stream::StreamExt; /// use google_cloud_gax::grpc::Status; /// @@ -451,7 +438,7 @@ impl Client { /// } /// ``` pub fn buffered_storage_writer(&self) -> buffered::Writer { - buffered::Writer::new(self.stereaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) + buffered::Writer::new(self.streaming_write_max_insert_count, self.streaming_write_conn_pool.clone()) } /// Run query job and get result. diff --git a/bigquery/src/grpc/apiv1/bigquery_client.rs b/bigquery/src/grpc/apiv1/bigquery_client.rs index bdb12018..0fce003a 100644 --- a/bigquery/src/grpc/apiv1/bigquery_client.rs +++ b/bigquery/src/grpc/apiv1/bigquery_client.rs @@ -228,60 +228,3 @@ pub(crate) fn create_write_stream_request(table: &str, write_type: Type) -> Crea }), } } - -#[cfg(test)] -mod tests { - - use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request::{ProtoData, Rows}; - - use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsRequest, ProtoRows, ProtoSchema}; - use prost::Message; - use prost_types::{field_descriptor_proto, DescriptorProto, FieldDescriptorProto}; - - #[derive(Clone, PartialEq, ::prost::Message)] - struct TestData { - #[prost(string, tag = "1")] - pub col_string: String, - } - - fn create_append_rows_request(name: &str, buf: Vec) -> AppendRowsRequest { - AppendRowsRequest { - write_stream: name.to_string(), - offset: None, - trace_id: "".to_string(), - missing_value_interpretations: Default::default(), - default_missing_value_interpretation: 0, - rows: Some(Rows::ProtoRows(ProtoData { - writer_schema: Some(ProtoSchema { - proto_descriptor: Some(DescriptorProto { - name: Some("TestData".to_string()), - field: vec![FieldDescriptorProto { - name: Some("col_string".to_string()), - number: Some(1), - label: None, - r#type: Some(field_descriptor_proto::Type::String.into()), - type_name: None, - extendee: None, - default_value: None, - oneof_index: None, - json_name: None, - options: None, - proto3_optional: None, - }], - extension: vec![], - nested_type: vec![], - enum_type: vec![], - extension_range: vec![], - oneof_decl: vec![], - options: None, - reserved_range: vec![], - reserved_name: vec![], - }), - }), - rows: Some(ProtoRows { - serialized_rows: vec![buf], - }), - })), - } - } -} diff --git a/bigquery/src/storage_write/stream/buffered.rs b/bigquery/src/storage_write/stream/buffered.rs index e31150b2..fe9d34a1 100644 --- a/bigquery/src/storage_write/stream/buffered.rs +++ b/bigquery/src/storage_write/stream/buffered.rs @@ -1,9 +1,10 @@ use crate::grpc::apiv1::bigquery_client::create_write_stream_request; use crate::grpc::apiv1::conn_pool::ConnectionManager; -use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; -use google_cloud_gax::grpc::Status; +use crate::storage_write::stream::{AsStream, DisposableStreamDelegate, ManagedStreamDelegate, Stream}; +use crate::storage_write::AppendRowsRequestBuilder; +use google_cloud_gax::grpc::{Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Buffered; -use google_cloud_googleapis::cloud::bigquery::storage::v1::FlushRowsRequest; +use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsResponse, FlushRowsRequest}; use std::sync::Arc; pub struct Writer { 
@@ -30,19 +31,20 @@ impl BufferedStream {
     pub(crate) fn new(inner: Stream) -> Self {
         Self { inner }
     }
-}
-impl AsStream for BufferedStream {
-    fn as_ref(&self) -> &Stream {
-        &self.inner
+    pub async fn append_rows(
+        &self,
+        rows: Vec<AppendRowsRequestBuilder>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+        ManagedStreamDelegate::append_rows(&self.inner, rows).await
+    }
+
+    pub async fn finalize(&self) -> Result<i64, Status> {
+        DisposableStreamDelegate::finalize(&self.inner).await
     }
-}
-impl ManagedStream for BufferedStream {}
-impl DisposableStream for BufferedStream {}
-impl BufferedStream {
     pub async fn flush_rows(&self, offset: Option<i64>) -> Result<i64, Status> {
-        let stream = self.as_ref();
+        let stream = &self.inner;
         let res = stream
             .cons
             .writer()
@@ -59,6 +61,12 @@
     }
 }
+impl AsStream for BufferedStream {
+    fn as_ref(&self) -> &Stream {
+        &self.inner
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use futures_util::StreamExt;
@@ -67,7 +75,6 @@
     use crate::client::{Client, ClientConfig};
     use crate::storage_write::stream::tests::{create_append_rows_request, TestData};
-    use crate::storage_write::stream::{DisposableStream, ManagedStream};
     use google_cloud_gax::grpc::Status;
     use prost::Message;
@@ -142,7 +149,7 @@
         let mut streams = vec![];
         let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string();
         let stream = Arc::new(writer.create_write_stream(&table).await.unwrap());
-        for i in 0..2 {
+        for _i in 0..2 {
             streams.push(stream.clone());
         }
diff --git a/bigquery/src/storage_write/stream/committed.rs b/bigquery/src/storage_write/stream/committed.rs
index 1170c6ec..40c88667 100644
--- a/bigquery/src/storage_write/stream/committed.rs
+++ b/bigquery/src/storage_write/stream/committed.rs
@@ -1,8 +1,10 @@
 use crate::grpc::apiv1::bigquery_client::create_write_stream_request;
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
-use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream};
-use google_cloud_gax::grpc::Status;
+use crate::storage_write::stream::{AsStream, DisposableStreamDelegate, ManagedStreamDelegate, Stream};
+use crate::storage_write::AppendRowsRequestBuilder;
+use google_cloud_gax::grpc::{Status, Streaming};
 use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Committed;
+use google_cloud_googleapis::cloud::bigquery::storage::v1::AppendRowsResponse;
 use std::sync::Arc;
 pub struct Writer {
@@ -34,6 +36,17 @@ impl CommittedStream {
     pub(crate) fn new(inner: Stream) -> Self {
         Self { inner }
     }
+
+    pub async fn append_rows(
+        &self,
+        rows: Vec<AppendRowsRequestBuilder>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+        ManagedStreamDelegate::append_rows(&self.inner, rows).await
+    }
+
+    pub async fn finalize(&self) -> Result<i64, Status> {
+        DisposableStreamDelegate::finalize(&self.inner).await
+    }
 }
 impl AsStream for CommittedStream {
@@ -41,14 +54,10 @@
         &self.inner
     }
 }
-impl ManagedStream for CommittedStream {}
-impl DisposableStream for CommittedStream {}
-
 #[cfg(test)]
 mod tests {
     use crate::client::{Client, ClientConfig};
     use crate::storage_write::stream::tests::{create_append_rows_request, TestData};
-    use crate::storage_write::stream::{DisposableStream, ManagedStream};
     use futures_util::StreamExt;
     use google_cloud_gax::grpc::Status;
     use prost::Message;
@@ -124,7 +133,7 @@
         let mut streams = vec![];
         let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id).to_string();
         let stream = Arc::new(writer.create_write_stream(&table).await.unwrap());
-        for i in 0..2 {
+        for _i in 0..2 {
             streams.push(stream.clone());
        }
diff --git a/bigquery/src/storage_write/stream/default.rs b/bigquery/src/storage_write/stream/default.rs
index 742a87b7..5e45b41a 100644
--- a/bigquery/src/storage_write/stream/default.rs
+++ b/bigquery/src/storage_write/stream/default.rs
@@ -1,7 +1,8 @@
 use crate::grpc::apiv1::conn_pool::ConnectionManager;
-use crate::storage_write::stream::{AsStream, ManagedStream, Stream};
-use google_cloud_gax::grpc::Status;
-use google_cloud_googleapis::cloud::bigquery::storage::v1::GetWriteStreamRequest;
+use crate::storage_write::stream::{AsStream, ManagedStreamDelegate, Stream};
+use crate::storage_write::AppendRowsRequestBuilder;
+use google_cloud_gax::grpc::{Status, Streaming};
+use google_cloud_googleapis::cloud::bigquery::storage::v1::{AppendRowsResponse, GetWriteStreamRequest};
 use std::sync::Arc;
 pub struct Writer {
@@ -39,6 +40,12 @@ impl DefaultStream {
     pub(crate) fn new(inner: Stream) -> Self {
         Self { inner }
     }
+    pub async fn append_rows(
+        &self,
+        rows: Vec<AppendRowsRequestBuilder>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+        ManagedStreamDelegate::append_rows(&self.inner, rows).await
+    }
 }
 impl AsStream for DefaultStream {
@@ -46,13 +53,11 @@
         &self.inner
     }
 }
-impl ManagedStream for DefaultStream {}
 #[cfg(test)]
 mod tests {
     use crate::client::{Client, ClientConfig};
     use crate::storage_write::stream::tests::{create_append_rows_request, TestData};
-    use crate::storage_write::stream::ManagedStream;
     use futures_util::StreamExt;
     use google_cloud_gax::grpc::Status;
     use prost::Message;
diff --git a/bigquery/src/storage_write/stream/mod.rs b/bigquery/src/storage_write/stream/mod.rs
index 290ceac8..d7e10367 100644
--- a/bigquery/src/storage_write/stream/mod.rs
+++ b/bigquery/src/storage_write/stream/mod.rs
@@ -12,7 +12,7 @@
 pub mod committed;
 pub mod default;
 pub mod pending;
-pub(crate) struct Stream {
+pub struct Stream {
     inner: WriteStream,
     cons: Arc<ConnectionManager>,
     fc: Option<FlowController>,
@@ -32,7 +32,7 @@
     }
 }
-pub(crate) trait AsStream: Sized {
+pub trait AsStream: Sized {
     fn as_ref(&self) -> &Stream;
     fn name(&self) -> &str {
@@ -52,22 +52,26 @@
     }
 }
-pub trait ManagedStream: AsStream {
-    async fn append_rows(&self, rows: Vec<AppendRowsRequestBuilder>) -> Result<Streaming<AppendRowsResponse>, Status> {
-        let name = self.name().to_string();
-        let stream = async_stream::stream! {
+pub(crate) struct ManagedStreamDelegate {}
+
+impl ManagedStreamDelegate {
+    async fn append_rows(
+        stream: &Stream,
+        rows: Vec<AppendRowsRequestBuilder>,
+    ) -> Result<Streaming<AppendRowsResponse>, Status> {
+        let name = stream.inner.name.to_string();
+        let req = async_stream::stream!
{ for row in rows { yield row.build(&name); } }; - self.append_streaming_request(stream).await + Self::append_streaming_request(stream, req).await } async fn append_streaming_request( - &self, + stream: &Stream, req: impl IntoStreamingRequest, ) -> Result, Status> { - let stream = self.as_ref(); match &stream.fc { None => { let mut client = stream.cons.writer(); @@ -84,9 +88,9 @@ pub trait ManagedStream: AsStream { } } -pub trait DisposableStream: ManagedStream { - async fn finalize(&self) -> Result { - let stream = self.as_ref(); +pub(crate) struct DisposableStreamDelegate {} +impl DisposableStreamDelegate { + async fn finalize(stream: &Stream) -> Result { let res = stream .cons .writer() diff --git a/bigquery/src/storage_write/stream/pending.rs b/bigquery/src/storage_write/stream/pending.rs index 378efeed..31a6efb8 100644 --- a/bigquery/src/storage_write/stream/pending.rs +++ b/bigquery/src/storage_write/stream/pending.rs @@ -1,10 +1,11 @@ use crate::grpc::apiv1::bigquery_client::create_write_stream_request; use crate::grpc::apiv1::conn_pool::ConnectionManager; -use crate::storage_write::stream::{AsStream, DisposableStream, ManagedStream, Stream}; -use google_cloud_gax::grpc::Status; +use crate::storage_write::stream::{AsStream, DisposableStreamDelegate, ManagedStreamDelegate, Stream}; +use crate::storage_write::AppendRowsRequestBuilder; +use google_cloud_gax::grpc::{Status, Streaming}; use google_cloud_googleapis::cloud::bigquery::storage::v1::write_stream::Type::Pending; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, + AppendRowsResponse, BatchCommitWriteStreamsRequest, BatchCommitWriteStreamsResponse, }; use std::sync::Arc; @@ -56,6 +57,17 @@ impl PendingStream { pub(crate) fn new(inner: Stream) -> Self { Self { inner } } + + pub async fn append_rows( + &self, + rows: Vec, + ) -> Result, Status> { + ManagedStreamDelegate::append_rows(&self.inner, rows).await + } + + pub async fn finalize(&self) -> Result { + DisposableStreamDelegate::finalize(&self.inner).await + } } impl AsStream for PendingStream { @@ -63,14 +75,10 @@ impl AsStream for PendingStream { &self.inner } } -impl ManagedStream for PendingStream {} -impl DisposableStream for PendingStream {} - #[cfg(test)] mod tests { use crate::client::{Client, ClientConfig}; use crate::storage_write::stream::tests::{create_append_rows_request, TestData}; - use crate::storage_write::stream::{DisposableStream, ManagedStream}; use futures_util::StreamExt; use google_cloud_gax::grpc::Status; use prost::Message; @@ -156,7 +164,7 @@ mod tests { let table = format!("projects/{}/datasets/gcrbq_storage/tables/write_test", &project_id); let mut writer = client.pending_storage_writer(&table); let stream = Arc::new(writer.create_write_stream().await.unwrap()); - for i in 0..2 { + for _i in 0..2 { streams.push(stream.clone()); }