From 7b55da6354d04f4039e3b5993dc5ec8de5e7060b Mon Sep 17 00:00:00 2001 From: jpbotelho Date: Sat, 20 Sep 2025 11:47:36 +0100 Subject: [PATCH] fix: typos --- docs/_snippets/_service_actions_menu.md | 2 +- docs/dictionary/index.md | 2 +- docs/getting-started/example-datasets/dbpedia.md | 2 +- docs/getting-started/example-datasets/laion.md | 4 ++-- docs/getting-started/example-datasets/tpcds.md | 2 +- .../data-ingestion/apache-spark/spark-native-connector.md | 4 ++-- .../data-ingestion/clickpipes/mysql/source/aurora.md | 2 +- .../data-ingestion/clickpipes/mysql/source/rds.md | 2 +- docs/integrations/data-ingestion/s3/index.md | 2 +- docs/integrations/index.mdx | 4 ++-- .../language-clients/java/client/_snippets/_v0_8.mdx | 6 +++--- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/_snippets/_service_actions_menu.md b/docs/_snippets/_service_actions_menu.md index 914129e3f8d..2b473a300d3 100644 --- a/docs/_snippets/_service_actions_menu.md +++ b/docs/_snippets/_service_actions_menu.md @@ -1,6 +1,6 @@ import Image from '@theme/IdealImage'; import cloud_service_action_menu from '@site/static/images/_snippets/cloud-service-actions-menu.png'; -Select your service, followed by `Data souces` -> `Predefined sample data`. +Select your service, followed by `Data sources` -> `Predefined sample data`. ClickHouse Cloud service Actions menu showing Data sources and Predefined sample data options diff --git a/docs/dictionary/index.md b/docs/dictionary/index.md index 095ef903aea..30f6f5f8856 100644 --- a/docs/dictionary/index.md +++ b/docs/dictionary/index.md @@ -208,7 +208,7 @@ LIMIT 5 FORMAT PrettyCompactMonoBlock ┌───────Id─┬─Title─────────────────────────────────────────────────────────┬─Location──────────────┐ -│ 52296928 │ Comparision between two Strings in ClickHouse │ Spain │ +│ 52296928 │ Comparison between two Strings in ClickHouse │ Spain │ │ 52345137 │ How to use a file to migrate data from mysql to a clickhouse? │ 中国江苏省Nanjing Shi │ │ 61452077 │ How to change PARTITION in clickhouse │ Guangzhou, 广东省中国 │ │ 55608325 │ Clickhouse select last record without max() on all table │ Moscow, Russia │ diff --git a/docs/getting-started/example-datasets/dbpedia.md b/docs/getting-started/example-datasets/dbpedia.md index db27c6fb874..f59905f4b74 100644 --- a/docs/getting-started/example-datasets/dbpedia.md +++ b/docs/getting-started/example-datasets/dbpedia.md @@ -116,7 +116,7 @@ LIMIT 20 ``` Note down the query latency so that we can compare it with the query latency of ANN (using vector index). -Also record the query latency with cold OS file cache and with `max_theads=1` to recognize the real compute +Also record the query latency with cold OS file cache and with `max_threads=1` to recognize the real compute usage and storage bandwidth usage (extrapolate it to a production dataset with millions of vectors!) 
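As an illustrative sketch (the `dbpedia` table and its `id`, `title`, and `vector` columns are assumed here, and the short array literal stands in for a real query embedding), such a single-threaded brute-force run might look like this:

```sql
-- Hypothetical brute-force search pinned to one thread; in practice the target
-- array would be the full query embedding, not this three-element placeholder.
SELECT id, title
FROM dbpedia
ORDER BY cosineDistance(vector, [0.1, 0.2, 0.3]) ASC
LIMIT 20
SETTINGS max_threads = 1;
```

The OS file cache itself is dropped outside ClickHouse, for example at the operating-system level, before re-running the statement to capture the cold-cache latency.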
## Build a vector similarity index {#build-vector-similarity-index} diff --git a/docs/getting-started/example-datasets/laion.md b/docs/getting-started/example-datasets/laion.md index 7d601ab5ecf..2bc4bb1381a 100644 --- a/docs/getting-started/example-datasets/laion.md +++ b/docs/getting-started/example-datasets/laion.md @@ -53,7 +53,7 @@ data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_em data['image_embedding'] = data['image_embedding'].apply(lambda x: x.tolist()) data['text_embedding'] = data['text_embedding'].apply(lambda x: x.tolist()) -# this small hack is needed becase caption sometimes contains all kind of quotes +# this small hack is needed because caption sometimes contains all kinds of quotes data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " ")) # export data as CSV file @@ -132,7 +132,7 @@ For now, we can run the embedding of a random LEGO set picture as `target`. 10 rows in set. Elapsed: 4.605 sec. Processed 100.38 million rows, 309.98 GB (21.80 million rows/s., 67.31 GB/s.) ``` -## Run an approximate vector similarity search with a vector simialrity index {#run-an-approximate-vector-similarity-search-with-a-vector-similarity-index} +## Run an approximate vector similarity search with a vector similarity index {#run-an-approximate-vector-similarity-search-with-a-vector-similarity-index} Let's now define two vector similarity indexes on the table. diff --git a/docs/getting-started/example-datasets/tpcds.md b/docs/getting-started/example-datasets/tpcds.md index c97ed998d02..92d150bc61c 100644 --- a/docs/getting-started/example-datasets/tpcds.md +++ b/docs/getting-started/example-datasets/tpcds.md @@ -408,7 +408,7 @@ CREATE TABLE store ( s_zip LowCardinality(Nullable(String)), s_country LowCardinality(Nullable(String)), s_gmt_offset Nullable(Decimal(7,2)), - s_tax_precentage Nullable(Decimal(7,2)), + s_tax_percentage Nullable(Decimal(7,2)), PRIMARY KEY (s_store_sk) ); diff --git a/docs/integrations/data-ingestion/apache-spark/spark-native-connector.md b/docs/integrations/data-ingestion/apache-spark/spark-native-connector.md index ab984cfd1a2..50a2c93b8cf 100644 --- a/docs/integrations/data-ingestion/apache-spark/spark-native-connector.md +++ b/docs/integrations/data-ingestion/apache-spark/spark-native-connector.md @@ -426,7 +426,7 @@ object NativeSparkWrite extends App { from pyspark.sql import SparkSession from pyspark.sql import Row -# Feel free to use any other packages combination satesfying the compatability martix provided above. +# Feel free to use any other combination of packages satisfying the compatibility matrix provided above.
packages = [ "com.clickhouse.spark:clickhouse-spark-runtime-3.4_2.12:0.8.0", "com.clickhouse:clickhouse-client:0.7.0", @@ -461,7 +461,7 @@ df.writeTo("clickhouse.default.example_table").append() ```sql - -- resultTalbe is the Spark intermediate df we want to insert into clickhouse.default.example_table + -- resultTable is the Spark intermediate df we want to insert into clickhouse.default.example_table INSERT INTO TABLE clickhouse.default.example_table SELECT * FROM resultTable; diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md index 9bed84338c7..5abb5ef8ff4 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/aurora.md @@ -42,7 +42,7 @@ If ClickPipes tries to resume replication and the required binlog files have bee By default, Aurora MySQL purges the binary log as soon as possible (i.e., _lazy purging_). We recommend increasing the binlog retention interval to at least **72 hours** to ensure availability of binary log files for replication under failure scenarios. To set an interval for binary log retention ([`binlog retention hours`](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/mysql-stored-proc-configuring.html#mysql_rds_set_configuration-usage-notes.binlog-retention-hours)), use the [`mysql.rds_set_configuration`](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/mysql-stored-proc-configuring.html#mysql_rds_set_configuration) procedure: -[//]: # "NOTE Most CDC providers recommend the maximum retention period for Aurora RDS (7 days/168 hours). Since this has an impact on disk usage, we conservatively recommend a mininum of 3 days/72 hours." +[//]: # "NOTE Most CDC providers recommend the maximum retention period for Aurora RDS (7 days/168 hours). Since this has an impact on disk usage, we conservatively recommend a minimum of 3 days/72 hours." ```text mysql=> call mysql.rds_set_configuration('binlog retention hours', 72); diff --git a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md index 8e6da3bebb7..0e62630f74d 100644 --- a/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md +++ b/docs/integrations/data-ingestion/clickpipes/mysql/source/rds.md @@ -42,7 +42,7 @@ If ClickPipes tries to resume replication and the required binlog files have bee By default, Amazon RDS purges the binary log as soon as possible (i.e., _lazy purging_). We recommend increasing the binlog retention interval to at least **72 hours** to ensure availability of binary log files for replication under failure scenarios. To set an interval for binary log retention ([`binlog retention hours`](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/mysql-stored-proc-configuring.html#mysql_rds_set_configuration-usage-notes.binlog-retention-hours)), use the [`mysql.rds_set_configuration`](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/mysql-stored-proc-configuring.html#mysql_rds_set_configuration) procedure: -[//]: # "NOTE Most CDC providers recommend the maximum retention period for RDS (7 days/168 hours). Since this has an impact on disk usage, we conservatively recommend a mininum of 3 days/72 hours." +[//]: # "NOTE Most CDC providers recommend the maximum retention period for RDS (7 days/168 hours). Since this has an impact on disk usage, we conservatively recommend a minimum of 3 days/72 hours." 
```text mysql=> call mysql.rds_set_configuration('binlog retention hours', 72); diff --git a/docs/integrations/data-ingestion/s3/index.md b/docs/integrations/data-ingestion/s3/index.md index 9a7cd275da8..e48e84408e6 100644 --- a/docs/integrations/data-ingestion/s3/index.md +++ b/docs/integrations/data-ingestion/s3/index.md @@ -1027,7 +1027,7 @@ ClickHouse Keeper is responsible for coordinating the replication of data across See the [network ports](../../../guides/sre/network-ports.md) list when you configure the security settings in AWS so that your servers can communicate with each other, and you can communicate with them. -All three servers must listen for network connections so that they can communicate between the servers and with S3. By default, ClickHouse listens ony on the loopback address, so this must be changed. This is configured in `/etc/clickhouse-server/config.d/`. Here is a sample that configures ClickHouse and ClickHouse Keeper to listen on all IP v4 interfaces. see the documentation or the default configuration file `/etc/clickhouse/config.xml` for more information. +All three servers must listen for network connections so that they can communicate between the servers and with S3. By default, ClickHouse listens only on the loopback address, so this must be changed. This is configured in `/etc/clickhouse-server/config.d/`. Here is a sample that configures ClickHouse and ClickHouse Keeper to listen on all IPv4 interfaces. See the documentation or the default configuration file `/etc/clickhouse/config.xml` for more information. ```xml title="/etc/clickhouse-server/config.d/networking.xml" diff --git a/docs/integrations/index.mdx b/docs/integrations/index.mdx index c1977c4954c..951bea08c33 100644 --- a/docs/integrations/index.mdx +++ b/docs/integrations/index.mdx @@ -224,7 +224,7 @@ We are actively compiling this list of ClickHouse integrations below, so it's no |Google Cloud Storage||Data ingestion|Import from, export to, and transform GCS data in flight with ClickHouse built-in `S3` functions.|[Documentation](/integrations/data-ingestion/s3/index.md)| |Golang||Language client|The Go client uses the native interface for a performant, low-overhead means of connecting to ClickHouse.|[Documentation](/integrations/language-clients/go/index.md)| |HDFS||Data ingestion|Provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse.|[Documentation](/engines/table-engines/integrations/hdfs)| -|Hive||Data ingestionn|The Hive engine allows you to perform `SELECT` quries on HDFS Hive table.|[Documentation](/engines/table-engines/integrations/hive)| +|Hive||Data ingestion|The Hive engine allows you to perform `SELECT` queries on an HDFS Hive table.|[Documentation](/engines/table-engines/integrations/hive)| |Hudi|Apache Hudi logo|Data ingestion| provides a read-only integration with existing Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3.|[Documentation](/engines/table-engines/integrations/hudi)| |Iceberg|Apache Iceberg logo|Data ingestion|Provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.|[Documentation](/engines/table-engines/integrations/iceberg)| |Java, JDBC||Language client|The Java client and JDBC driver.|[Documentation](/integrations/language-clients/java/index.md)| @@ -327,7 +327,7 @@ We are actively compiling this list of ClickHouse 
integrations below, so it's no |SiSense|SiSense logo|Data visualization|Embed analytics into any application or workflow|[Website](https://www.sisense.com/data-connectors/)| |SigNoz|SigNoz logo|Data visualization|Open Source Observability Platform|[Documentation](https://www.signoz.io/docs/architecture/)| |Snappy Flow|Snappy Flow logo|Data management|Collects ClickHouse database metrics via plugin.|[Documentation](https://docs.snappyflow.io/docs/Integrations/clickhouse/instance)| -|Soda|Soda logo|Data quality|Soda integration makes it easy for organziations to detect, resolve, and prevent data quality issues by running data quality checks on data before it is loaded into the database.|[Website](https://www.soda.io/integrations/clickhouse)| +|Soda|Soda logo|Data quality|Soda integration makes it easy for organizations to detect, resolve, and prevent data quality issues by running data quality checks on data before it is loaded into the database.|[Website](https://www.soda.io/integrations/clickhouse)| |Splunk|Splunk logo|Data integration|Splunk modular input to import to Splunk the ClickHouse Cloud Audit logs.|[Website](https://splunkbase.splunk.com/app/7709),
[Documentation](/integrations/tools/data-integration/splunk/index.md)| |StreamingFast|StreamingFast logo|Data ingestion| Blockchain-agnostic, parallelized and streaming-first data engine. |[Website](https://www.streamingfast.io/)| |Streamkap|Streamkap logo|Data ingestion|Setup real-time CDC (Change Data Capture) streaming to ClickHouse with high throughput in minutes.|[Documentation](https://docs.streamkap.com/docs/clickhouse)| diff --git a/docs/integrations/language-clients/java/client/_snippets/_v0_8.mdx b/docs/integrations/language-clients/java/client/_snippets/_v0_8.mdx index 8634fb9c528..639533fd73c 100644 --- a/docs/integrations/language-clients/java/client/_snippets/_v0_8.mdx +++ b/docs/integrations/language-clients/java/client/_snippets/_v0_8.mdx @@ -77,7 +77,7 @@ Authentication by an access token requires setting access token by calling `setA .build(); ``` -Authentication by a SSL Client Certificate require setting username, enabling SSL Authentication, setting a client sertificate and a client key by calling `setUsername(String)`, `useSSLAuthentication(boolean)`, `setClientCertificate(String)` and `setClientKey(String)` accordingly: +Authentication by an SSL Client Certificate requires setting a username, enabling SSL Authentication, setting a client certificate and a client key by calling `setUsername(String)`, `useSSLAuthentication(boolean)`, `setClientCertificate(String)` and `setClientKey(String)` accordingly: ```java showLineNumbers Client client = new Client.Builder() .useSSLAuthentication(true) @@ -150,7 +150,7 @@ Configuration is defined during client creation. See `com.clickhouse.client.api. | `setServerTimeZone(String timeZone)` | `timeZone` - string value of java valid timezone ID (see `java.time.ZoneId`) | Sets server side timezone. UTC timezone will be used by default.

Default: `UTC`
Enum: `ClientConfigProperties.SERVER_TIMEZONE`
Key: `server_time_zone` | | `useAsyncRequests(boolean async)` | `async` - flag that indicates if the option should be enabled. | Sets if client should execute request in a separate thread. Disabled by default because application knows better how to organize multi-threaded tasks and running tasks in separate thread do not help with performance.

Default: `false`
Enum: `ClientConfigProperties.ASYNC_OPERATIONS`
Key: `async` | | `setSharedOperationExecutor(ExecutorService executorService)` | `executorService` - instance of executor service. | Sets executor service for operation tasks.

Default: `none`
Enum: `none`
Key: `none`| -| `setClientNetworkBufferSize(int size)` | - `size` - size in bytes | Sets size of a buffer in application memory space that is used to copy data back-and-forth between socket and application. Greater reduces system calls to TCP stack, but affects how much memory is spent on every connection. This buffer is also subject for GC because connections are shortlive. Also keep in mind that allocating big continious block of memory might be a problem.

Default: `300000`
Enum: `ClientConfigProperties.CLIENT_NETWORK_BUFFER_SIZE`
Key: `client_network_buffer_size`| +| `setClientNetworkBufferSize(int size)` | - `size` - size in bytes | Sets the size of a buffer in application memory space that is used to copy data back and forth between the socket and the application. A larger buffer reduces system calls to the TCP stack, but affects how much memory is spent on every connection. This buffer is also subject to GC because connections are short-lived. Also keep in mind that allocating a big contiguous block of memory might be a problem.

Default: `300000`
Enum: `ClientConfigProperties.CLIENT_NETWORK_BUFFER_SIZE`
Key: `client_network_buffer_size`| | `retryOnFailures(ClientFaultCause ...causes)` | - `causes` - enum constant of `com.clickhouse.client.api.ClientFaultCause` | Sets recoverable/retriable fault types.

Default: `NoHttpResponse,ConnectTimeout,ConnectionRequestTimeout`
Enum: `ClientConfigProperties.CLIENT_RETRY_ON_FAILURE`
Key: `client_retry_on_failures` | | `setMaxRetries(int maxRetries)` | - `maxRetries` - number of retries | Sets maximum number of retries for failures defined by `retryOnFailures(ClientFaultCause ...causes)`

Default: `3`
Enum: `ClientConfigProperties.RETRY_ON_FAILURE`
Key: `retry` | | `allowBinaryReaderToReuseBuffers(boolean reuse)` | - `reuse` - flag that indicates if the option should be enabled | Most datasets contain numeric data encoded as small byte sequences. By default reader will allocate required buffer, read data into it and then transform into a target Number class. That may cause significant GC preasure because of many small objects are being allocated and released. If this option is enabled then reader will use preallocated buffers to do numbers transcoding. It is safe because each reader has own set of buffers and readers are used by one thread. | @@ -349,7 +349,7 @@ ### insert(String tableName, List<?> data, InsertSettings settings) {#insertstring-tablename-listlt-data-insertsettings-settings} -Sends a write request to database. The list of objects is converted into an efficient format and then is sent to a server. The class of the list items should be registed up-front using `register(Class, TableSchema)` method. +Sends a write request to the database. The list of objects is converted into an efficient format and then sent to the server. The class of the list items should be registered up-front using the `register(Class, TableSchema)` method. **Signatures** ```java