feat(risedev): add check for trailing spaces in risedev check (risingwavelabs#11294)

Signed-off-by: Richard Chien <[email protected]>
stdrc authored Jul 28, 2023
1 parent b9e7fe2 commit f2a3fd0
Showing 169 changed files with 689 additions and 618 deletions.
2 changes: 1 addition & 1 deletion .cargo/audit.toml
@@ -5,7 +5,7 @@
ignore = [
# We depends on `chrono`, but not `time`, and `chrono` is not affected by `RUSTSEC-2020-0071`
# (see https://github.com/time-rs/time/issues/293#issuecomment-946382614).
#
#
# `chrono` also suffers from a similar vulnerability ([`RUSTSEC-2020-0159`](https://rustsec.org/advisories/RUSTSEC-2020-0159),
# but it's already patched in `0.4.20` by rewriting vulnerable C function in Rust).
"RUSTSEC-2020-0071",
8 changes: 4 additions & 4 deletions .github/ISSUE_TEMPLATE/bug_report.yml
@@ -17,20 +17,20 @@ body:
description: Steps to reproduce the behavior, including the SQLs you run and/or the operations you have done to trigger the bug.
placeholder: |
First create the tables/sources and materialized views with
```sql
CREATE TABLE ...
CREATE MATERIALIZED VIEW ...
```
Then the bug is triggered after ...
- type: textarea
attributes:
label: Expected behavior
description: A clear and concise description of what you expected to happen.
placeholder: |
I expected to see this happen: *explanation*
Instead, this happened: *explanation*
- type: textarea
attributes:
@@ -58,4 +58,4 @@ body:
attributes:
label: Additional context
description: Add any other context about the problem here. e.g., the full log files.

2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/design-rfc.yml
@@ -28,5 +28,5 @@ body:
label: Q&A
description: Here's where the doc readers can leave the questions and suggestions
placeholder: |
* Why do you need ...
* Why do you need ...
* What will happen if ...
6 changes: 3 additions & 3 deletions .github/pull_request_template.md
@@ -31,7 +31,7 @@ Please explain **IN DETAIL** what the changes are in this PR and why they are ne

- [ ] My PR contains user-facing changes.

<!--
<!--
You can ignore or delete the section below if your PR does not contain user-facing changes.
@@ -54,9 +54,9 @@ Please keep the types that apply to your changes, and remove the others.
### Release note

<!--
Please create a release note for your changes.
Please create a release note for your changes.
Discuss technical details in the "What's changed" section, and
Discuss technical details in the "What's changed" section, and
focus on the impact on users in the release note.
You should also mention the environment or conditions where the impact may occur.
4 changes: 2 additions & 2 deletions .github/workflows/cherry-pick-to-release-branch.yml
@@ -20,7 +20,7 @@ jobs:
pr_labels: 'cherry-pick'
pr_body: ${{ format('Cherry picking \#{0} onto branch v0.19.0-rc', github.event.number) }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
release_pull_request_1_0:
if: "contains(github.event.pull_request.labels.*.name, 'need-cherry-pick-v1.0') && github.event.pull_request.merged == true"
runs-on: ubuntu-latest
@@ -35,5 +35,5 @@ jobs:
pr_labels: 'cherry-pick'
pr_body: ${{ format('Cherry picking \#{0} onto branch v1.0-rc', github.event.number) }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

4 changes: 2 additions & 2 deletions .github/workflows/connector-node-integration.yml
@@ -36,9 +36,9 @@ jobs:
if: steps.filter.outputs.java == 'true' || steps.filter.outputs.proto == 'true'
run: |
set -ex
RISINGWAVE_ROOT=${PWD}
echo "--- build connector node"
cd ${RISINGWAVE_ROOT}/java
# run unit test
2 changes: 1 addition & 1 deletion .github/workflows/license_check.yml
@@ -6,7 +6,7 @@ on:
- main
- "forks/*"
pull_request:
branches:
branches:
- main
- "v*.*.*-rc"
merge_group:
12 changes: 12 additions & 0 deletions Makefile.toml
@@ -961,6 +961,16 @@ echo "Running $(tput setaf 4)cargo udeps$(tput sgr0) checks"
cargo udeps --workspace --all-targets ${RISINGWAVE_FEATURE_FLAGS} --exclude workspace-hack --exclude risingwave_bench --exclude risingwave_udf --exclude risingwave_simulation
"""

[tasks.check-trailing-spaces]
private = true
category = "RiseDev - Check"
description = "Check trailing spaces and attempt to fix"
script = """
#!/usr/bin/env bash
# This can trim trailing spaces in all git-managed text files, including .md, .toml, .sh, etc.
scripts/check/check-trailing-spaces.sh --fix
"""

[tasks.check]
category = "RiseDev - Check"
@@ -969,6 +979,7 @@ dependencies = [
"check-hakari",
"check-dep-sort",
"check-fmt",
"check-trailing-spaces",
"check-typos",
"check-clippy",
"check-java",
@@ -991,6 +1002,7 @@ dependencies = [
"check-hakari",
"check-dep-sort",
"check-fmt",
"check-trailing-spaces",
"check-typos",
"check-clippy-fix",
"check-java-fix",
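The `scripts/check/check-trailing-spaces.sh` helper that the new task invokes is not part of this excerpt. As a rough, hypothetical sketch only — assuming git-tracked text files and GNU sed, and not the actual script shipped in the repository — such a check could look like:

```bash
#!/usr/bin/env bash
# Hypothetical sketch -- NOT the real scripts/check/check-trailing-spaces.sh.
# Lists git-tracked text files that contain trailing whitespace; with --fix,
# strips the trailing whitespace in place.
set -euo pipefail

fix=false
if [[ "${1:-}" == "--fix" ]]; then
  fix=true
fi

bad_files=()
# `git grep -Il ''` lists every tracked file that git treats as text.
while IFS= read -r file; do
  if grep -qE '[[:blank:]]+$' "$file"; then
    bad_files+=("$file")
  fi
done < <(git grep -Il '')

if [[ ${#bad_files[@]} -eq 0 ]]; then
  echo "No trailing spaces found."
  exit 0
fi

if [[ "$fix" == true ]]; then
  # GNU sed in-place edit; macOS/BSD sed would need `sed -i ''`.
  sed -i -E 's/[[:blank:]]+$//' "${bad_files[@]}"
  echo "Stripped trailing spaces from ${#bad_files[@]} file(s)."
else
  echo "Files with trailing spaces:"
  printf '  %s\n' "${bad_files[@]}"
  exit 1
fi
```

With the task wired into the `check` dependency lists above, the trailing-space check runs alongside the existing format, typo, and clippy checks whenever the cargo-make `check` task is invoked (typically via the repository's `./risedev check` wrapper, though that entry point is outside this diff).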
2 changes: 1 addition & 1 deletion ci/docker-compose.yml
@@ -32,7 +32,7 @@ services:
interval: 5s
timeout: 5s
retries: 5

message_queue:
image: "docker.vectorized.io/vectorized/redpanda:latest"
command:
2 changes: 1 addition & 1 deletion ci/scripts/build.sh
@@ -41,7 +41,7 @@ echo "--- Build Rust components"

if [[ "$profile" == "ci-dev" ]]; then
RISINGWAVE_FEATURE_FLAGS="--features rw-dynamic-link --no-default-features"
else
else
RISINGWAVE_FEATURE_FLAGS="--features rw-static-link"
fi

2 changes: 1 addition & 1 deletion ci/scripts/check.sh
@@ -21,7 +21,7 @@ echo "--- Run clippy check (dev, all features)"
cargo clippy --all-targets --all-features --locked -- -D warnings

echo "--- Run clippy check (release)"
cargo clippy --release --all-targets --features "rw-static-link" --locked -- -D warnings
cargo clippy --release --all-targets --features "rw-static-link" --locked -- -D warnings

echo "--- Build documentation"
cargo doc --document-private-items --no-deps
2 changes: 1 addition & 1 deletion ci/scripts/common.sh
@@ -40,7 +40,7 @@ export -f download-and-decompress-artifact
# Arguments:
# $1: cargo build `profile` of the binaries
# $2: risedev-components `env` to use
#
#
# Download risingwave and risedev-dev, and put them in target/debug
function download_and_prepare_rw() {
echo "--- Download RisingWave binaries and prepare environment"
4 changes: 2 additions & 2 deletions ci/scripts/connector-node-integration-test.sh
@@ -32,7 +32,7 @@ RISINGWAVE_ROOT=${PWD}
echo "--- install java"
apt install sudo -y

if [ "$VERSION" = "11" ]; then
if [ "$VERSION" = "11" ]; then
echo "The test imgae default java version is 11, no need to install"
else
echo "The test imgae default java version is 11, need to install java 17"
@@ -100,7 +100,7 @@ else
fi

sink_input_feature=("" "--input_binary_file=./data/sink_input --data_format_use_json=False")
upsert_sink_input_feature=("--input_file=./data/upsert_sink_input.json"
upsert_sink_input_feature=("--input_file=./data/upsert_sink_input.json"
"--input_binary_file=./data/upsert_sink_input --data_format_use_json=False")
type=("Json format" "StreamChunk format")

2 changes: 1 addition & 1 deletion ci/scripts/docker-hdfs.sh
@@ -11,7 +11,7 @@ BUILDKITE_COMMIT="HDFS_$(echo $RANDOM | md5sum | head -c 20;)"
java_home_path=$(uname -m)
if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then
java_home_path="/usr/lib/jvm/java-11-openjdk-arm64"
else
else
# x86_64
java_home_path="/usr/lib/jvm/java-11-openjdk-amd64"
fi
2 changes: 1 addition & 1 deletion ci/scripts/e2e-iceberg-sink-test.sh
@@ -61,7 +61,7 @@ spark-3.3.1-bin-hadoop3/bin/spark-sql --packages $DEPENDENCIES \
--conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://127.0.0.1:9301 \
--conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \
--conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \
--S --e "INSERT OVERWRITE DIRECTORY './spark-output' USING CSV SELECT * FROM demo.demo_db.demo_table;"
--S --e "INSERT OVERWRITE DIRECTORY './spark-output' USING CSV SELECT * FROM demo.demo_db.demo_table;"

# check sink destination using shell
if cat ./spark-output/*.csv | sort | awk -F "," '{
2 changes: 1 addition & 1 deletion ci/scripts/sql/nexmark/q13.drop.sql
@@ -1,4 +1,4 @@
-- noinspection SqlNoDataSourceInspectionForFile
-- noinspection SqlResolveForFile
DROP SINK nexmark_q13;
DROP TABLE side_input;
DROP TABLE side_input;
2 changes: 1 addition & 1 deletion ci/scripts/sql/nexmark/q16.sql
@@ -17,5 +17,5 @@ SELECT channel,
count(distinct auction) filter (where price >= 10000 and price < 1000000) AS rank2_auctions,
count(distinct auction) filter (where price >= 1000000) AS rank3_auctions
FROM bid
GROUP BY to_char(date_time, 'YYYY-MM-DD'), channel
GROUP BY to_char(date_time, 'YYYY-MM-DD'), channel
WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true');
2 changes: 1 addition & 1 deletion ci/scripts/sql/nexmark/q17.sql
@@ -12,5 +12,5 @@ SELECT auction,
avg(price) AS avg_price,
sum(price) AS sum_price
FROM bid
GROUP BY to_char(date_time, 'YYYY-MM-DD'), auction
GROUP BY to_char(date_time, 'YYYY-MM-DD'), auction
WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true');
2 changes: 1 addition & 1 deletion ci/scripts/sql/nexmark/q19.sql
@@ -1,7 +1,7 @@
-- noinspection SqlNoDataSourceInspectionForFile
-- noinspection SqlResolveForFile
CREATE SINK nexmark_q19 AS
SELECT *
SELECT *
FROM (SELECT *,
ROW_NUMBER() OVER (
PARTITION BY auction
2 changes: 1 addition & 1 deletion ci/scripts/sql/nexmark/q3.sql
@@ -9,5 +9,5 @@ SELECT P.name,
FROM auction AS A
INNER JOIN person AS P on A.seller = P.id
WHERE A.category = 10
and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca')
and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca')
WITH ( connector = 'blackhole', type = 'append-only');
2 changes: 1 addition & 1 deletion ci/workflows/main.yml
@@ -258,7 +258,7 @@ steps:
mount-buildkite-agent: true
- ./ci/plugins/upload-failure-logs
timeout_in_minutes: 35

- label: "connector node integration test Java {{matrix.java_version}}"
command: "ci/scripts/connector-node-integration-test.sh -p ci-release -v {{matrix.java_version}}"
depends_on:
2 changes: 1 addition & 1 deletion codecov.yml
@@ -5,7 +5,7 @@
coverage:
status:
patch: off # disable patch status
project:
project:
default: false # disable the default status that measures entire project
rust:
only_pulls: true # no status will be posted for commits not on a pull request
4 changes: 2 additions & 2 deletions docker/README.md
@@ -2,7 +2,7 @@

## Published images

- `latest` on GHCR (latest nightly build): `ghcr.io/risingwavelabs/risingwave:latest`
- `latest` on GHCR (latest nightly build): `ghcr.io/risingwavelabs/risingwave:latest`
- `latest` on Docker Hub (latest release): `risingwavelabs/risingwave:latest`
- Other tags available on both GHCR and Docker Hub:
- `nightly-yyyyMMdd`, e.g., `nightly-20230108`
@@ -20,7 +20,7 @@ docker build . -f docker/Dockerfile

from the project root.

To build the images without SIMD vector extensions, run
To build the images without SIMD vector extensions, run

```
docker build . -f docker/Dockerfile --build-arg simd_disabled=true
1 change: 0 additions & 1 deletion docker/grafana-risedev-dashboard.yml
@@ -13,4 +13,3 @@ providers:
options:
path: /dashboards
foldersFromFilesStructure: false

1 change: 0 additions & 1 deletion docker/grafana-risedev-datasource.yml
@@ -13,4 +13,3 @@ datasources:
tlsAuthWithCACert: false
version: 1
editable: true

1 change: 0 additions & 1 deletion docker/grafana.ini
@@ -9,4 +9,3 @@ default_theme = light
[auth.anonymous]
enabled = true
org_role = Admin

34 changes: 17 additions & 17 deletions docs/architecture-design.md
@@ -1,15 +1,15 @@
# Architecture Design

## Motivation
## Motivation

This document serves as one of the materials for newcomers to learn the high-level architecture and the functionalities of each component.

## Architecture
## Architecture

There are currently 4 types of nodes in the cluster:
There are currently 4 types of nodes in the cluster:

* **Frontend**: Frontend is a stateless proxy that accepts user queries through Postgres protocol. It is responsible for parsing, validation, optimization, and answering the results of each individual query.
* **ComputeNode**: ComputeNode is responsible for executing the optimized query plan.
* **Frontend**: Frontend is a stateless proxy that accepts user queries through Postgres protocol. It is responsible for parsing, validation, optimization, and answering the results of each individual query.
* **ComputeNode**: ComputeNode is responsible for executing the optimized query plan.
* **Compactor**: Compactor is a stateless worker node responsible for executing the compaction tasks for our storage engine.
* **MetaServer**: The central metadata management service. It also acts as a failure detector that periodically sends heartbeats to frontends and compute-nodes in the cluster. There are multiple sub-components running in MetaServer:
* **ClusterManager**: Manages the cluster information, such as the address and status of nodes.
@@ -23,19 +23,19 @@ There are currently 4 types of nodes in the cluster:

The topmost component is the Postgres client. It issues queries through [TCP-based Postgres wire protocol](https://www.postgresql.org/docs/current/protocol.html).

The leftmost component is the streaming data source. [Kafka](https://kafka.apache.org) is the most representative system for streaming sources. Alternatively, [Redpanda](https://redpanda.com/), [Apache Pulsar](https://pulsar.apache.org/), [AWS Kinesis](https://aws.amazon.com/kinesis), [Google Pub/Sub](https://cloud.google.com/pubsub/docs/overview) are also widely-used. Streams from Kafka will be consumed and processed through the pipeline in the database.
The leftmost component is the streaming data source. [Kafka](https://kafka.apache.org) is the most representative system for streaming sources. Alternatively, [Redpanda](https://redpanda.com/), [Apache Pulsar](https://pulsar.apache.org/), [AWS Kinesis](https://aws.amazon.com/kinesis), [Google Pub/Sub](https://cloud.google.com/pubsub/docs/overview) are also widely-used. Streams from Kafka will be consumed and processed through the pipeline in the database.

The bottom-most component is AWS S3, or MinIO (an open-sourced s3-compatible system). We employed a disaggregated architecture in order to elastically scale the compute-nodes without migrating the storage.

## Execution Mode
## Execution Mode

There are 2 execution modes in our system serving different analytics purposes.
There are 2 execution modes in our system serving different analytics purposes.

### Batch-Query Mode
### Batch-Query Mode

The first is the *batch-query mode*. Users issue such a query via a *SELECT statement* and the system answers immediately. This is the most typical RDBMS use case.
The first is the *batch-query mode*. Users issue such a query via a *SELECT statement* and the system answers immediately. This is the most typical RDBMS use case.

Let's begin with a simple SELECT and see how it is executed.
Let's begin with a simple SELECT and see how it is executed.

```sql
SELECT SUM(t.quantity) FROM t group by t.company;
@@ -53,10 +53,10 @@ Behind the TableScan operator, there's a storage engine called Hummock that stor

To know more about Hummock, you can check out "[An Overview of RisingWave State Store](./state-store-overview.md)".

### Streaming Mode
### Streaming Mode

The other execution mode is the *streaming mode*. Users build streaming pipelines via [CREATE MATERIALIZED VIEW statement](https://www.postgresql.org/docs/current/sql-creatematerializedview.html).
For example:
The other execution mode is the *streaming mode*. Users build streaming pipelines via [CREATE MATERIALIZED VIEW statement](https://www.postgresql.org/docs/current/sql-creatematerializedview.html).
For example:

```sql
CREATE MATERIALIZED VIEW mv1 AS SELECT SUM(t.quantity) as q FROM t group by t.company;
@@ -66,14 +66,14 @@ CREATE MATERIALIZED VIEW mv1 AS SELECT SUM(t.quantity) as q FROM t group by t.co

When the data source (Kafka, e.g.) propagates a bunch of records into the system, the materialized view will refresh automatically.

Assume that we have a sequence `[(2, "AMERICA"), (3, "ASIA"), (4, "AMERICA"), (5, "ASIA")]`. After the sequence flows through the DAG, the MV will be updated to:
Assume that we have a sequence `[(2, "AMERICA"), (3, "ASIA"), (4, "AMERICA"), (5, "ASIA")]`. After the sequence flows through the DAG, the MV will be updated to:

| A | B
| - | -
| 6 | AMERICA
| 8 | ASIA

When another sequence `[(6, "EUROPE"), (7, "EUROPE")]` comes, the MV will soon become:
When another sequence `[(6, "EUROPE"), (7, "EUROPE")]` comes, the MV will soon become:

| A | B
| - | -
@@ -83,7 +83,7 @@ When another sequence `[(6, "EUROPE"), (7, "EUROPE")]` comes, the MV will soon b

`mv1` can also act as other MV's source. For example, mv2, mv3 can reuse the processing results of mv1 thus deduplicating the computation.

The durability of materialized views in RisingWave is built upon a snapshot-based mechanism. Every time a snapshot is triggered, the internal states of each operator will be flushed to S3. Upon failover, the operator recovers from the latest S3 checkpoint.
The durability of materialized views in RisingWave is built upon a snapshot-based mechanism. Every time a snapshot is triggered, the internal states of each operator will be flushed to S3. Upon failover, the operator recovers from the latest S3 checkpoint.

Since the streaming states can be extremely large, so large that they cannot (or only ineffectively) be held in memory in their entirety, we have designed Hummock to be highly scalable. Compared to [Flink's rocksdb-based state store](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/ops/state/state_backends/#the-embeddedrocksdbstatebackend), Hummock is cloud-native and provides super elasticity.
