112 changes: 82 additions & 30 deletions website/docs/quickstart/flink-iceberg.md
@@ -61,23 +61,54 @@ services:
     restart: always
     image: zookeeper:3.9.2
 
+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: [ "hdfs", "namenode" ]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: [ "hdfs", "datanode" ]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode
+
   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
     depends_on:
       - zookeeper
+      - namenode
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: hdfs://namenode:8020/fluss-data
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake
     volumes:
-      - shared-tmpfs:/tmp/iceberg
       - ./lib:/tmp/lib
-    entrypoint: ["sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer"]
+    entrypoint: [ "sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer" ]
 
   tablet-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
@@ -90,13 +121,11 @@ services:
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
-        remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        remote.data.dir: hdfs://namenode:8020/fluss-data
+        kv.snapshot.interval: 30s
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake
 
   jobmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -107,8 +136,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
 
   taskmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -122,15 +149,6 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```
 
 The Docker Compose environment consists of the following containers:
@@ -367,6 +385,33 @@ The following SQL query should return an empty result.
 SELECT * FROM fluss_customer WHERE `cust_key` = 1;
 ```
 
+## Remote Storage
+
+Finally, you can use the following command to view the Fluss KV snapshots stored in Fluss remote storage:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-data
+└── kv
+    └── fluss
+        ├── enriched_orders-3
+        │   └── 0
+        │       ├── shared
+        │       │   ├── 0836f202-bdcd-498b-a94a-0520beb3d7ea
+        │       │   ├── afefc29f-d8d3-4cdb-a496-a6c271ddfac0
+        │       │   └── b67bd402-2ad4-4305-bd36-4fadf08a5200
+        │       └── snap-2
+        │           ├── _METADATA
+        │           ├── 02f02528-af03-4c88-980c-ec9f878d5476
+        │           ├── 7b21a889-ab06-4b74-98a5-36b542a67d0d
+        │           └── d7b699d9-6547-49fc-b579-de84cc37a167
+        ├── fluss_customer-1
+        ... # Remaining entries omitted for brevity
+```
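The `awk '{print $8}' | grep -v '^$' | tree --fromfile .` pipeline above just extracts the path column from the `-ls -R` listing and renders it as a tree. If `tree` is not available on your machine, a rough Python equivalent is sketched below; the listing fragment (and its sizes and dates) is invented for illustration, only the column layout follows the standard `hdfs dfs -ls` output.

```python
# Sketch: extract the path column from `hdfs dfs -ls -R` output and
# print a simple indented tree -- a stand-in for the awk/grep/tree
# pipeline above. The listing is a hand-written sample fragment.
listing = """\
drwxr-xr-x   - root supergroup          0 2024-01-01 00:00 /fluss-data/kv
drwxr-xr-x   - root supergroup          0 2024-01-01 00:00 /fluss-data/kv/fluss
-rw-r--r--   1 root supergroup       1024 2024-01-01 00:00 /fluss-data/kv/fluss/_METADATA
"""

def paths(ls_output: str):
    # awk '{print $8}': the path is the 8th whitespace-separated field.
    return [line.split()[7] for line in ls_output.splitlines() if line.strip()]

def print_tree(path_list):
    # Indent each entry by its depth; crude, but mirrors the tree shape.
    for p in path_list:
        parts = p.strip("/").split("/")
        print("  " * (len(parts) - 1) + parts[-1])

print_tree(paths(listing))
```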

## Integrate with Iceberg
### Start the Lakehouse Tiering Service
To integrate with [Apache Iceberg](https://iceberg.apache.org/), you need to start the `Lakehouse Tiering Service`.
@@ -378,7 +423,7 @@ docker compose exec jobmanager \
     --fluss.bootstrap.servers coordinator-server:9123 \
     --datalake.format iceberg \
     --datalake.iceberg.type hadoop \
-    --datalake.iceberg.warehouse /tmp/iceberg
+    --datalake.iceberg.warehouse hdfs://namenode:8020/fluss-lake
 ```
You should see a Flink job that tiers data from Fluss to Iceberg running in the [Flink Web UI](http://localhost:8083/).
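If you prefer to check from a terminal instead of the Web UI, Flink's REST API (served on the same port as the UI, 8083 here) exposes `GET /jobs/overview`. The sketch below filters that payload for a running job; the sample payload and the `"tiering"` name filter are assumptions — match on whatever name the job shows in your UI.

```python
import json

# Decide whether a tiering job is RUNNING from the JSON returned by
# Flink's REST endpoint GET /jobs/overview. In a live setup you would
# fetch it with e.g.:  curl http://localhost:8083/jobs/overview
# The payload below is a hand-written sample, not real output.
sample = json.loads('{"jobs": [{"jid": "a1", "name": "fluss-tiering-service", "state": "RUNNING"}]}')

def tiering_running(overview, keyword="tiering"):
    # True if any job is RUNNING and its name contains the keyword.
    return any(
        job.get("state") == "RUNNING" and keyword in job.get("name", "").lower()
        for job in overview.get("jobs", [])
    )

print(tiering_running(sample))
```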

@@ -501,20 +546,27 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders;
 
 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real time.
 
-Finally, you can use the following command to view the files stored in Iceberg:
+### Lake Storage
+
+Finally, you can use the following command to view the files stored in the Iceberg Hadoop warehouse:
 ```shell
-docker compose exec taskmanager tree /tmp/iceberg/fluss
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
 ```
 
 **Sample Output:**
 ```shell
-/tmp/iceberg/fluss
-└── datalake_enriched_orders
-    ├── data
-    │   └── 00000-0-abc123.parquet
-    └── metadata
-        ├── snap-1234567890123456789-1-abc123.avro
-        └── v1.metadata.json
+hdfs://namenode:8020/fluss-lake
+└── fluss
+    └── datalake_enriched_orders
+        ├── data
+        │   └── __bucket=0
+        │       └── 00000-0-3ff95845-47af-456f-83e0-8411576cfffe-00001.parquet
+        └── metadata
+            ├── 528ae521-d683-4c5e-8dd7-779a83dd9c6f-m0.avro
+            ├── snap-3496049107217731071-1-528ae521-d683-4c5e-8dd7-779a83dd9c6f.avro
+            ├── v1.metadata.json
+            ├── v2.metadata.json
+            └── version-hint.text
 ```
 The files adhere to Iceberg's standard format, enabling seamless querying with other engines such as [Spark](https://iceberg.apache.org/docs/latest/spark-queries/) and [Trino](https://trino.io/docs/current/connector/iceberg.html).
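The listing also makes Iceberg's Hadoop-catalog bootstrap visible: `version-hint.text` stores the latest metadata version number, which names the `v<N>.metadata.json` file to read, which in turn records the current snapshot id (matching the `snap-…` file above). A minimal sketch of that resolution over an in-memory stand-in — the JSON contents are invented, only the file naming follows the listing:

```python
import json

# Sketch of Iceberg Hadoop-catalog metadata resolution, using a dict as
# a stand-in for the metadata/ directory shown above. A real reader
# would fetch these files from the HDFS warehouse path instead.
fs = {
    "metadata/version-hint.text": "2",
    "metadata/v1.metadata.json": '{"current-snapshot-id": -1}',
    "metadata/v2.metadata.json": '{"current-snapshot-id": 3496049107217731071}',
}

def current_metadata(fs: dict) -> dict:
    # version-hint.text holds the latest metadata version, e.g. "2",
    # which selects v2.metadata.json as the current table metadata.
    version = fs["metadata/version-hint.text"].strip()
    return json.loads(fs[f"metadata/v{version}.metadata.json"])

print(current_metadata(fs)["current-snapshot-id"])
```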

136 changes: 94 additions & 42 deletions website/docs/quickstart/flink.md
@@ -37,12 +37,53 @@ cd fluss-quickstart-flink
 
 ```yaml
 services:
+  zookeeper:
+    restart: always
+    image: zookeeper:3.9.2
+
+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: [ "hdfs", "namenode" ]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    healthcheck:
+      test: [ "CMD-SHELL", "hdfs dfs -test -d / || exit 1" ]
+      interval: 15s
+      timeout: 10s
+      retries: 20
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: [ "hdfs", "datanode" ]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode
+
   #begin Fluss cluster
   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
-    command: coordinatorServer
     depends_on:
-      - zookeeper
+      namenode:
+        condition: service_healthy
+      zookeeper:
+        condition: service_started
     environment:
       - |
         FLUSS_PROPERTIES=
@@ -51,9 +92,8 @@ services:
         remote.data.dir: /tmp/fluss/remote-data
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
-    volumes:
-      - shared-tmpfs:/tmp/paimon
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
+    command: coordinatorServer
   tablet-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
     command: tabletServer
@@ -66,15 +106,11 @@ services:
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
         remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        kv.snapshot.interval: 30s
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
-    volumes:
-      - shared-tmpfs:/tmp/paimon
-  zookeeper:
-    restart: always
-    image: zookeeper:3.9.2
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
+
   #end
   #begin Flink cluster
   jobmanager:
@@ -86,8 +122,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   taskmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
     depends_on:
@@ -100,16 +134,7 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   #end
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```
 
 The Docker Compose environment consists of the following containers:
@@ -346,6 +371,33 @@ The following SQL query should return an empty result.
 SELECT * FROM fluss_customer WHERE `cust_key` = 1;
 ```
 
+## Remote Storage
+
+Finally, you can use the following command to view the Fluss KV snapshots stored in Fluss remote storage:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-data
+└── kv
+    └── fluss
+        ├── enriched_orders-3
+        │   └── 0
+        │       ├── shared
+        │       │   ├── 71fca534-ecca-489b-a19a-bd0538c9f9e9
+        │       │   ├── b06ef3a3-2873-470e-961f-da25582136a1
+        │       │   └── b93bad5c-00fb-4e62-8217-71b010621479
+        │       └── snap-2
+        │           ├── _METADATA
+        │           ├── 08d39726-f847-4401-8f31-4e905f2ba3f6
+        │           ├── b6a7bc2c-b5c3-4eeb-a523-b2b6fff159f3
+        │           └── e6278555-d71f-431f-954e-71bf066dd29f
+        ├── fluss_customer-1
+        ... # Remaining entries omitted for brevity
+```

## Integrate with Paimon
### Start the Lakehouse Tiering Service
To integrate with [Apache Paimon](https://paimon.apache.org/), you need to start the `Lakehouse Tiering Service`.
@@ -473,30 +525,30 @@ The result looks like:
 ```
 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real time.
 
-Finally, you can use the following command to view the files stored in Paimon:
+### Lake Storage
+
+Finally, you can use the following command to view the files stored in the Paimon Hadoop warehouse:
 ```shell
-docker compose exec taskmanager tree /tmp/paimon/fluss.db
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
 ```
 
 **Sample Output:**
 ```shell
-/tmp/paimon/fluss.db
-└── datalake_enriched_orders
-    ├── bucket-0
-    │   ├── changelog-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-0.orc
-    │   └── data-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-1.orc
-    ├── manifest
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-0
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-1
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-0
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-1
-    │   └── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-2
-    ├── schema
-    │   └── schema-0
-    └── snapshot
-        ├── EARLIEST
-        ├── LATEST
-        └── snapshot-1
+hdfs://namenode:8020/fluss-lake
+├── default.db
+└── fluss.db
+    └── datalake_enriched_orders
+        ├── bucket-0
+        │   └── data-02acf76d-c4cc-4bc1-9292-e64a77dfcc72-0.parquet
+        ├── manifest
+        │   ├── manifest-df5b6833-7e92-4ec9-a196-51d6fd60b1d1-0
+        │   ├── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-0
+        │   └── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-1
+        ├── schema
+        │   └── schema-0
+        └── snapshot
+            ├── LATEST
+            └── snapshot-1
 ```
The files adhere to Paimon's standard format, enabling seamless querying with other engines such as [StarRocks](https://docs.starrocks.io/docs/data_source/catalog/paimon_catalog/).
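Paimon's snapshot discovery works along similar lines and is visible in the listing above: the `snapshot/LATEST` hint file holds the id of the newest snapshot file (`snapshot-1` here). A minimal sketch, again over an in-memory stand-in with invented file contents:

```python
import json

# Sketch of Paimon snapshot resolution against the layout shown above,
# with a dict standing in for the HDFS warehouse. The snapshot JSON
# body is invented for illustration; only the file naming follows the
# listing (snapshot/LATEST -> snapshot/snapshot-<id>).
fs = {
    "snapshot/LATEST": "1",
    "snapshot/snapshot-1": '{"id": 1, "schemaId": 0, "commitKind": "APPEND"}',
}

def latest_snapshot(fs: dict) -> dict:
    # The LATEST hint file contains the newest snapshot id as text.
    latest_id = fs["snapshot/LATEST"].strip()
    return json.loads(fs[f"snapshot/snapshot-{latest_id}"])

print(latest_snapshot(fs)["id"])
```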
