From cde36287406bb9f72e0d64ff5689a9b314e431f7 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Wed, 24 Jan 2024 16:01:53 +0545 Subject: [PATCH 01/33] Integrate Apache Iceberg jars with Rock Image --- rockcraft.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rockcraft.yaml b/rockcraft.yaml index 579f70d4..5c07a890 100644 --- a/rockcraft.yaml +++ b/rockcraft.yaml @@ -93,6 +93,8 @@ parts: overlay-script: | AWS_JAVA_SDK_BUNDLE_VERSION='1.12.540' HADOOP_AWS_VERSION='3.3.6' + ICEBERG_SPARK_RUNTIME_VERSION='3.4_2.12' + ICEBERG_VERSION='1.4.3' mkdir -p $CRAFT_PART_INSTALL/opt/spark/jars cd $CRAFT_PART_INSTALL/opt/spark/jars wget -q "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_JAVA_SDK_BUNDLE_VERSION}/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" @@ -119,6 +121,14 @@ parts: echo "DOWNLOAD ERROR: spark-metrics-assembly-3.4-1.0.0.jar could not be downloaded properly! Exiting...." >&2 exit 1 fi + wget -q "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" + wget -q "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar.sha1" + echo "`cat iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar.sha1` iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" | sha1sum --check + if [[ $? -ne 0 ]] + then + echo "DOWNLOAD ERROR: iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar could not be downloaded properly! Exiting...." >&2 + exit 1 + fi stage: - opt/spark/jars From 3136d86789067905d79b63b5051819d56a9c8db4 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Fri, 26 Jan 2024 14:26:43 +0545 Subject: [PATCH 02/33] Hardcode SHA sums for the downloaded jars. --- rockcraft.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/rockcraft.yaml b/rockcraft.yaml index 5c07a890..6e39ea47 100644 --- a/rockcraft.yaml +++ b/rockcraft.yaml @@ -95,19 +95,21 @@ parts: HADOOP_AWS_VERSION='3.3.6' ICEBERG_SPARK_RUNTIME_VERSION='3.4_2.12' ICEBERG_VERSION='1.4.3' + SHA1SUM_AWS_JAVA_SDK_BUNDLE_JAR='a351ebc4f81d20e0349b3c0f85f34f443a37ce9d' + SHA1SUM_HADOOP_AWS_JAR='d5e162564701848b0921b80aedef9e64435333cc' + SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR='fc52ba79af46e008b1463da4c0852564f2bfce21668468b683550df1f1ff3e4f149641bbce1cffb24510569c1a441bb47f73dd2b0cff87073631c391dc248211' + SHA1SUM_ICEBERG_JAR='48d553e4e5496f731b9e0e6adb5bc0fd040cb0df' mkdir -p $CRAFT_PART_INSTALL/opt/spark/jars cd $CRAFT_PART_INSTALL/opt/spark/jars wget -q "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_JAVA_SDK_BUNDLE_VERSION}/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" - wget -q "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_JAVA_SDK_BUNDLE_VERSION}/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar.sha1" - echo "`cat aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar.sha1` aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" | sha1sum --check + echo "${SHA1SUM_AWS_JAVA_SDK_BUNDLE_JAR} aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" | sha1sum --check if [[ $? -ne 0 ]] then echo "DOWNLOAD ERROR: aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar could not be downloaded properly! Exiting...." 
>&2 exit 1 fi wget -q "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_AWS_VERSION}/hadoop-aws-${HADOOP_AWS_VERSION}.jar" - wget -q "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_AWS_VERSION}/hadoop-aws-${HADOOP_AWS_VERSION}.jar.sha1" - echo "`cat hadoop-aws-${HADOOP_AWS_VERSION}.jar.sha1` hadoop-aws-${HADOOP_AWS_VERSION}.jar" | sha1sum --check + echo "${SHA1SUM_HADOOP_AWS_JAR} hadoop-aws-${HADOOP_AWS_VERSION}.jar" | sha1sum --check if [[ $? -ne 0 ]] then echo "DOWNLOAD ERROR: hadoop-aws-${HADOOP_AWS_VERSION}.jar could not be downloaded properly! Exiting...." >&2 @@ -115,15 +117,14 @@ parts: fi wget -q "https://github.com/canonical/central-uploader/releases/download/spark-metrics-assembly-3.4-1.0.0/spark-metrics-assembly-3.4-1.0.0.jar" wget -q "https://github.com/canonical/central-uploader/releases/download/spark-metrics-assembly-3.4-1.0.0/spark-metrics-assembly-3.4-1.0.0.jar.sha512" - echo "`cat spark-metrics-assembly-3.4-1.0.0.jar.sha512`" | sha512sum --check + echo "${SHA512SUM_SPARK_METRICS_ASSEMBLY_JAR} spark-metrics-assembly-3.4-1.0.0.jar" | sha512sum --check if [[ $? -ne 0 ]] then echo "DOWNLOAD ERROR: spark-metrics-assembly-3.4-1.0.0.jar could not be downloaded properly! Exiting...." >&2 exit 1 fi wget -q "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" - wget -q "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar.sha1" - echo "`cat iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar.sha1` iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" | sha1sum --check + echo "${SHA1SUM_ICEBERG_JAR} iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar" | sha1sum --check if [[ $? -ne 0 ]] then echo "DOWNLOAD ERROR: iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar could not be downloaded properly! Exiting...." 
>&2 From cc7e95fb51b91fad1ce84088f72c893c2f826886 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 10:53:33 +0545 Subject: [PATCH 03/33] Ignore build artifacts from VCS --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 723ef36f..0b26130a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -.idea \ No newline at end of file +.idea +*.rock +*.tar +.make_cache \ No newline at end of file From cc552b8f6a1caa6afb5c16e3650ef1fbbd21c041 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 10:55:11 +0545 Subject: [PATCH 04/33] Add integration tests for Apache Iceberg integration --- CONTRIBUTING.md | 1 + Makefile | 12 ++- tests/integration/config-microk8s.sh | 1 + tests/integration/integration-tests.sh | 85 ++++++++++++++++++++- tests/integration/resources/test-iceberg.py | 39 ++++++++++ tests/integration/setup-aws-cli.sh | 13 ++++ 6 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 tests/integration/resources/test-iceberg.py create mode 100644 tests/integration/setup-aws-cli.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eba0a602..c27e3b68 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,6 +13,7 @@ cd charmed-spark-rock sudo snap install rockcraft --edge sudo snap install docker sudo snap install lxd +sudo snap install yq sudo snap install skopeo --edge --devmode ``` diff --git a/Makefile b/Makefile index 14a7fa9c..042a215c 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ _MAKE_DIR := .make_cache $(shell mkdir -p $(_MAKE_DIR)) K8S_TAG := $(_MAKE_DIR)/.k8s_tag +AWS_TAG := $(_MAKE_DIR)/.aws_tag IMAGE_NAME := $(shell yq .name rockcraft.yaml) VERSION := $(shell yq .version rockcraft.yaml) @@ -70,7 +71,7 @@ $(_TMP_OCI_TAG): $(_ROCK_OCI) touch $(_TMP_OCI_TAG) $(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) - docker build - -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" < Dockerfile + docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f Dockerfile . if [ ! 
-d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi touch $(CHARMED_OCI_TAG) @@ -80,10 +81,15 @@ $(K8S_TAG): sg microk8s ./tests/integration/config-microk8s.sh @touch $(K8S_TAG) +$(AWS_TAG): $(K8S_TAG) + @echo "=== Setting up and configure AWS CLI ===" + /bin/bash ./tests/integration/setup-aws-cli.sh + @touch $(AWS_TAG) + microk8s: $(K8S_TAG) $(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar + docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar $(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar @echo "=== Creating $(BASE_NAME) OCI archive ===" @@ -106,7 +112,7 @@ import: $(K8S_TAG) build microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME) endif -tests: +tests: $(K8S_TAG) $(AWS_TAG) @echo "=== Running Integration Tests ===" /bin/bash ./tests/integration/integration-tests.sh diff --git a/tests/integration/config-microk8s.sh b/tests/integration/config-microk8s.sh index 82c6ebf7..9461fb82 100755 --- a/tests/integration/config-microk8s.sh +++ b/tests/integration/config-microk8s.sh @@ -2,3 +2,4 @@ microk8s status --wait-ready microk8s config | tee ~/.kube/config microk8s.enable dns microk8s.enable rbac +microk8s.enable minio \ No newline at end of file diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index a9267613..f2b8d9e4 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -135,7 +135,7 @@ teardown_test_pod() { run_example_job_in_pod() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl get pods -n ${NAMESPACE}| grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 @@ -166,6 +166,82 @@ run_example_job_in_pod() { validate_pi_value $pi } +get_s3_access_key(){ + kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d +} + +get_s3_secret_key(){ + kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d +} + +get_s3_endpoint(){ + kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}' +} + +create_s3_bucket(){ + S3_ENDPOINT=$(get_s3_endpoint) + BUCKET_NAME=$1 + aws --endpoint-url "http://$S3_ENDPOINT" s3api create-bucket --bucket "$BUCKET_NAME" + echo "Created S3 bucket ${BUCKET_NAME}" +} + +copy_file_to_s3_bucket(){ + BUCKET_NAME=$1 + FILE_PATH=$2 + BASE_NAME=$(basename "$FILE_PATH") + aws s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" + echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" +} + +run_iceberg_example_in_pod(){ + create_s3_bucket spark + copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py + + NAMESPACE="tests" + USERNAME="spark" + NUM_ROWS_TO_INSERT="4" + PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + + kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" IM="$(spark_image)" \ + /bin/bash -c 'spark-client.spark-submit' \ + --username $UU --namespace $NN \ + --conf spark.kubernetes.driver.request.cores=100m \ + --conf spark.kubernetes.executor.request.cores=100m \ + --conf spark.kubernetes.container.image=$IM \ + --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \ + --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \ + --conf 
spark.hadoop.fs.s3a.path.style.access=true \ + --conf spark.hadoop.fs.s3a.access.key=$(get_s3_access_key) \ + --conf spark.hadoop.fs.s3a.secret.key=$(get_s3_secret_key) \ + --conf spark.jars.ivy=/tmp \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ + --conf spark.sql.catalog.spark_catalog.type=hive \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=s3a://spark/warehouse \ + --conf spark.sql.defaultCatalog=local + s3a://spark/test-iceberg.py -n ${NUM_ROWS_TO_INSERT} + + DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + + if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]] + then + echo "ERROR: Sample job has not run!" + exit 1 + fi + + DRIVER_POD_ID=$(kubectl get pods -n ${NAMESPACE} | grep test-iceberg-.*-driver | tail -n 1 | cut -d' ' -f1) + OUTPUT_LOG_LINE=$(kubectl logs ${DRIVER_POD_ID} -n ${NAMESPACE} | grep 'Number of rows inserted:' ) + NUM_ROWS_INSERTED=$(echo $OUTPUT_LOG_LINE | rev | cut -d' ' -f1 | rev) + + if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then + echo "ERROR: ${NUM_ROWS_TO_INSERT} were supposed to be inserted. Found ${NUM_ROWS_INSERTED} rows. Aborting with exit code 1." + exit 1 + fi + +} + run_example_job_in_pod_with_pod_templates() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" @@ -425,6 +501,13 @@ echo -e "RUN EXAMPLE JOB WITH ERRORS" echo -e "########################################" (setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod + +echo -e "##################################" +echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" +echo -e "##################################" + +(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod + echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py new file mode 100644 index 00000000..d8b34aca --- /dev/null +++ b/tests/integration/resources/test-iceberg.py @@ -0,0 +1,39 @@ +import argparse +import random + +from pyspark.sql import SparkSession +from pyspark.sql.types import LongType, StructType, StructField + +parser = argparse.ArgumentParser("TestIceberg") +parser.add_argument("--num_rows", "-n", type=int) +args = parser.parse_args() +num_rows = args.num_rows + +spark = SparkSession\ + .builder\ + .appName("IcebergExample")\ + .getOrCreate() + + +schema = StructType([ + StructField("row_id", LongType(), True), + StructField("row_val", LongType(), True) +]) + +# df = spark.createDataFrame([], schema) +# df.writeTo("demo.foo.bar").create() + +# schema = spark.table("demo.nyc.taxis").schema +data = [] +for idx in range(num_rows): + row = (idx + 1, random.randint(1, 100)) + data.append(row) + +df = spark.createDataFrame(data, schema) +# df.writeTo("demo.nyc.taxis").append() +df.writeTo("demo.foo.bar").create() + + +df = spark.table("demo.foo.bar") +count = df.count() +print(f"Number of rows inserted: {count}") \ No newline at end of file diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh new file mode 100644 index 00000000..c56cbda4 --- /dev/null +++ 
b/tests/integration/setup-aws-cli.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Install AWS CLI +sudo snap install aws-cli --classic + +# Get Access key and secret key from MinIO +ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d) +SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d) +S3_BUCKET="spark" + +# Configure AWS CLI credentials +aws configure set aws_access_key_id $ACCESS_KEY +aws configure set aws_secret_access_key $SECRET_KEY From c682abd684d6825243aa53f84be65bb0bbfe38dc Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 10:53:33 +0545 Subject: [PATCH 05/33] Ignore build artifacts from VCS --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 723ef36f..0b26130a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ -.idea \ No newline at end of file +.idea +*.rock +*.tar +.make_cache \ No newline at end of file From f735866f13cb3991f0b7e258636a8efb95585156 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 10:55:11 +0545 Subject: [PATCH 06/33] Add integration tests for Apache Iceberg integration --- CONTRIBUTING.md | 1 + Makefile | 12 ++- tests/integration/config-microk8s.sh | 1 + tests/integration/integration-tests.sh | 85 ++++++++++++++++++++- tests/integration/resources/test-iceberg.py | 39 ++++++++++ tests/integration/setup-aws-cli.sh | 13 ++++ 6 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 tests/integration/resources/test-iceberg.py create mode 100644 tests/integration/setup-aws-cli.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eba0a602..c27e3b68 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,6 +13,7 @@ cd charmed-spark-rock sudo snap install rockcraft --edge sudo snap install docker sudo snap install lxd +sudo snap install yq sudo snap install skopeo --edge --devmode ``` diff --git a/Makefile b/Makefile index 14a7fa9c..042a215c 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ _MAKE_DIR := .make_cache $(shell mkdir -p $(_MAKE_DIR)) K8S_TAG := $(_MAKE_DIR)/.k8s_tag +AWS_TAG := $(_MAKE_DIR)/.aws_tag IMAGE_NAME := $(shell yq .name rockcraft.yaml) VERSION := $(shell yq .version rockcraft.yaml) @@ -70,7 +71,7 @@ $(_TMP_OCI_TAG): $(_ROCK_OCI) touch $(_TMP_OCI_TAG) $(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) - docker build - -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" < Dockerfile + docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f Dockerfile . if [ ! 
-d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi touch $(CHARMED_OCI_TAG) @@ -80,10 +81,15 @@ $(K8S_TAG): sg microk8s ./tests/integration/config-microk8s.sh @touch $(K8S_TAG) +$(AWS_TAG): $(K8S_TAG) + @echo "=== Setting up and configure AWS CLI ===" + /bin/bash ./tests/integration/setup-aws-cli.sh + @touch $(AWS_TAG) + microk8s: $(K8S_TAG) $(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar + docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar $(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar @echo "=== Creating $(BASE_NAME) OCI archive ===" @@ -106,7 +112,7 @@ import: $(K8S_TAG) build microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME) endif -tests: +tests: $(K8S_TAG) $(AWS_TAG) @echo "=== Running Integration Tests ===" /bin/bash ./tests/integration/integration-tests.sh diff --git a/tests/integration/config-microk8s.sh b/tests/integration/config-microk8s.sh index 82c6ebf7..9461fb82 100755 --- a/tests/integration/config-microk8s.sh +++ b/tests/integration/config-microk8s.sh @@ -2,3 +2,4 @@ microk8s status --wait-ready microk8s config | tee ~/.kube/config microk8s.enable dns microk8s.enable rbac +microk8s.enable minio \ No newline at end of file diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index a9267613..f2b8d9e4 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -135,7 +135,7 @@ teardown_test_pod() { run_example_job_in_pod() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl get pods -n ${NAMESPACE}| grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 @@ -166,6 +166,82 @@ run_example_job_in_pod() { validate_pi_value $pi } +get_s3_access_key(){ + kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d +} + +get_s3_secret_key(){ + kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d +} + +get_s3_endpoint(){ + kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}' +} + +create_s3_bucket(){ + S3_ENDPOINT=$(get_s3_endpoint) + BUCKET_NAME=$1 + aws --endpoint-url "http://$S3_ENDPOINT" s3api create-bucket --bucket "$BUCKET_NAME" + echo "Created S3 bucket ${BUCKET_NAME}" +} + +copy_file_to_s3_bucket(){ + BUCKET_NAME=$1 + FILE_PATH=$2 + BASE_NAME=$(basename "$FILE_PATH") + aws s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" + echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" +} + +run_iceberg_example_in_pod(){ + create_s3_bucket spark + copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py + + NAMESPACE="tests" + USERNAME="spark" + NUM_ROWS_TO_INSERT="4" + PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + + kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" IM="$(spark_image)" \ + /bin/bash -c 'spark-client.spark-submit' \ + --username $UU --namespace $NN \ + --conf spark.kubernetes.driver.request.cores=100m \ + --conf spark.kubernetes.executor.request.cores=100m \ + --conf spark.kubernetes.container.image=$IM \ + --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \ + --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \ + --conf 
spark.hadoop.fs.s3a.path.style.access=true \ + --conf spark.hadoop.fs.s3a.access.key=$(get_s3_access_key) \ + --conf spark.hadoop.fs.s3a.secret.key=$(get_s3_secret_key) \ + --conf spark.jars.ivy=/tmp \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ + --conf spark.sql.catalog.spark_catalog.type=hive \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=s3a://spark/warehouse \ + --conf spark.sql.defaultCatalog=local + s3a://spark/test-iceberg.py -n ${NUM_ROWS_TO_INSERT} + + DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + + if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]] + then + echo "ERROR: Sample job has not run!" + exit 1 + fi + + DRIVER_POD_ID=$(kubectl get pods -n ${NAMESPACE} | grep test-iceberg-.*-driver | tail -n 1 | cut -d' ' -f1) + OUTPUT_LOG_LINE=$(kubectl logs ${DRIVER_POD_ID} -n ${NAMESPACE} | grep 'Number of rows inserted:' ) + NUM_ROWS_INSERTED=$(echo $OUTPUT_LOG_LINE | rev | cut -d' ' -f1 | rev) + + if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then + echo "ERROR: ${NUM_ROWS_TO_INSERT} were supposed to be inserted. Found ${NUM_ROWS_INSERTED} rows. Aborting with exit code 1." + exit 1 + fi + +} + run_example_job_in_pod_with_pod_templates() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" @@ -425,6 +501,13 @@ echo -e "RUN EXAMPLE JOB WITH ERRORS" echo -e "########################################" (setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod + +echo -e "##################################" +echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" +echo -e "##################################" + +(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod + echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py new file mode 100644 index 00000000..d8b34aca --- /dev/null +++ b/tests/integration/resources/test-iceberg.py @@ -0,0 +1,39 @@ +import argparse +import random + +from pyspark.sql import SparkSession +from pyspark.sql.types import LongType, StructType, StructField + +parser = argparse.ArgumentParser("TestIceberg") +parser.add_argument("--num_rows", "-n", type=int) +args = parser.parse_args() +num_rows = args.num_rows + +spark = SparkSession\ + .builder\ + .appName("IcebergExample")\ + .getOrCreate() + + +schema = StructType([ + StructField("row_id", LongType(), True), + StructField("row_val", LongType(), True) +]) + +# df = spark.createDataFrame([], schema) +# df.writeTo("demo.foo.bar").create() + +# schema = spark.table("demo.nyc.taxis").schema +data = [] +for idx in range(num_rows): + row = (idx + 1, random.randint(1, 100)) + data.append(row) + +df = spark.createDataFrame(data, schema) +# df.writeTo("demo.nyc.taxis").append() +df.writeTo("demo.foo.bar").create() + + +df = spark.table("demo.foo.bar") +count = df.count() +print(f"Number of rows inserted: {count}") \ No newline at end of file diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh new file mode 100644 index 00000000..c56cbda4 --- /dev/null +++ 
b/tests/integration/setup-aws-cli.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Install AWS CLI +sudo snap install aws-cli --classic + +# Get Access key and secret key from MinIO +ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d) +SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d) +S3_BUCKET="spark" + +# Configure AWS CLI credentials +aws configure set aws_access_key_id $ACCESS_KEY +aws configure set aws_secret_access_key $SECRET_KEY From d02d641e1e91c7678f83578dd5650eba19c9da11 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 15:32:42 +0545 Subject: [PATCH 07/33] Fix tests not passing on CI --- Makefile | 2 +- tests/integration/integration-tests.sh | 63 +++++++++++++++++--------- tests/integration/setup-aws-cli.sh | 1 + 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 042a215c..4474e3d1 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,7 @@ $(K8S_TAG): $(AWS_TAG): $(K8S_TAG) @echo "=== Setting up and configure AWS CLI ===" /bin/bash ./tests/integration/setup-aws-cli.sh - @touch $(AWS_TAG) + touch $(AWS_TAG) microk8s: $(K8S_TAG) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index f2b8d9e4..db30cf93 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -x get_spark_version(){ SPARK_VERSION=$(yq '(.version)' rockcraft.yaml) @@ -185,15 +186,22 @@ create_s3_bucket(){ echo "Created S3 bucket ${BUCKET_NAME}" } +delete_s3_bucket(){ + S3_ENDPOINT=$(get_s3_endpoint) + BUCKET_NAME=$1 + aws --endpoint-url "http://$S3_ENDPOINT" s3 rb "s3://$BUCKET_NAME" --force + echo "Deleted S3 bucket ${BUCKET_NAME}" +} + copy_file_to_s3_bucket(){ BUCKET_NAME=$1 FILE_PATH=$2 BASE_NAME=$(basename "$FILE_PATH") - aws s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" + aws --endpoint-url "http://$S3_ENDPOINT" s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" } -run_iceberg_example_in_pod(){ +test_iceberg_example_in_pod(){ create_s3_bucket spark copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py @@ -202,27 +210,38 @@ run_iceberg_example_in_pod(){ NUM_ROWS_TO_INSERT="4" PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" IM="$(spark_image)" \ - /bin/bash -c 'spark-client.spark-submit' \ - --username $UU --namespace $NN \ - --conf spark.kubernetes.driver.request.cores=100m \ - --conf spark.kubernetes.executor.request.cores=100m \ - --conf spark.kubernetes.container.image=$IM \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \ - --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \ - --conf spark.hadoop.fs.s3a.path.style.access=true \ - --conf spark.hadoop.fs.s3a.access.key=$(get_s3_access_key) \ - --conf spark.hadoop.fs.s3a.secret.key=$(get_s3_secret_key) \ - --conf spark.jars.ivy=/tmp \ - --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ - --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ - --conf spark.sql.catalog.spark_catalog.type=hive \ - --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ - --conf spark.sql.catalog.local.type=hadoop \ - --conf 
spark.sql.catalog.local.warehouse=s3a://spark/warehouse \ - --conf spark.sql.defaultCatalog=local - s3a://spark/test-iceberg.py -n ${NUM_ROWS_TO_INSERT} + kubectl exec testpod -- \ + env \ + UU="$USERNAME" \ + NN="$NAMESPACE" \ + IM="$(spark_image)" \ + NUM_ROWS="$NUM_ROWS_TO_INSERT" \ + ACCESS_KEY="$(get_s3_access_key)" \ + SECRET_KEY="$(get_s3_secret_key)" \ + S3_ENDPOINT="$(get_s3_endpoint)" \ + /bin/bash -c '\ + spark-client.spark-submit \ + --username $UU --namespace $NN \ + --conf spark.kubernetes.driver.request.cores=100m \ + --conf spark.kubernetes.executor.request.cores=100m \ + --conf spark.kubernetes.container.image=$IM \ + --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \ + --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \ + --conf spark.hadoop.fs.s3a.path.style.access=true \ + --conf spark.hadoop.fs.s3a.endpoint=$S3_ENDPOINT \ + --conf spark.hadoop.fs.s3a.access.key=$ACCESS_KEY \ + --conf spark.hadoop.fs.s3a.secret.key=$SECRET_KEY \ + --conf spark.jars.ivy=/tmp \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ + --conf spark.sql.catalog.spark_catalog.type=hive \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=s3a://spark/warehouse \ + --conf spark.sql.defaultCatalog=local \ + s3a://spark/test-iceberg.py -n $NUM_ROWS' + delete_s3_bucket spark DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]] diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh index c56cbda4..efc24982 100644 --- a/tests/integration/setup-aws-cli.sh +++ b/tests/integration/setup-aws-cli.sh @@ -11,3 +11,4 @@ S3_BUCKET="spark" # Configure AWS CLI credentials aws configure set aws_access_key_id $ACCESS_KEY aws configure set aws_secret_access_key $SECRET_KEY +echo "AWS CLI credentials set successfully" From 2c18492aedf022968e4a65a7872b3fdc761f4d9a Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 15:39:28 +0545 Subject: [PATCH 08/33] Make script executable --- tests/integration/setup-aws-cli.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/integration/setup-aws-cli.sh diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh old mode 100644 new mode 100755 From 6f0783b717875fe8b6a33db5e0a12f1a4f1497a2 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 15:56:12 +0545 Subject: [PATCH 09/33] Revert a docker command in Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4474e3d1..0a8bbced 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ $(AWS_TAG): $(K8S_TAG) microk8s: $(K8S_TAG) $(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar + docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar $(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar @echo "=== Creating $(BASE_NAME) OCI archive ===" From 0b085e27b0cb0c6c20c83771ec15bf728e4ec5b9 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 16:10:47 +0545 Subject: [PATCH 10/33] Remove set -x option in integration tests. 
--- tests/integration/integration-tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index db30cf93..fc007bb0 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x get_spark_version(){ SPARK_VERSION=$(yq '(.version)' rockcraft.yaml) From fd05ccec4c39e71c391e9a809756b5eeed8cfd6a Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 16:23:50 +0545 Subject: [PATCH 11/33] Use -o option instead of redirect --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a8bbced..4474e3d1 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ $(AWS_TAG): $(K8S_TAG) microk8s: $(K8S_TAG) $(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar + docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar $(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar @echo "=== Creating $(BASE_NAME) OCI archive ===" From 063c4e519015ff6ddb898d521c34411de06b346d Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 17:04:10 +0545 Subject: [PATCH 12/33] Change artifact permissions. --- .github/workflows/build.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 82b26dc8..67762db2 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,6 +44,9 @@ jobs: ARTIFACT=$(make help | grep 'Artifact: ') echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + - name: Change artifact permissions + run: sudo chmod a+x ${{ steps.artifact.outputs.name }} + - name: Upload locally built artifact uses: actions/upload-artifact@v3 with: From f22dabb3b8e890a56ff89ee6545364daa4f6b156 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 17:11:58 +0545 Subject: [PATCH 13/33] Give read permission to all. 
--- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 67762db2..c2c5f564 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ jobs: echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Change artifact permissions - run: sudo chmod a+x ${{ steps.artifact.outputs.name }} + run: sudo chmod r+x ${{ steps.artifact.outputs.name }} - name: Upload locally built artifact uses: actions/upload-artifact@v3 From c032913060e71afe174ee5a3500642a4dca5bf83 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 17:22:28 +0545 Subject: [PATCH 14/33] Fix typo in build.yaml --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c2c5f564..adbd9fbd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ jobs: echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Change artifact permissions - run: sudo chmod r+x ${{ steps.artifact.outputs.name }} + run: sudo chmod a+r ${{ steps.artifact.outputs.name }} - name: Upload locally built artifact uses: actions/upload-artifact@v3 From 07540ad6fa3570a4e0bd1a4516ecef17783a59eb Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 18:48:12 +0545 Subject: [PATCH 15/33] Add debug session step --- .github/workflows/integration.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 1007cc36..61ff49e5 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -39,6 +39,10 @@ jobs: name: charmed-spark path: charmed-spark + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + + - name: Run tests run: | # Unpack Artifact From 3c2511585dfa84b67b564e45812688a93b71db4d Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 19:11:33 +0545 Subject: [PATCH 16/33] Move the order of jobs in CI --- .github/workflows/integration.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 61ff49e5..e8ff4c00 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -39,10 +39,6 @@ jobs: name: charmed-spark path: charmed-spark - - name: Setup upterm session - uses: lhotari/action-upterm@v1 - - - name: Run tests run: | # Unpack Artifact @@ -58,3 +54,6 @@ jobs: -o $(find .make_cache -name "*.tag") sg microk8s -c "make tests" + + - name: Setup upterm session + uses: lhotari/action-upterm@v1 \ No newline at end of file From 82171695c509590e2a43c85dd3e362a38a9d96fa Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 19:34:38 +0545 Subject: [PATCH 17/33] Revert "Move the order of jobs in CI" This reverts commit 3c2511585dfa84b67b564e45812688a93b71db4d. 
--- .github/workflows/integration.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index e8ff4c00..61ff49e5 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -39,6 +39,10 @@ jobs: name: charmed-spark path: charmed-spark + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + + - name: Run tests run: | # Unpack Artifact @@ -54,6 +58,3 @@ jobs: -o $(find .make_cache -name "*.tag") sg microk8s -c "make tests" - - - name: Setup upterm session - uses: lhotari/action-upterm@v1 \ No newline at end of file From 149cc95aa834ebe44bf3b0039ce67c825b25c4ac Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 20:01:22 +0545 Subject: [PATCH 18/33] Temporarily disable other tests --- tests/integration/integration-tests.sh | 52 +++++++++++++------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index fc007bb0..45d5024b 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -483,54 +483,54 @@ echo -e "##################################" setup_test_pod -echo -e "##################################" -echo -e "RUN EXAMPLE JOB" -echo -e "##################################" +# echo -e "##################################" +# echo -e "RUN EXAMPLE JOB" +# echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "##################################" -echo -e "RUN SPARK SHELL IN POD" -echo -e "##################################" +# echo -e "##################################" +# echo -e "RUN SPARK SHELL IN POD" +# echo -e "##################################" -(setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "##################################" -echo -e "RUN PYSPARK IN POD" -echo -e "##################################" +# echo -e "##################################" +# echo -e "RUN PYSPARK IN POD" +# echo -e "##################################" -(setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "##################################" -echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" -echo -e "##################################" +# echo -e "##################################" +# echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" +# echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "########################################" -echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" -echo -e "########################################" +# echo -e "########################################" +# echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" +# echo -e "########################################" -(setup_user_admin_context && 
test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod -echo -e "########################################" -echo -e "RUN EXAMPLE JOB WITH ERRORS" -echo -e "########################################" +# echo -e "########################################" +# echo -e "RUN EXAMPLE JOB WITH ERRORS" +# echo -e "########################################" -(setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod +# (setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" echo -e "##################################" -(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) #|| cleanup_user_failure_in_pod echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" -teardown_test_pod +# teardown_test_pod echo -e "##################################" echo -e "END OF THE TEST" From 75299b78c9b2501b765055df4b6ea369dd443bf5 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 20:23:55 +0545 Subject: [PATCH 19/33] Sleep for 1000 seconds --- tests/integration/integration-tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index 45d5024b..1a2efabc 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -535,3 +535,5 @@ echo -e "##################################" echo -e "##################################" echo -e "END OF THE TEST" echo -e "##################################" + +sleep 1000 \ No newline at end of file From 9c9fb09d31c63a53ad1fa05f5a612dd17dd35f42 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 21:03:01 +0545 Subject: [PATCH 20/33] Increase sleep timing, set the default region for the AWS cli --- tests/integration/integration-tests.sh | 2 +- tests/integration/setup-aws-cli.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index 1a2efabc..e41fb879 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -536,4 +536,4 @@ echo -e "##################################" echo -e "END OF THE TEST" echo -e "##################################" -sleep 1000 \ No newline at end of file +sleep 10000 \ No newline at end of file diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh index efc24982..9f0d5463 100755 --- a/tests/integration/setup-aws-cli.sh +++ b/tests/integration/setup-aws-cli.sh @@ -11,4 +11,5 @@ S3_BUCKET="spark" # Configure AWS CLI credentials aws configure set aws_access_key_id $ACCESS_KEY aws configure set aws_secret_access_key $SECRET_KEY +aws configure set default.region us-east-2 echo "AWS CLI credentials set successfully" From ce0a06d402a5b819dac35b5ea01ba796fddeae99 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 21:23:28 +0545 Subject: [PATCH 21/33] Unset debug mode --- tests/integration/integration-tests.sh | 50 +++++++++++++------------- 1 file changed, 24 
insertions(+), 26 deletions(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index e41fb879..b8250e23 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -483,42 +483,42 @@ echo -e "##################################" setup_test_pod -# echo -e "##################################" -# echo -e "RUN EXAMPLE JOB" -# echo -e "##################################" +echo -e "##################################" +echo -e "RUN EXAMPLE JOB" +echo -e "##################################" -# (setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -# echo -e "##################################" -# echo -e "RUN SPARK SHELL IN POD" -# echo -e "##################################" +echo -e "##################################" +echo -e "RUN SPARK SHELL IN POD" +echo -e "##################################" -# (setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -# echo -e "##################################" -# echo -e "RUN PYSPARK IN POD" -# echo -e "##################################" +echo -e "##################################" +echo -e "RUN PYSPARK IN POD" +echo -e "##################################" -# (setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod -# echo -e "##################################" -# echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" -# echo -e "##################################" +echo -e "##################################" +echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" +echo -e "##################################" -# (setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod -# echo -e "########################################" -# echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" -# echo -e "########################################" +echo -e "########################################" +echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" +echo -e "########################################" -# (setup_user_admin_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod -# echo -e "########################################" -# echo -e "RUN EXAMPLE JOB WITH ERRORS" -# echo -e "########################################" +echo -e "########################################" +echo -e "RUN EXAMPLE JOB WITH ERRORS" +echo -e "########################################" -# (setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" @@ -535,5 +535,3 @@ echo -e "##################################" echo -e 
"##################################" echo -e "END OF THE TEST" echo -e "##################################" - -sleep 10000 \ No newline at end of file From 8ad6334753d7bf4e64b09e5804633f242477a3c5 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 21:46:14 +0545 Subject: [PATCH 22/33] Remove SSH access to GH runner --- .github/workflows/build.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index adbd9fbd..82b26dc8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,9 +44,6 @@ jobs: ARTIFACT=$(make help | grep 'Artifact: ') echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - - name: Change artifact permissions - run: sudo chmod a+r ${{ steps.artifact.outputs.name }} - - name: Upload locally built artifact uses: actions/upload-artifact@v3 with: From 87fb99ed96e6f30e962fd7419ade0f450f97821b Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Mon, 29 Jan 2024 21:53:45 +0545 Subject: [PATCH 23/33] Fix CI --- .github/workflows/build.yaml | 3 +++ .github/workflows/integration.yaml | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 82b26dc8..adbd9fbd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -44,6 +44,9 @@ jobs: ARTIFACT=$(make help | grep 'Artifact: ') echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + - name: Change artifact permissions + run: sudo chmod a+r ${{ steps.artifact.outputs.name }} + - name: Upload locally built artifact uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 61ff49e5..1007cc36 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -39,10 +39,6 @@ jobs: name: charmed-spark path: charmed-spark - - name: Setup upterm session - uses: lhotari/action-upterm@v1 - - - name: Run tests run: | # Unpack Artifact From 4ff64d0d6993d454d1de7b5d34b165010bf5f900 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 09:02:25 +0545 Subject: [PATCH 24/33] Revert "Add integration tests for Apache Iceberg integration" This reverts commit cc552b8f6a1caa6afb5c16e3650ef1fbbd21c041. 
--- CONTRIBUTING.md | 1 - Makefile | 12 +-- tests/integration/config-microk8s.sh | 1 - tests/integration/integration-tests.sh | 85 +-------------------- tests/integration/resources/test-iceberg.py | 39 ---------- tests/integration/setup-aws-cli.sh | 13 ---- 6 files changed, 4 insertions(+), 147 deletions(-) delete mode 100644 tests/integration/resources/test-iceberg.py delete mode 100644 tests/integration/setup-aws-cli.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c27e3b68..eba0a602 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,6 @@ cd charmed-spark-rock sudo snap install rockcraft --edge sudo snap install docker sudo snap install lxd -sudo snap install yq sudo snap install skopeo --edge --devmode ``` diff --git a/Makefile b/Makefile index 042a215c..14a7fa9c 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ _MAKE_DIR := .make_cache $(shell mkdir -p $(_MAKE_DIR)) K8S_TAG := $(_MAKE_DIR)/.k8s_tag -AWS_TAG := $(_MAKE_DIR)/.aws_tag IMAGE_NAME := $(shell yq .name rockcraft.yaml) VERSION := $(shell yq .version rockcraft.yaml) @@ -71,7 +70,7 @@ $(_TMP_OCI_TAG): $(_ROCK_OCI) touch $(_TMP_OCI_TAG) $(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) - docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f Dockerfile . + docker build - -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" < Dockerfile if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi touch $(CHARMED_OCI_TAG) @@ -81,15 +80,10 @@ $(K8S_TAG): sg microk8s ./tests/integration/config-microk8s.sh @touch $(K8S_TAG) -$(AWS_TAG): $(K8S_TAG) - @echo "=== Setting up and configure AWS CLI ===" - /bin/bash ./tests/integration/setup-aws-cli.sh - @touch $(AWS_TAG) - microk8s: $(K8S_TAG) $(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar + docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar $(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar @echo "=== Creating $(BASE_NAME) OCI archive ===" @@ -112,7 +106,7 @@ import: $(K8S_TAG) build microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME) endif -tests: $(K8S_TAG) $(AWS_TAG) +tests: @echo "=== Running Integration Tests ===" /bin/bash ./tests/integration/integration-tests.sh diff --git a/tests/integration/config-microk8s.sh b/tests/integration/config-microk8s.sh index 9461fb82..82c6ebf7 100755 --- a/tests/integration/config-microk8s.sh +++ b/tests/integration/config-microk8s.sh @@ -2,4 +2,3 @@ microk8s status --wait-ready microk8s config | tee ~/.kube/config microk8s.enable dns microk8s.enable rbac -microk8s.enable minio \ No newline at end of file diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index f2b8d9e4..a9267613 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -135,7 +135,7 @@ teardown_test_pod() { run_example_job_in_pod() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" - PREVIOUS_JOB=$(kubectl get pods -n ${NAMESPACE}| grep driver | tail -n 1 | cut -d' ' -f1) + PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1) NAMESPACE=$1 USERNAME=$2 @@ -166,82 +166,6 @@ run_example_job_in_pod() { validate_pi_value $pi } -get_s3_access_key(){ - kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d -} - -get_s3_secret_key(){ - kubectl get secret -n minio-operator 
microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d -} - -get_s3_endpoint(){ - kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}' -} - -create_s3_bucket(){ - S3_ENDPOINT=$(get_s3_endpoint) - BUCKET_NAME=$1 - aws --endpoint-url "http://$S3_ENDPOINT" s3api create-bucket --bucket "$BUCKET_NAME" - echo "Created S3 bucket ${BUCKET_NAME}" -} - -copy_file_to_s3_bucket(){ - BUCKET_NAME=$1 - FILE_PATH=$2 - BASE_NAME=$(basename "$FILE_PATH") - aws s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" - echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" -} - -run_iceberg_example_in_pod(){ - create_s3_bucket spark - copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py - - NAMESPACE="tests" - USERNAME="spark" - NUM_ROWS_TO_INSERT="4" - PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) - - kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" IM="$(spark_image)" \ - /bin/bash -c 'spark-client.spark-submit' \ - --username $UU --namespace $NN \ - --conf spark.kubernetes.driver.request.cores=100m \ - --conf spark.kubernetes.executor.request.cores=100m \ - --conf spark.kubernetes.container.image=$IM \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider \ - --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \ - --conf spark.hadoop.fs.s3a.path.style.access=true \ - --conf spark.hadoop.fs.s3a.access.key=$(get_s3_access_key) \ - --conf spark.hadoop.fs.s3a.secret.key=$(get_s3_secret_key) \ - --conf spark.jars.ivy=/tmp \ - --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ - --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ - --conf spark.sql.catalog.spark_catalog.type=hive \ - --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ - --conf spark.sql.catalog.local.type=hadoop \ - --conf spark.sql.catalog.local.warehouse=s3a://spark/warehouse \ - --conf spark.sql.defaultCatalog=local - s3a://spark/test-iceberg.py -n ${NUM_ROWS_TO_INSERT} - - DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) - - if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]] - then - echo "ERROR: Sample job has not run!" - exit 1 - fi - - DRIVER_POD_ID=$(kubectl get pods -n ${NAMESPACE} | grep test-iceberg-.*-driver | tail -n 1 | cut -d' ' -f1) - OUTPUT_LOG_LINE=$(kubectl logs ${DRIVER_POD_ID} -n ${NAMESPACE} | grep 'Number of rows inserted:' ) - NUM_ROWS_INSERTED=$(echo $OUTPUT_LOG_LINE | rev | cut -d' ' -f1 | rev) - - if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then - echo "ERROR: ${NUM_ROWS_TO_INSERT} were supposed to be inserted. Found ${NUM_ROWS_INSERTED} rows. Aborting with exit code 1." 
- exit 1 - fi - -} - run_example_job_in_pod_with_pod_templates() { SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar" @@ -501,13 +425,6 @@ echo -e "RUN EXAMPLE JOB WITH ERRORS" echo -e "########################################" (setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod - -echo -e "##################################" -echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" -echo -e "##################################" - -(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod - echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py deleted file mode 100644 index d8b34aca..00000000 --- a/tests/integration/resources/test-iceberg.py +++ /dev/null @@ -1,39 +0,0 @@ -import argparse -import random - -from pyspark.sql import SparkSession -from pyspark.sql.types import LongType, StructType, StructField - -parser = argparse.ArgumentParser("TestIceberg") -parser.add_argument("--num_rows", "-n", type=int) -args = parser.parse_args() -num_rows = args.num_rows - -spark = SparkSession\ - .builder\ - .appName("IcebergExample")\ - .getOrCreate() - - -schema = StructType([ - StructField("row_id", LongType(), True), - StructField("row_val", LongType(), True) -]) - -# df = spark.createDataFrame([], schema) -# df.writeTo("demo.foo.bar").create() - -# schema = spark.table("demo.nyc.taxis").schema -data = [] -for idx in range(num_rows): - row = (idx + 1, random.randint(1, 100)) - data.append(row) - -df = spark.createDataFrame(data, schema) -# df.writeTo("demo.nyc.taxis").append() -df.writeTo("demo.foo.bar").create() - - -df = spark.table("demo.foo.bar") -count = df.count() -print(f"Number of rows inserted: {count}") \ No newline at end of file diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh deleted file mode 100644 index c56cbda4..00000000 --- a/tests/integration/setup-aws-cli.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -# Install AWS CLI -sudo snap install aws-cli --classic - -# Get Access key and secret key from MinIO -ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d) -SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d) -S3_BUCKET="spark" - -# Configure AWS CLI credentials -aws configure set aws_access_key_id $ACCESS_KEY -aws configure set aws_secret_access_key $SECRET_KEY From c621d89ce3f5a5edfdad5a27f2d406d09c776fa8 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 09:02:36 +0545 Subject: [PATCH 25/33] Revert "Ignore build artifacts from VCS" This reverts commit cc7e95fb51b91fad1ce84088f72c893c2f826886. 
--- .gitignore | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 0b26130a..723ef36f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1 @@ -.idea -*.rock -*.tar -.make_cache \ No newline at end of file +.idea \ No newline at end of file From efb0ee8214546b0675ee99c9e036626b25011df3 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 09:54:03 +0545 Subject: [PATCH 26/33] Add missing shell variable --- tests/integration/integration-tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index b8250e23..c7f6b54b 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -196,6 +196,7 @@ copy_file_to_s3_bucket(){ BUCKET_NAME=$1 FILE_PATH=$2 BASE_NAME=$(basename "$FILE_PATH") + S3_ENDPOINT=$(get_s3_endpoint) aws --endpoint-url "http://$S3_ENDPOINT" s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" } From 2818165b04d857ac1a70fcbda2abe28b9f9f4ed6 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 09:55:19 +0545 Subject: [PATCH 27/33] Make default region configurable. --- tests/integration/setup-aws-cli.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh index 9f0d5463..b30293cb 100755 --- a/tests/integration/setup-aws-cli.sh +++ b/tests/integration/setup-aws-cli.sh @@ -6,10 +6,12 @@ sudo snap install aws-cli --classic # Get Access key and secret key from MinIO ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d) SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d) + S3_BUCKET="spark" +DEFAULT_REGION="us-east-2" # Configure AWS CLI credentials aws configure set aws_access_key_id $ACCESS_KEY aws configure set aws_secret_access_key $SECRET_KEY -aws configure set default.region us-east-2 +aws configure set default.region $DEFAULT_REGION echo "AWS CLI credentials set successfully" From c230cf70694e3de667bc3082f0bcadd8ed185b92 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 12:52:07 +0545 Subject: [PATCH 28/33] Remove commented lines --- tests/integration/resources/test-iceberg.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py index d8b34aca..4675ed87 100644 --- a/tests/integration/resources/test-iceberg.py +++ b/tests/integration/resources/test-iceberg.py @@ -20,17 +20,12 @@ StructField("row_val", LongType(), True) ]) -# df = spark.createDataFrame([], schema) -# df.writeTo("demo.foo.bar").create() - -# schema = spark.table("demo.nyc.taxis").schema data = [] for idx in range(num_rows): row = (idx + 1, random.randint(1, 100)) data.append(row) df = spark.createDataFrame(data, schema) -# df.writeTo("demo.nyc.taxis").append() df.writeTo("demo.foo.bar").create() From fff7f4a704975999ac4363e887d647afabbfcca0 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 12:53:45 +0545 Subject: [PATCH 29/33] Format with black. 
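The fix in PATCH 26 above works because copy_file_to_s3_bucket had been relying on S3_ENDPOINT leaking in from whichever helper happened to run first. A slightly stricter variant, sketched here purely for illustration and not what this series ships, keeps the variables local and fails fast if the endpoint cannot be resolved:

    copy_file_to_s3_bucket(){
        # Copy a local file into the given bucket via the in-cluster MinIO endpoint
        local BUCKET_NAME=$1
        local FILE_PATH=$2
        local BASE_NAME S3_ENDPOINT
        BASE_NAME=$(basename "$FILE_PATH")
        S3_ENDPOINT=$(get_s3_endpoint)
        # Refuse to run against an empty endpoint instead of calling "http://"
        [ -n "$S3_ENDPOINT" ] || { echo "ERROR: could not resolve S3 endpoint" >&2; return 1; }
        aws --endpoint-url "http://$S3_ENDPOINT" s3 cp "$FILE_PATH" "s3://$BUCKET_NAME/$BASE_NAME"
        echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}"
    }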
--- tests/integration/resources/test-iceberg.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py index 4675ed87..b4be2551 100644 --- a/tests/integration/resources/test-iceberg.py +++ b/tests/integration/resources/test-iceberg.py @@ -9,16 +9,12 @@ args = parser.parse_args() num_rows = args.num_rows -spark = SparkSession\ - .builder\ - .appName("IcebergExample")\ - .getOrCreate() +spark = SparkSession.builder.appName("IcebergExample").getOrCreate() -schema = StructType([ - StructField("row_id", LongType(), True), - StructField("row_val", LongType(), True) -]) +schema = StructType( + [StructField("row_id", LongType(), True), StructField("row_val", LongType(), True)] +) data = [] for idx in range(num_rows): @@ -31,4 +27,4 @@ df = spark.table("demo.foo.bar") count = df.count() -print(f"Number of rows inserted: {count}") \ No newline at end of file +print(f"Number of rows inserted: {count}") From 4d7fbd6c3ec0893d24dee74f6059b92815d56d8f Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 15:16:08 +0545 Subject: [PATCH 30/33] Uncomment a line in integration tests. --- tests/integration/integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index c7f6b54b..6f21385b 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -525,7 +525,7 @@ echo -e "##################################" echo -e "RUN EXAMPLE THAT USES ICEBERG LIBRARIES" echo -e "##################################" -(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) #|| cleanup_user_failure_in_pod +(setup_user_admin_context && test_iceberg_example_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "TEARDOWN TEST POD" From c741c458b1ec0ff0a03f99b118557088c0c04c82 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Tue, 30 Jan 2024 15:22:14 +0545 Subject: [PATCH 31/33] Uncomment teardown_test_pod --- tests/integration/integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index 6f21385b..b8b4e03f 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -531,7 +531,7 @@ echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" -# teardown_test_pod +teardown_test_pod echo -e "##################################" echo -e "END OF THE TEST" From 1da1160b7e260020fddd0b13f31456baed0138e4 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Thu, 1 Feb 2024 16:22:57 +0545 Subject: [PATCH 32/33] Add comments --- tests/integration/integration-tests.sh | 37 ++++++++++++++++++++- tests/integration/resources/test-iceberg.py | 9 +++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index b8b4e03f..fecdcbeb 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -167,18 +167,22 @@ run_example_job_in_pod() { } get_s3_access_key(){ + # Prints out S3 Access Key by reading it from K8s secret kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d } get_s3_secret_key(){ + # Prints 
out S3 Secret Key by reading it from K8s secret kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d } get_s3_endpoint(){ + # Prints out the endpoint S3 bucket is exposed on. kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}' } create_s3_bucket(){ + # Creates a S3 bucket with the given name. S3_ENDPOINT=$(get_s3_endpoint) BUCKET_NAME=$1 aws --endpoint-url "http://$S3_ENDPOINT" s3api create-bucket --bucket "$BUCKET_NAME" @@ -186,6 +190,7 @@ create_s3_bucket(){ } delete_s3_bucket(){ + # Deletes a S3 bucket with the given name. S3_ENDPOINT=$(get_s3_endpoint) BUCKET_NAME=$1 aws --endpoint-url "http://$S3_ENDPOINT" s3 rb "s3://$BUCKET_NAME" --force @@ -193,23 +198,39 @@ delete_s3_bucket(){ } copy_file_to_s3_bucket(){ + # Copies a file from local to S3 bucket. + # The bucket name and the path to file that is to be uploaded is to be provided as arguments BUCKET_NAME=$1 FILE_PATH=$2 + + # If file path is '/foo/bar/file.ext', the basename is 'file.ext' BASE_NAME=$(basename "$FILE_PATH") S3_ENDPOINT=$(get_s3_endpoint) + + # Copy the file to S3 bucket aws --endpoint-url "http://$S3_ENDPOINT" s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME" echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}" } test_iceberg_example_in_pod(){ - create_s3_bucket spark + # Test Iceberg integration in Charmed Spark Rock + + # First create S3 bucket named 'spark' + create_s3_bucket spark] + + # Copy 'test-iceberg.py' script to 'spark' bucket copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py NAMESPACE="tests" USERNAME="spark" + + # Number of rows that are to be inserted during the test. NUM_ROWS_TO_INSERT="4" + + # Number of driver pods that exist in the namespace already. PREVIOUS_DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + # Submit the job from inside 'testpod' kubectl exec testpod -- \ env \ UU="$USERNAME" \ @@ -241,17 +262,31 @@ test_iceberg_example_in_pod(){ --conf spark.sql.defaultCatalog=local \ s3a://spark/test-iceberg.py -n $NUM_ROWS' + # Delete 'spark' bucket delete_s3_bucket spark + + # Number of driver pods after the job is completed. DRIVER_PODS_COUNT=$(kubectl get pods -n ${NAMESPACE} | grep driver | wc -l) + # If the number of driver pods is same as before, job has not been run at all! if [[ "${PREVIOUS_DRIVER_PODS_COUNT}" == "${DRIVER_PODS_COUNT}" ]] then echo "ERROR: Sample job has not run!" exit 1 fi + # Find the ID of the driver pod that ran the job. 
+ # tail -n 1 => Filter out the last line + # cut -d' ' -f1 => Split by spaces and pick the first part DRIVER_POD_ID=$(kubectl get pods -n ${NAMESPACE} | grep test-iceberg-.*-driver | tail -n 1 | cut -d' ' -f1) + + # Filter out the output log line OUTPUT_LOG_LINE=$(kubectl logs ${DRIVER_POD_ID} -n ${NAMESPACE} | grep 'Number of rows inserted:' ) + + # Fetch out the number of rows inserted + # rev => Reverse the string + # cut -d' ' -f1 => Split by spaces and pick the first part + # rev => Reverse the string back NUM_ROWS_INSERTED=$(echo $OUTPUT_LOG_LINE | rev | cut -d' ' -f1 | rev) if [ "${NUM_ROWS_INSERTED}" != "${NUM_ROWS_TO_INSERT}" ]; then diff --git a/tests/integration/resources/test-iceberg.py b/tests/integration/resources/test-iceberg.py index b4be2551..929914e4 100644 --- a/tests/integration/resources/test-iceberg.py +++ b/tests/integration/resources/test-iceberg.py @@ -9,22 +9,27 @@ args = parser.parse_args() num_rows = args.num_rows +# Create Spark session spark = SparkSession.builder.appName("IcebergExample").getOrCreate() - +# Create schema schema = StructType( [StructField("row_id", LongType(), True), StructField("row_val", LongType(), True)] ) +# Generate 'num_rows' number of random rows to be inserted. data = [] for idx in range(num_rows): row = (idx + 1, random.randint(1, 100)) data.append(row) +# Create a data frame and write it df = spark.createDataFrame(data, schema) df.writeTo("demo.foo.bar").create() - +# Read back the inserted data and count the number of rows df = spark.table("demo.foo.bar") count = df.count() + +# Print the number of rows print(f"Number of rows inserted: {count}") From 29c45bcae0b70119f64a99a8cf686db966aa9b19 Mon Sep 17 00:00:00 2001 From: Bikalpa Dhakal Date: Thu, 1 Feb 2024 17:19:33 +0545 Subject: [PATCH 33/33] Fix a typo --- tests/integration/integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh index fecdcbeb..a73961bc 100755 --- a/tests/integration/integration-tests.sh +++ b/tests/integration/integration-tests.sh @@ -216,7 +216,7 @@ test_iceberg_example_in_pod(){ # Test Iceberg integration in Charmed Spark Rock # First create S3 bucket named 'spark' - create_s3_bucket spark] + create_s3_bucket spark # Copy 'test-iceberg.py' script to 'spark' bucket copy_file_to_s3_bucket spark ./tests/integration/resources/test-iceberg.py
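The NUM_ROWS_INSERTED extraction above (rev | cut -d' ' -f1 | rev) takes the last space-separated field of the driver log line. An equivalent formulation, offered only as an alternative sketch and not what the test script uses, is awk's $NF, which states the intent more directly:

    # Last whitespace-separated field of the log line, e.g. "Number of rows inserted: 4" -> "4"
    NUM_ROWS_INSERTED=$(echo "$OUTPUT_LOG_LINE" | awk '{print $NF}')

Both forms assume the count is the final token on the line, which is what the print statement in test-iceberg.py guarantees.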