From 3050e816a7413ad77cef25526bb126536d4fab5e Mon Sep 17 00:00:00 2001 From: Michael Kamprath Date: Thu, 23 Nov 2023 07:19:31 +0000 Subject: [PATCH] updated spark version --- spark-qfs-swarm/jupyter-server/Dockerfile | 10 ++++++++-- spark-qfs-swarm/worker-node/Dockerfile | 10 +++++----- .../worker-node/spark-conf/spark-defaults.conf | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/spark-qfs-swarm/jupyter-server/Dockerfile b/spark-qfs-swarm/jupyter-server/Dockerfile index 3683b57..6493a6b 100644 --- a/spark-qfs-swarm/jupyter-server/Dockerfile +++ b/spark-qfs-swarm/jupyter-server/Dockerfile @@ -12,10 +12,10 @@ ENV SPARK_NLP_VERSION spark-nlp-spark32_2.12:3.4.4 USER root RUN apt-get install -y g++ RUN pip3 install \ - notebook \ + notebook==6.4.12 \ jupyter_nbextensions_configurator \ jupyter_contrib_nbextensions \ - yapf wget + yapf wget jupyter_server COPY start-jupyter.sh / @@ -26,6 +26,12 @@ RUN jupyter nbextension enable toc2/main RUN jupyter nbextension enable codefolding/main RUN jupyter nbextension enable execute_time/ExecuteTime +USER root +RUN pip3 uninstall -y traitlets +RUN pip3 install traitlets==5.9.0 + + +USER spark RUN mkdir -p /home/spark/jupyter/runtime \ && mkdir -p /home/spark/jupyter/notebooks \ && mkdir -p /home/spark/jars/ diff --git a/spark-qfs-swarm/worker-node/Dockerfile b/spark-qfs-swarm/worker-node/Dockerfile index 3ffe9e3..aa3e6d0 100644 --- a/spark-qfs-swarm/worker-node/Dockerfile +++ b/spark-qfs-swarm/worker-node/Dockerfile @@ -15,10 +15,10 @@ MAINTAINER Michael Kamprath "https://github.com/michaelkamprath" # spark-master - the service where the spark master runs # -ARG QFS_VERSION=2.2.5 -ARG SPARK_VERSION=3.3.2 -ARG HADOOP_MAJOR_VERSION=2 -ARG HADOOP_VERSION=2.7.2 +ARG QFS_VERSION=2.2.6 +ARG SPARK_VERSION=3.3.3 +ARG HADOOP_MAJOR_VERSION=3 +ARG HADOOP_VERSION=3.3.1 ARG SCALA_VERSION=2.12.15 RUN apt-get update \ @@ -97,7 +97,7 @@ RUN mkdir -p /data/spark \ # add python libraries useful in PySpark RUN python3 -mpip install matplotlib \ - && pip3 install pandas seaborn pyarrow spark-nlp + && pip3 install pandas seaborn pyarrow spark-nlp numpy==1.23.1 # copy QFS and Spark configurations COPY ./qfs-conf/* $QFS_HOME/conf/ diff --git a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf index 36c6694..91eddb4 100644 --- a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf +++ b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf @@ -14,7 +14,7 @@ spark.driver.memory 10g spark.driver.memoryOverhead 3g spark.driver.cores 2 spark.driver.extraJavaOptions -XX:+UseG1GC - +spark.driver.maxResultSize 2g # operational configurations spark.logConf true