From d62f9365905b55f160957a8dc718bc33dacc2299 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Tue, 18 Aug 2020 20:30:23 +0300 Subject: [PATCH 1/4] adding auth options and auth_secret --- src/baskerville/spark/__init__.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/baskerville/spark/__init__.py b/src/baskerville/spark/__init__.py index 0396cdae..f4b09451 100644 --- a/src/baskerville/spark/__init__.py +++ b/src/baskerville/spark/__init__.py @@ -10,8 +10,10 @@ from pyspark import SparkConf, StorageLevel from pyspark.sql import SparkSession +from baskerville.models.config import SparkConfig -def get_or_create_spark_session(spark_conf): + +def get_or_create_spark_session(spark_conf: SparkConfig): """ Returns a configured spark session :param SparkConfig spark_conf: the spark configuration @@ -145,6 +147,24 @@ def get_or_create_spark_session(spark_conf): conf.set('spark.sql.shuffle.partitions', spark_conf.shuffle_partitions) conf.set('spark.sql.autoBroadcastJoinThreshold', 1024*1024*100) # 100MB + # security + # https://spark.apache.org/docs/latest/security.html + # note that: The same secret is shared by all Spark applications and + # daemons in that case, which limits the security of these deployments, + # especially on multi-tenant clusters. + conf.set('spark.authenticate', 'true') + conf.set('spark.authenticate.secret', spark_conf.auth_secret) + + # encryption + conf.set('spark.network.crypto.enabled', 'true') + conf.set('spark.io.encryption.enabled', 'true') + # conf.set('spark.ui.filters', 'org.apache.spark.examples.BasicAuthFilter') + + # The REST Submission Server and the MesosClusterDispatcher do not support + # authentication. You should ensure that all network access to the REST API + # & MesosClusterDispatcher (port 6066 and 7077 respectively by default) are + # restricted to hosts that are trusted to submit jobs. + spark = SparkSession.builder \ .config(conf=conf) \ .appName(spark_conf.app_name) \ From ddb307dc008b1ee2b663e1b50fb033f617efd8db Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Tue, 18 Aug 2020 20:30:50 +0300 Subject: [PATCH 2/4] auth_secret --- src/baskerville/models/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/baskerville/models/config.py b/src/baskerville/models/config.py index 86a1b615..7ae3fb7e 100644 --- a/src/baskerville/models/config.py +++ b/src/baskerville/models/config.py @@ -712,6 +712,7 @@ class KafkaConfig(Config): ssl_cafile = '' ssl_certfile = '' ssl_keyfile = '' + auth_secret = 'TEST_SECRET' def __init__(self, config): super(KafkaConfig, self).__init__(config) From 309b81e13bf09178e63e62dcda4727eaa5024bf6 Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Thu, 20 Aug 2020 17:58:07 +0300 Subject: [PATCH 3/4] Spark UI SSL --- conf/conf_example_baskerville.yaml | 7 +++++++ data/scripts/ssl_for_sparkui.sh | 24 ++++++++++++++++++++++ src/baskerville/models/config.py | 33 ++++++++++++++++++++++++++++-- src/baskerville/spark/__init__.py | 21 +++++++++++++++++-- 4 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 data/scripts/ssl_for_sparkui.sh diff --git a/conf/conf_example_baskerville.yaml b/conf/conf_example_baskerville.yaml index b54a848a..3275b0d5 100644 --- a/conf/conf_example_baskerville.yaml +++ b/conf/conf_example_baskerville.yaml @@ -134,6 +134,13 @@ spark: kryoserializer_buffer: '1024k' # It is suggested that you omit setting kryoserializer_buffer_max and kryoserializer_buffer and only set them if you get serialization errors. driver_java_options: '-verbose:gc' # Optional. When on a local machine with less than 36GB of ram -XX:+UseCompressedOops executor_extra_java_options: '-verbose:gc' # Optional. When on a local machine with less than 36GB of ram -XX:+UseCompressedOops + auth_secret: 'TEST_SECRET' # Optional. For RPC auth in cluster set up + ssl_enabled: True # Optional. Sets SSL for the spark ui - all following configuration must be provided -- to generate cert and import use ssl_for_sparkui.sh under data/scripts + ssl_truststore: '/path/to/truststore' + ssl_truststore_password: 'examplestorepass' + ssl_keystore: '/path/to/keystore' + ssl_keystore_password: 'examplestorepass' + ssl_keypassword: 'examplekeypass' # to connect to the jvm for memory profiling and deugging (remove the -Dcom.sun.management.jmxremote.port=1098 if more than one executors because it will cause the other executors to fail): # -XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UnlockDiagnosticVMOptions -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=1098 # depending on your configuration and resources: diff --git a/data/scripts/ssl_for_sparkui.sh b/data/scripts/ssl_for_sparkui.sh new file mode 100644 index 00000000..2699cdd2 --- /dev/null +++ b/data/scripts/ssl_for_sparkui.sh @@ -0,0 +1,24 @@ +# source: https://www.ibm.com/support/knowledgecenter/en/SS3H8V_1.1.0/com.ibm.izoda.v1r1.azka100/topics/azkic_t_securingwebUIs.htm + +echo ">>> Setting up SSL for Spark UI..." +echo "Keystore path : " +read -s -r KEYSTORE_PATH +echo "Truststore path : " +read -s -r TRUSTSTORE_PATH +echo "Store password : " +read -s -r STORE_PASS +echo "Key password : " +read -s -r KEY_PASS +echo "L : " +read -s -r L +echo "S : " +read -s -r S +echo "C : " +read -s -r C + +keytool -genkeypair -keystore "$KEYSTORE_PATH/keystore" -keyalg RSA -alias selfsigned -dname "CN=sparkcert L=$L S=$S C=$C" -storepass "$STORE_PASS" -keypass "$KEY_PASS" + +keytool -exportcert -keystore "$KEYSTORE_PATH/keystore" -alias selfsigned -storepass $STORE_PASS -file spark.cer + +# note: do not forget to import cert in all nodes +keytool -importcert -keystore "$TRUSTSTORE_PATH/truststore" -alias selfsigned -storepass $STORE_PASS -file spark.cer -noprompt \ No newline at end of file diff --git a/src/baskerville/models/config.py b/src/baskerville/models/config.py index 7ae3fb7e..4b4e4b69 100644 --- a/src/baskerville/models/config.py +++ b/src/baskerville/models/config.py @@ -712,7 +712,6 @@ class KafkaConfig(Config): ssl_cafile = '' ssl_certfile = '' ssl_keyfile = '' - auth_secret = 'TEST_SECRET' def __init__(self, config): super(KafkaConfig, self).__init__(config) @@ -771,9 +770,26 @@ class SparkConfig(Config): off_heap_size = None redis_host = 'localhost' redis_port = 6379 + auth_secret = 'TEST_SECRET' + admin_acls = 'admin' + driver_port = 18050 + block_manager_port = 18060 + ssl_enabled = 'true' + ssl_truststore = None + ssl_truststore_password = None + ssl_keystore = None + ssl_keystore_password = None + ssl_keypassword = None def __init__(self, config): super(SparkConfig, self).__init__(config) + self._ssl_properties = { + 'ssl_truststore': self.ssl_truststore, + 'ssl_truststore_password': self.ssl_truststore_password, + 'ssl_keystore': self.ssl_keystore, + 'ssl_keystore_password': self.ssl_keystore_password, + 'ssl_keypassword': self.ssl_keypassword + } def validate(self): logger.debug('Validating SparkConfig...') @@ -825,7 +841,20 @@ def validate(self): self.event_log = 'false' else: self.event_log = 'true' - + if not self.ssl_enabled: + self.ssl_enabled = 'false' + else: + self.ssl_enabled = 'true' + for name, prop in enumerate(self._ssl_properties): + if not prop: + self.add_error(ConfigError( + f'No {name} while ssl_enabled is set to "true" ', + [name], + )) + warnings.warn( + 'SSL is enabled, so spark ui will redirect to ' + 'https://localhost:4442' + ) if self.metrics_conf and not self.jar_packages: warnings.warn('Spark metrics configuration has been set but ' 'jar packages is empty, ' diff --git a/src/baskerville/spark/__init__.py b/src/baskerville/spark/__init__.py index f4b09451..e6acbb25 100644 --- a/src/baskerville/spark/__init__.py +++ b/src/baskerville/spark/__init__.py @@ -152,13 +152,30 @@ def get_or_create_spark_session(spark_conf: SparkConfig): # note that: The same secret is shared by all Spark applications and # daemons in that case, which limits the security of these deployments, # especially on multi-tenant clusters. - conf.set('spark.authenticate', 'true') - conf.set('spark.authenticate.secret', spark_conf.auth_secret) + if spark_conf.auth_secret: + conf.set('spark.authenticate', 'true') + conf.set('spark.authenticate.secret', spark_conf.auth_secret) # encryption conf.set('spark.network.crypto.enabled', 'true') conf.set('spark.io.encryption.enabled', 'true') + # https://www.fortytools.com/blog/servlet-filter-for-http-basic-auth # conf.set('spark.ui.filters', 'org.apache.spark.examples.BasicAuthFilter') + # conf.set('spark.acls.enable', 'true') + # conf.set('spark.admin.acls', spark_conf.admin_acls) + + # SSL https://spark.apache.org/docs/latest/security.html#ssl-configuration + if spark_conf.ssl_enabled == 'true': + conf.set('spark.ssl.enabled', spark_conf.ssl_enabled) + conf.set('spark.ssl.trustStore', spark_conf.ssl_truststore) + conf.set('spark.ssl.trustStorePassword', spark_conf.ssl_truststore_password) + conf.set('spark.ssl.keyStore', spark_conf.ssl_keystore) + conf.set('spark.ssl.keyStorePassword', spark_conf.ssl_keystore_password) + conf.set('spark.ssl.keyPassword', spark_conf.ssl_keypassword) + conf.set('spark.ssl.protocol', 'TLSv1.2') + + # conf.set('spark.driver.port', spark_conf.driver_port) + # conf.set('spark.blockManager.port', spark_conf.block_manager_port) # The REST Submission Server and the MesosClusterDispatcher do not support # authentication. You should ensure that all network access to the REST API From c8b4d3d75a363a6f2d681ba8cea0569941abd10b Mon Sep 17 00:00:00 2001 From: Maria Karanasou Date: Fri, 21 Aug 2020 09:31:38 +0300 Subject: [PATCH 4/4] Basic security jar with only one user - admin --- data/jars/baskervilleSecurityFilter.jar | Bin 0 -> 2867 bytes src/baskerville/models/config.py | 2 +- src/baskerville/spark/__init__.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 data/jars/baskervilleSecurityFilter.jar diff --git a/data/jars/baskervilleSecurityFilter.jar b/data/jars/baskervilleSecurityFilter.jar new file mode 100644 index 0000000000000000000000000000000000000000..08efb2cd6ff54143508b188faf3956f7b598c6e1 GIT binary patch literal 2867 zcma);c~lb09>>WYEmIRkbIVec6f-qRZP5~N0bDRmO(op1OwkmVG8ZmcQE8;iHJ{30Mg3V?RR*_%3H5N6BI0RTyWn~SX6nnU70Hn;y7R$X=c8AjVNz&)x{L{dV`B8w6lX>>#?K9;{9+e%o%Osgnk|WyA;r8OoOvo6ATTXS2r*mt$@Nu z)j=QC4Xy!tiJxP|vFrZbMGmBO<>2ZYj;r&pE|UK71?cMOQ9Owl5_>WtDk{W`6herP zjff+gIp9eV1pD~7a708@Tu3a85QQg^Zh8ZKaBnpBY^0E*DZjli`gF@3hm$%Z0OzFn zu5*uA$4WyuMe0~!6K(Dn+@F&%IF`#Gwiuujq6%*oq6>HRj_FmSqq-}c-fwex>p{?Y4xKHaH-Hrzd(@QfC=_`wK1LRL_2R2Ij+|i?=6R%RdW!=ubp^nsR20#|x z5SvV?*6%N94pfwuDn;}BETdk(Qso>KR*7f;??FvYQDR|s-{MOq$5t{5W$6yky%AgVy2bAWZiVwP3cg34LK< zxsolf7U*1^o~At2I0>tw(fWbZ^fE<0RV&I$WhX2)O<1~#K=B7%-1H#aj*YdS`zUAA zwS@V?s~As;_hh#Kp%xIcu=|r1CVmKAfpu~FqgF8;gwV2xd{-R88LP{A&pcdWR$49U zJc!e?3}mnZaAKE_WBTpihP{o((b}* z+C&JxwjwKcK5QlNeYwss!tujZG{b3j(EM&3u!Dw z^u+mGgtnH5fFLXbt5mF2(|V%}>FygN%GieQQcD&oz9wAXh3OY?t)N|Zz86*zzjjSz zM-Wwh)@``#!^qFj7FTRC2X3kFD8f;_9GEn=fB~@4e07VUp!ux+BxJfkxeJNR&Chp# zc(cCwSfDquyR%`T&kz*YBQ~%@FO6q?U!W<1_+#*M%=i=yJ~ThK921`+9q1*t(hh95 zR;{ETT1HKdb2~q?`6ba6>{QPtaBe)&C3xwgz8eXqkd{jEmg%-izLOizJjj&$<}4%zOiB`>%7)`NH3V_bPb ziut)Fa)T@S^yY$fGXoFq)tpKxNb9_D-u^&Wwj=kk_qI~#<-B&@u+4JzLuCoYWthZ@ z>@w?v-7+_4?7ExE&F*GB+@fFm=@S?($p-eGtX=57Z_5_jB*ja`!Esf&>A&GRB_Rqd zZk`QC)&Jx($?XSNblD!GnS$FR+J|LopIMG2F`7;!qxp5RsF@nA;Tscu0q}1{2nF1m zj^v>OeG^9 zUj<$J6tf|}Qt9jy0V=xTn1k8}bY#~BZaJg&`?b)_@wRkUU;h1DTE_W#FPPIZ z(l0*Nq3yP&Qa;FQk}29&^l-w0?UZ_4{P8~$z6tzbe(p`A8$9p4NN%es9~sjA z$=pxHW~3EP#teH8#gIrCPO8>d1?}d%rSVKxdnpF~%gUUZ%H=Dgv-^mQTK-vG6*CLt zNoy5g_eiP7Znng8ts|n{@42=+gQSEppQ~Ey#3nYRJnffS)tMHJZha9Zz;>vfv2Eq@ z!H%e0Y>UY$bjoAqxyU$wtN6s`fE6F!ph98E`>a`fDct7A7h|k&x@ItEyfFBV2W@U& z|I}jNbMjv49S>8$&SayRZJ$FXwr>Q{GgtD3f{tAWdmp(rkmhSE!L>Vd&C(MOKAlQ5 z_Z4i%33%EL-=sg5r=^CW_DHj>RD=2t1t`nYN_K2)b8|%T&!>V)^>$2@X)hw-wqSGRnFPEANO3_RZJdvTq%?Cl48od zF!n=?dpOkQe7-$492pmM;XHm*f)`%qq>`SlfkSw^OYr@of+kKlWZ21mda}Y_haH7f zNB$6Sza4&hT5&UOa^!SIHgPm)V_2%NI`77D|2%|2wDf2uGi-aO9&W-fS%ws;(AN4& zsH|#{a$2wg*MVOkC$}x*c~z8NRkiha)YlG*N`RgoNVAy?%l41ruwA>Ayr&K7<}Tlx z%WZ_*ipi&IZ^^u9z6E-x_QYj(7i%tQ@`KU-siH-2#gyD= z|M=mw&k1^MUKf^ySw^mX(z!dYP1k;9dVBmdSqL&GAB_XZY82^W^(m(eJ=)9fDnnk5 zeo$5E8O;k8NKKJt+$%%~P>OICS;5q29o?_;(e1jSa|^+#7&&W)>l&FQs}6%upXMV- zZaJfj>PYq_10Fx|$inbJc~Qq=?+k*yZz za|l|@w>|UD5i^GtQNFkn8-cS2q?Q%dP;mCjh@&YG063|#24U+YQ~`e{$^T;F8cF_L zf1=?USN=2lE8DGc