Bumping version to 0.0.15

igorborgest · igorborgest · commit ef02cee5a773 · 2019-10-28T16:26:21.000-03:00
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 > Utility belt to handle data on AWS.
 
-[![Release](https://img.shields.io/badge/release-0.0.14-brightgreen.svg)](https://pypi.org/project/awswrangler/)
+[![Release](https://img.shields.io/badge/release-0.0.15-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Downloads](https://img.shields.io/pypi/dm/awswrangler.svg)](https://pypi.org/project/awswrangler/)
 [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/en/latest/?badge=latest)
@@ -280,7 +280,7 @@ cluster_id = session.emr.create_cluster(
     applications=["Hadoop", "Spark", "Ganglia", "Hive"],
     visible_to_all_users=True,
     key_pair_name=None,
-    spark_jars_path=f"s3://...jar",
+    spark_jars_path=[f"s3://...jar"],
     maximize_resource_allocation=True,
     keep_cluster_alive_when_no_steps=True,
     termination_protected=False
diff --git a/awswrangler/__version__.py b/awswrangler/__version__.py
@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0.14"
+__version__ = "0.0.15"
 __license__ = "Apache License 2.0"
diff --git a/awswrangler/emr.py b/awswrangler/emr.py
@@ -111,10 +111,10 @@ def _build_cluster_args(**pars):
                 "Properties": {}
             }
             if pars["spark_jars_path"] is not None:
-                spark_defaults["Properties"]["spark.jars"] = pars["spark_jars_path"]
+                spark_defaults["Properties"]["spark.jars"]: str = ",".join(pars["spark_jars_path"])
             if pars["spark_defaults"] is not None:
                 for k, v in pars["spark_defaults"].items():
-                    spark_defaults["Properties"][k] = v
+                    spark_defaults["Properties"][k]: str = v
             args["Configurations"].append(spark_defaults)
 
         # Applications
@@ -313,7 +313,7 @@ def create_cluster(self,
                        security_groups_slave_additional: Optional[List[str]] = None,
                        security_group_service_access: Optional[str] = None,
                        spark_log_level: str = "WARN",
-                       spark_jars_path: Optional[str] = None,
+                       spark_jars_path: Optional[List[str]] = None,
                        spark_defaults: Dict[str, str] = None,
                        maximize_resource_allocation: bool = False,
                        steps: Optional[List[Dict[str, Collection[str]]]] = None,
@@ -364,7 +364,7 @@ def create_cluster(self,
         :param security_groups_slave_additional: A list of additional Amazon EC2 security group IDs for the core and task nodes.
         :param security_group_service_access: The identifier of the Amazon EC2 security group for the Amazon EMR service to access clusters in VPC private subnets.
         :param spark_log_level: log4j.rootCategory log level (ALL, DEBUG, INFO, WARN, ERROR, FATAL, OFF, TRACE)
-        :param spark_jars_path: spark.jars (https://spark.apache.org/docs/latest/configuration.html) (e.g. s3://...)
+        :param spark_jars_path: List of jar paths, joined with commas to set spark.jars (e.g. ["s3://.../foo.jar", "s3://.../boo.jar"]) (https://spark.apache.org/docs/latest/configuration.html)
         :param spark_defaults: (https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html#spark-defaults)
         :param maximize_resource_allocation: Configure your executors to utilize the maximum resources possible (https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html#emr-spark-maximizeresourceallocation)
         :param steps: Steps definitions (Obs: Use EMR.build_step() to build that)
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
@@ -241,7 +241,7 @@ Create EMR cluster
         applications=["Hadoop", "Spark", "Ganglia", "Hive"],
         visible_to_all_users=True,
         key_pair_name=None,
-        spark_jars_path=f"s3://...jar",
+        spark_jars_path=[f"s3://...jar"],
         maximize_resource_allocation=True,
         keep_cluster_alive_when_no_steps=True,
         termination_protected=False
diff --git a/testing/test_awswrangler/test_emr.py b/testing/test_awswrangler/test_emr.py
@@ -129,7 +129,7 @@ def test_cluster_single_node(session, bucket, cloudformation_outputs):
                                             visible_to_all_users=True,
                                             key_pair_name=None,
                                             spark_log_level="ERROR",
-                                            spark_jars_path=f"s3://{bucket}/jars/",
+                                            spark_jars_path=[f"s3://{bucket}/jars/"],
                                             spark_defaults={"spark.default.parallelism": "400"},
                                             maximize_resource_allocation=True,
                                             keep_cluster_alive_when_no_steps=False,