
Commit 886b4ef

markheger committed
1.1.0, download_toolkit and configure_connection added
1 parent 0b20d3e commit 886b4ef

5 files changed: +108 additions, −14 deletions

package/docs/source/conf.py

Lines changed: 2 additions & 2 deletions
@@ -65,9 +65,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.0'
+version = '1.1'
 # The full version, including alpha/beta/rc tags.
-release = '1.0.2'
+release = '1.1.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

package/setup.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
     ],
-    install_requires=['streamsx'],
+    install_requires=['streamsx', 'streamsx.toolkits'],
 
     test_suite='nose.collector',
     tests_require=['nose']

package/streamsx/hdfs/__init__.py

Lines changed: 6 additions & 10 deletions
@@ -22,13 +22,9 @@
 The mandatory JSON elements are "user", "password" and "webhdfs"::
 
     {
-        "cluster": {
-            "password": "<PASSWORD>",
-            "service_endpoints": {
-                "webhdfs": "https://<HOST>:<PORT>/gateway/default/webhdfs/v1/"
-            },
-            "user": "<USER>"
-        },
+        "user": "<USER>",
+        "password": "<PASSWORD>",
+        "webhdfs": "https://<HOST>:<PORT>"
     }
 
 If you are using HDFS server(s) different from the "Analytics Engine" service,
@@ -69,7 +65,7 @@
 
 
 """
-__version__='1.0.2'
+__version__='1.1.0'
 
-__all__ = ['scan', 'read', 'write']
-from streamsx.hdfs._hdfs import scan, read, write
+__all__ = ['download_toolkit', 'configure_connection', 'scan', 'read', 'write']
+from streamsx.hdfs._hdfs import download_toolkit, configure_connection, scan, read, write
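With this change, credentials for a plain HDFS server are given as a flat JSON
object instead of the nested "cluster" structure of the "Analytics Engine"
service. A minimal sketch of building such a credentials dict and storing it
with the new configure_connection (all values are placeholders; a live Streams
instance is assumed, obtained as in the configure_connection docstring below)::

    import streamsx.hdfs as hdfs

    # Flat credentials format accepted as of 1.1.0 (placeholder values)
    credentials = {
        'user': '<USER>',
        'password': '<PASSWORD>',
        'webhdfs': 'https://<HOST>:<PORT>'
    }

    # With a streamsx.rest_primitives.Instance at hand (for example via
    # Instance.of_service(cfg)), the dict can be stored as an application
    # configuration named 'hdfs':
    # app_cfg_name = hdfs.configure_connection(instance, credentials=credentials)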

package/streamsx/hdfs/_hdfs.py

Lines changed: 98 additions & 1 deletion
@@ -4,20 +4,30 @@
 
 import datetime
 import os
+import json
 from tempfile import gettempdir
 import streamsx.spl.op
 import streamsx.spl.types
 from streamsx.topology.schema import CommonSchema, StreamSchema
 from streamsx.spl.types import rstring
 from urllib.parse import urlparse
+from streamsx.toolkits import download_toolkit
 
+_TOOLKIT_NAME = 'com.ibm.streamsx.hdfs'
 
 FileInfoSchema = StreamSchema('tuple<rstring fileName, uint64 fileSize>')
 """Structured schema of the file write response tuple. This schema is the output schema of the write method.
 
 ``'tuple<rstring fileName, uint64 fileSize>'``
 """
 
+
+def _add_toolkit_dependency(topo, version):
+    # IMPORTANT: Dependency of this python wrapper to a specific toolkit version
+    # This is important when toolkit is not set with streamsx.spl.toolkit.add_toolkit (selecting toolkit from remote build service)
+    streamsx.spl.toolkit.add_toolkit_dependency(topo, _TOOLKIT_NAME, version)
+
+
 def _read_ae_service_credentials(credentials):
     hdfs_uri = ""
     user = ""
@@ -29,7 +39,12 @@ def _read_ae_service_credentials(credentials):
             password = credentials.get('cluster').get('password')
             hdfs_uri = credentials.get('cluster').get('service_endpoints').get('webhdfs')
         else:
-            raise ValueError(credentials)
+            if 'webhdfs' in credentials:
+                user = credentials.get('user')
+                password = credentials.get('password')
+                hdfs_uri = credentials.get('webhdfs')
+            else:
+                raise ValueError(credentials)
     else:
         raise TypeError(credentials)
     # construct expected format for hdfs_uri: webhdfs://host:port
@@ -48,6 +63,88 @@ def _check_time_param(time_value, parameter_name):
         raise ValueError("Invalid "+parameter_name+" value. Value must be at least one second.")
     return result
 
+def configure_connection (instance, name = 'hdfs', credentials = None):
+    """Configures IBM Streams for a certain connection.
+
+    Creates or updates an application configuration object containing the required properties with connection information.
+
+    Example for creating a configuration for a Streams instance with connection details::
+
+        from streamsx.rest import Instance
+        import streamsx.topology.context
+        from icpd_core import icpd_util
+        import streamsx.hdfs as hdfs
+
+        cfg = icpd_util.get_service_instance_details (name='your-streams-instance')
+        cfg[context.ConfigParams.SSL_VERIFY] = False
+        instance = Instance.of_service (cfg)
+        app_cfg = hdfs.configure_connection (instance, credentials = 'my_credentials_json')
+
+    Args:
+        instance(streamsx.rest_primitives.Instance): IBM Streams instance object.
+        name(str): Name of the application configuration, default name is 'hdfs'.
+        credentials(str|dict): The service credentials, for example Analytics Engine service credentials.
+    Returns:
+        Name of the application configuration.
+    """
+
+    description = 'HDFS credentials'
+    properties = {}
+    if credentials is None:
+        raise TypeError (credentials)
+
+    if isinstance (credentials, dict):
+        properties ['credentials'] = json.dumps (credentials)
+    else:
+        properties ['credentials'] = credentials
+
+    # check if application configuration exists
+    app_config = instance.get_application_configurations (name = name)
+    if app_config:
+        print ('update application configuration: ' + name)
+        app_config[0].update (properties)
+    else:
+        print ('create application configuration: ' + name)
+        instance.create_application_configuration (name, properties, description)
+    return name
+
+
+def download_toolkit(url=None, target_dir=None):
+    r"""Downloads the latest HDFS toolkit from GitHub.
+
+    Example for updating the HDFS toolkit for your topology with the latest toolkit from GitHub::
+
+        import streamsx.hdfs as hdfs
+        # download HDFS toolkit from GitHub
+        hdfs_toolkit_location = hdfs.download_toolkit()
+        # add the toolkit to topology
+        streamsx.spl.toolkit.add_toolkit(topology, hdfs_toolkit_location)
+
+    Example for updating the topology with a specific version of the HDFS toolkit using a URL::
+
+        import streamsx.hdfs as hdfs
+        url500 = 'https://github.com/IBMStreams/streamsx.hdfs/releases/download/v5.0.0/streamx.hdfs.toolkits-5.0.0-20190902-1637.tgz'
+        hdfs_toolkit_location = hdfs.download_toolkit(url=url500)
+        streamsx.spl.toolkit.add_toolkit(topology, hdfs_toolkit_location)
+
+    Args:
+        url(str): Link to toolkit archive (\*.tgz) to be downloaded. Use this parameter to
+            download a specific version of the toolkit.
+        target_dir(str): the directory where the toolkit is unpacked to. If a relative path is given,
+            the path is appended to the system temporary directory, for example to /tmp on Unix/Linux systems.
+            If target_dir is ``None`` a location relative to the system temporary directory is chosen.
+
+    Returns:
+        str: the location of the downloaded HDFS toolkit
+
+    .. note:: This function requires an outgoing Internet connection
+    .. versionadded:: 1.1
+    """
+    _toolkit_location = streamsx.toolkits.download_toolkit (toolkit_name=_TOOLKIT_NAME, url=url, target_dir=target_dir)
+    return _toolkit_location
 
 
 def scan(topology, credentials, directory, pattern=None, init_delay=None, schema=CommonSchema.String, name=None):
     """Scans a Hadoop Distributed File System directory for new or modified files.

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -1 +1,2 @@
 streamsx
+streamsx.toolkits
