Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion regression-tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
teamcity-messages
pandas
numpy
scipy
scipy
statsmodels
pydicom
pytest-cov
pytest-xdist
glob2
sklearn
lxml
networkx
requests
psutil
30 changes: 27 additions & 3 deletions regression-tests/sparktkregtests/lib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,45 @@
#

""" Global Config file for testcases, used heavily by automation"""
import os
import os, random
import psutil

def find_open_port(bottom=10000, top=65535):
bottom = int(bottom)
top = int(top)

start = random.randint(bottom,top)
increment = random.randint(1,((top-bottom)/2))

ports = []
for i in psutil.net_connections(kind='inet4'):
ports.insert(-1, i.laddr[1])

next_port=start
found_port=0
while found_port == 0 and next_port >= bottom and next_port <= top:
if next_port in ports:
next_port = next_port + increment
else:
found_port = next_port

return found_port

qa_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
dataset_directory = os.path.join(qa_root, "datasets")
hdfs_namenode = os.getenv("CDH_MASTER", "localhost")
user = os.getenv("USER", "hadoop")
run_mode = True if os.getenv("RUN_MODE", "yarn_client") == "yarn_client" else False
hostname = os.getenv("HOSTNAME")
run_mode = True if os.getenv("RUN_MODE","yarn_client") == "yarn_client" else False
hostname = os.getenv("HOSTNAME", "127.0.0.1")


# HDFS paths, need to be set NOT using os.join since HDFS doesn't use the system
# path seperator, it uses HDFS path seperator ('/')
hdfs_user_root = "/user/" + user
hdfs_data_dir = hdfs_user_root + "/qa_data"
checkpoint_dir = hdfs_user_root + "/sparktk_checkpoint"

export_dir = "hdfs://"+hostname+":8020"+hdfs_user_root+"/sparktk_export"

scoring_engine_host = os.getenv("SCORING_ENGINE_HOST", "127.0.0.1")

25 changes: 21 additions & 4 deletions regression-tests/sparktkregtests/lib/sparktk_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,42 @@
lock = Lock()
global_tc = None


def get_context():
global global_tc
with lock:
if global_tc is None:
sparktkconf_dict = {'spark.driver.maxPermSize': '512m',
'spark.ui.enabled': 'false',
'spark.driver.maxResultSize': '2g',
'spark.ui.enabled': 'false',
'spark.port.maxRetries': 50,
'spark.dynamicAllocation.enabled': 'true',
'spark.dynamicAllocation.maxExecutors': '16',
'spark.dynamicAllocation.maxExecutors': '24',
'spark.dynamicAllocation.minExecutors': '1',
'spark.executor.cores': '10',
'spark.executor.memory': '2g',
'spark.executor.memory': '3712m',
'spark.shuffle.io.preferDirectBufs': 'true',
'spark.shuffle.service.enabled': 'true',
'spark.yarn.am.waitTime': '1000000',
'spark.yarn.submit.file.replication': 1,
'spark.yarn.executor.memoryOverhead': '384',
'spark.eventLog.enabled': 'false',
'spark.sql.shuffle.partitions': '6'}

if 'SPARK_DRIVER_EXTRAJAVAOPTIONS' in os.environ:
sparktkconf_dict['spark.driver.extraJavaOptions'] = ' ' + os.environ['SPARK_DRIVER_EXTRAJAVAOPTIONS']

if 'SPARK_DRIVER_EXTRAJAVAOPTIONS' in os.environ:
sparktkconf_dict['spark.executor.extraJavaOptions'] = ' ' + os.environ['SPARK_DRIVER_EXTRAJAVAOPTIONS']

if 'SPARK_PORT_BOTTOM' in os.environ and 'SPARK_PORT_TOP' in os.environ:
sparktkconf_dict['spark.driver.port'] = config.find_open_port(os.environ['SPARK_PORT_BOTTOM'], os.environ['SPARK_PORT_TOP'])

if 'SPARK_PORT_BOTTOM' in os.environ and 'SPARK_PORT_TOP' in os.environ:
sparktkconf_dict['spark.fileserver.port'] = config.find_open_port(os.environ['SPARK_PORT_BOTTOM'], os.environ['SPARK_PORT_TOP'])

if 'SPARK_PORT_BOTTOM' in os.environ and 'SPARK_PORT_TOP' in os.environ:
sparktkconf_dict['spark.ui.port'] = config.find_open_port(os.environ['SPARK_PORT_BOTTOM'], os.environ['SPARK_PORT_TOP'])

if config.run_mode:
global_tc = stk.TkContext(master='yarn-client', extra_conf_dict=sparktkconf_dict)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def setUp(self):

self.frame = self.context.frame.import_csv(data, schema=self.schema)

@unittest.skip("Sparktk: throws assertion error")
def test_doc_example(self):
""" Example from the API documentation """
data = [[1,2,1.0],
Expand Down Expand Up @@ -61,6 +62,7 @@ def test_doc_example(self):
expected_assignment = [[4, 5, 6], [1, 2, 3], [0]]
self.assertEqual(sorted(map(sorted, grouped_assignment)), sorted(map(sorted, expected_assignment)))

@unittest.skip("Sparktk: throws assertion error")
def test_circles_default(self):
""" Test pic on similarity matrix for two concentric cicles """
result = self.frame.power_iteration_clustering(
Expand Down
18 changes: 11 additions & 7 deletions sparktk-core/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@
EXIT=0
SPARK_VERSION=1.6.0

if [ -f $SPARK_HOME ]; then
echo "Your SPARK_HOME variable isn't set. Please set SPARK_HOME to the root of your spark installation."
EXIT=1
if [ ! -f $SPARK_VERSION_SKIP ]; then
echo "Skip spark version check"
else
echo "Verifying Spark version"
SPARK_VERSION=$SPARK_VERSION bash -c " $SPARK_HOME/bin/spark-shell --conf spark.master=local -i version.scala 2> /dev/null"
if [ $? -ne 0 ]; then
echo "SPARK version mismatch. This version of sparktk requires $SPARK_VERSION."
if [ -f $SPARK_HOME ]; then
echo "Your SPARK_HOME variable isn't set. Please set SPARK_HOME to the root of your spark installation."
EXIT=1
else
echo "Verifying Spark version"
SPARK_VERSION=$SPARK_VERSION bash -c " $SPARK_HOME/bin/spark-shell --conf spark.master=local -i version.scala 2> /dev/null"
if [ $? -ne 0 ]; then
echo "SPARK version mismatch. This version of sparktk requires $SPARK_VERSION."
EXIT=1
fi
fi
fi

Expand Down