diff --git a/README.md b/README.md index b4f551b..c8c21d1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Hadoop and Yarn Setup -## 1. set passwordless login +### Set passwordless login To create user ``` @@ -20,289 +20,78 @@ For other hosts ssh-copy-id -i ~/.ssh/id_rsa.pub user@host ssh user@host ``` -## 2. Download and install hadoop -http://hadoop.apache.org/releases.html#Download +### Pre-requisites: +1. JAVA Setup should be completed and JAVA_HOME should be set in the ~/.bashrc file (environment variable). +2. Make sure the nodes are set for password-less SSH both ways (master->slaves). +3. Since we use the environment variables a lot in our scripts, make sure to comment out the portion following this statement in your ~/.bashrc , +`If not running interactively, don't do anything`. Update .bashrc -``` -#Choose the right mirror, below link is for US machines. -wget http://www-us.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz -tar xf hadoop-2.7.3.tar.gz --gzip -export HADOOP_HOME=$HOME/hadoop-2.7.3 -``` - -## 3. Update slaves file - -Add data nodes, don't add master node. -```bash -vi $HADOOP_HOME/etc/hadoop/slaves -user@host1 -user@host2 -``` - -## 4. Hadoop utils setup -``` -git clone https://github.com/kmadhugit/hadoop-cluster-utils.git -cd hadoop-cluster-utils -vi add-this-to-dot-profile.sh #update correct path to env variables. -. add-this-to-dot-profile.sh -``` - -check whether cluster scripts are working - -``` -AN hostname -``` - -Update .bashrc - - 1. Delete the following check. + Delete/comment the following check. ``` # If not running interactively, don't do anything -case $- in - *i*) ;; - *) return;; -esac + case $- in + *i*) ;; + *) return;; + esac ``` - - 2. Read add-this-to-dot-profile.sh at the end of .bashrc +4. Install curl `sudo apt-get install curl` and install wget `sudo apt-get install wget`. +5. Same username/user account should be used on `master` and `slaves` nodes for multinode installation. 
- ``` - vi $HOME/.bashrc - Gi - :r $HOME/hadoop-cluster-utils/add-this-to-dot-profile.sh - G - set -o vi - ``` - - 3. copy .bashrc to all other data nodes - - ``` - CP $HOME/.bashrc $HOME - ``` - - -## 5. Install Hadoop on all nodes -``` -CP $HOME/hadoop-2.7.3.tar.gz $HOME -DN "tar xf hadoop-2.7.3.tar.gz --gzip" -``` - -## 6. HDFS configuration - -You need to modify 2 config files for HDFS +### Installations: -1. core-site.xml #Modify the Hostname for the Name node - ``` - cd $HOME/hadoop-cluster-utils/conf - cp core-site.xml.template core-site.xml - vi core-site.xml - cp core-site.xml $HADOOP_HOME/etc/hadoop - CP core-site.xml $HADOOP_HOME/etc/hadoop - ``` - -2. hdfs-site.xml +* To automate hadoop installation follow the steps, - create local dir in name node for meta-data ( + ```bash + git clone https://github.com/kmadhugit/hadoop-cluster-utils.git - ``` mkdir -p /data/user/hdfs-meta-data ``` + cd hadoop-cluster-utils + ``` - create local dir in all data-nodes for hdfs-data +* Configuration + + 1. To configure `hadoop-cluster-utils`, run `./autogen.sh` which will create `config.sh` with appropriate field values. + 2. User can enter SLAVEIPs (if more than one, use comma separated) interactively while running `./autogen.sh` file. + 3. Default `Spark-2.0.1` and `Hadoop-2.7.1` versions available for installation. + 4. User can edit default port values, `spark` and `hadoop` versions in config.sh + 5. Before executing `./setup.sh` file, user can verify or edit `config.sh` + 6. Once setup script completed, source `~/.bashrc` file. + +* Ensure that the following java process is running in master. If not, check the log files - ``` DN "mkdir -p /data/user/hdfs-data" ``` - - update dir path - ``` - cd $HOME/hadoop-cluster-utils/conf - cp hdfs-site.xml.template hdfs-site.xml - vi hdfs-site.xml #update dir path + ```bash + checkall.sh ``` - Copy the files to all nodes - ``` - cp hdfs-site.xml $HADOOP_HOME/etc/hadoop - CP hdfs-site.xml $HADOOP_HOME/etc/hadoop - ``` - -3. 
Start HDFS as fresh FS - - ``` -$HADOOP_PREFIX/bin/hdfs namenode -format mycluster -start-hdfs.sh -AN jps -# use stop-hdfs.sh for stopping - ``` + Invoke `checkall.sh` ensure all services are started on the Master & slaves -4. Start HDFS on existing cluster data - You need to modify ownership to self to use already created data - - ``` - AN "sudo chown user:user /data/hdfs-meta-data" - AN "sudo chown user:user /data/hdfs-data" - start-hdfs.sh - AN jps - ``` - - Ensure that the following java process is running in master. If not, check the log files - ``` NameNode - ``` - Ensure that the following java process is running in slaves. If not, check the log files - ``` - DataNode - ``` - -5. HDFS web address - - ``` - http://localhost:50070 - ``` - -## 7. Yarn configuration - -You need to modify 2 config files for HDFS - -1. capacity-scheduler.xml #Modify resource-calculator property to DominantResourceCalculator - - ```bash - vi $HADOOP_HOME/etc/hadoop/capacity-scheduler.xml - ``` - ```xml - - yarn.scheduler.capacity.resource-calculator - org.apache.hadoop.yarn.util.resource.DominantResourceCalculator - - ``` -2. yarn-site.xml # Modify the properties as per the description provided in the template - - ``` - cd $HOME/hadoop-cluster-utils/conf - cp yarn-site.xml.template yarn-site.xml - vi yarn-site.xml - cp yarn-site.xml $HADOOP_HOME/etc/hadoop - CP yarn-site.xml $HADOOP_HOME/etc/hadoop - AN jps - ``` - - Ensure that the following java process is started in master. If not, check the log files - - ``` JobHistoryServer ResourceManager ``` - Ensure that the following java process is started in slaves. If not, check the log files + Ensure that the following java process is running in slaves. If not, check the hadoop log files ``` + DataNode NodeManager ``` - -3. Start Yarn - ``` - start-yarn.sh - AN jps - ``` - -3. 
Resource Manager and Node Manager web Address - ``` - Resource Manager : http://localhost:8088/cluster - Node Manager : http://datanode:8042/node (For each node) - ``` - -## 8. Useful scripts - - ``` - > stop-all.sh #stop HDFS and Yarn - > start-all.sh #start HDFS and Yarn - > CP #Copy file from name nodes to all slaves - > AN #execute a given command in all nodes including master - > DN #execute a given command in all nodes excluding master - ``` - -## 9. Spark Installation. - -### a. Download Binary - -``` -http://spark.apache.org/downloads.html -#Choose the right mirror, below link is for US machines. -wget http://www-us.apache.org/dist/spark/spark-2.0.1/spark-2.0.1-bin-hadoop2.7.tgz -tar -zvf spark-2.0.1-bin-hadoop2.7.tgz -``` - -### b. Build it yourself - -``` -git clone https://github.com/apache/spark.git -git checkout -b v2.0.1 v2.0.1 -export MAVEN_OPTS="-Xmx32G -XX:MaxPermSize=8G -XX:ReservedCodeCacheSize=2G" -./build/mvn -T40 -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.3 -Phive -Phive-thriftserver -DskipTests -Dmaven.javadoc.skip=true install -``` - -### c. Test (pre-built spark version) -``` -#Add in ~/.bashrc -export SPARK_HOME=$HOME/spark-2.0.1-bin-hadoop2.7 - -. ~/.bashrc - -${SPARK_HOME}bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --driver-memory 1024M --num-executors 2 --executor-memory 1g --executor-cores 1 ${SPARK_HOME}/examples/jars/spark-examples_2.11-2.0.1.jar 10 -``` - -### d. Test (manual spark build) - -``` -#Add in ~/.bashrc -export SPARK_HOME=$HOME/spark - -. ~/.bashrc - -$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --driver-memory 1024M --num-executors 2 --executor-memory 1g --executor-cores 1 /home/testuser/spark/examples/target/scala-2.11/jars/spark-examples_2.11-2.0.1.jar - -``` - -### e. 
Enable EventLogging & additional settings by adding the following content to $SPARK_HOME/conf/spark-defaults.conf -``` -spark.eventLog.enabled true -spark.eventLog.dir /tmp/spark-events -spark.eventLog.compress true -spark.history.fs.logDirectory /tmp/spark-events -spark.serializer org.apache.spark.serializer.KryoSerializer -``` - -### f. Start/Stop All Services. - - The below scripts are used to start/stop the following services in an automated way, - - - namenode daemon (only on hdfs master) - - datanode daemon (on all slave nodes) - - resource manager daemon (only on yarn master) - - node manager daemon (on all slave nodes) - - job history server (only on yarn master) - - Spark history server (on yarn master) - -``` - # Start - start-all.sh +* HDFS, Resource Manager, Node Manager and Spark web Address + + ``` + HDFS web address : http://localhost:50070 + Resource Manager : http://localhost:8088/cluster + Node Manager : http://datanode:8042/node (For each node) + Spark : http://localhost:8080 (Default) + ``` - # Stop +* Useful scripts - stop-all.sh -======= -``` - -## 10. Spark command line options for Yarn Scheduler. 
- - -| Option | Description | -|--------|-------------| -| --num-executors | Total number of executor JVMs to spawn across Yarn Cluster | -| --executor-cores | Total number of cores in each executor JVM | -| --executor-memory | Memory to be allocated for each JVM 1024M/1G| -| --driver-memory | Memory to be allocated for driver JVM | -| --driver-cores | Total number of vcores for driver JVM | -| | Total vcores = num-executors * executor-vcores + driver-cores | -| | Total Memory = num-executors * executor-memory + driver-memory | -|--driver-java-options | To pass driver JVM, useful in local mode for profiling | - ------------------------------------------------------------------ + ``` + > stop-all.sh #stop HDFS and Yarn + > start-all.sh #start HDFS and Yarn + > CP #Copy file from name nodes to all slaves + > AN #execute a given command in all nodes including master + > DN #execute a given command in all nodes excluding master + > checkall.sh #ensure all services are started on the Master & slaves + ``` diff --git a/add-this-to-dot-profile.sh b/add-this-to-dot-profile.sh deleted file mode 100644 index 71402e2..0000000 --- a/add-this-to-dot-profile.sh +++ /dev/null @@ -1,24 +0,0 @@ - -export PATH=$HOME/hadoop-cluster-utils/utils:$HOME/hadoop-cluster-utils/hadoop:$PATH - -export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-ppc64el -export HADOOP_HOME=$HOME/hadoop-2.7.3 - - -export HADOOP_PREFIX=$HADOOP_HOME -export HADOOP_MAPRED_HOME=$HADOOP_HOME -export HADOOP_COMMON_HOME=$HADOOP_HOME -export HADOOP_HDFS_HOME=$HADOOP_HOME -export YARN_HOME=$HADOOP_HOME -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop -export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop - -export PATH=${HADOOP_HOME}/bin:$PATH -set -o vi - -# Some convenient aliases and functions for running Hadoop-related commands -unalias fs &> /dev/null -alias fs="hadoop fs" -unalias hls &> /dev/null -alias hls="fs -ls" - diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..2519811 --- /dev/null +++ b/autogen.sh 
@@ -0,0 +1,89 @@ +#!/bin/bash -l + + +# Creating new config.sh +echo -en '# Default hdfs configuration properties\n' > config.sh +echo -en 'HADOOP_TMP_DIR=/tmp/'"${USER}"'/app-hadoop\n' >> config.sh +echo -en 'REPLICATION_VALUE=3\n' >> config.sh +echo -en 'NAMENODE_DIR=/tmp/'"${USER}"'/hdfs-meta\n' >> config.sh +echo -en 'DATANODE_DIR=/tmp/'"${USER}"'/hdfs-data\n\n' >> config.sh + +echo -en '# Master Details\n' >> config.sh +MASTER=`host $HOSTNAME | cut -f4 -d " "` +echo -en 'MASTER='$MASTER'\n\n' >> config.sh + +echo -en 'Please enter slave IP detail in format slave1IP,slave2IP \n' +read SLAVEIP + +echo -en '# Using these format to save SLAVE Details: slave1IP,slave1cpu,slave1memory....\n' >> config.sh +echo -e + +j=0 +for i in `echo $SLAVEIP |tr ',' ' '` +do +echo -en 'Collecting memory details from SLAVE machine '$i' \n' +freememory=$(ssh $i free -m | awk '{print $4}'| head -2 | tail -1) +memorypercent=$(awk "BEGIN { pc=80*$freememory/100; i=int(pc); print (pc-i<0.5)?i:i+1 }") +ncpu=$(ssh $i nproc --all) +if [ $j -eq 0 ] +then +SLAVE=`echo ''$i','$ncpu','$memorypercent''` +else +SLAVE=`echo ''$SLAVE'%'$i','$ncpu','$memorypercent''` +fi +((j=j+1)) +done + +echo -en 'SLAVES='$SLAVE'\n\n' >> config.sh + +echo -en '#Node Manager properties (Default yarn cpu and memory value for all nodes)\n' >> config.sh +echo -en 'YARN_SCHEDULER_MIN_ALLOCATION_MB=128\n' >> config.sh +echo -en 'YARN_SCHEDULER_MIN_ALLOCATION_VCORES=1\n\n' >> config.sh +echo -e +echo -en 'Default Spark version : 2.0.1\n' +sparkver="2.0.1" +echo -en 'Default hadoop version : 2.7.1\n' +hadoopver="2.7.1" + +echo -en '#Hadoop and Spark versions and setup zip download urls\n' >> config.sh +echo -e +echo -en 'sparkver='"$sparkver"'\n' >> config.sh +echo -en 'hadoopver='"$hadoopver"'\n\n' >> config.sh + +HADOOP_URL="http://www-us.apache.org/dist/hadoop/common/hadoop-${hadoopver}/hadoop-${hadoopver}.tar.gz" 
+SPARK_URL="http://www-us.apache.org/dist/spark/spark-${sparkver}/spark-${sparkver}-bin-hadoop${hadoopver:0:3}.tgz" + +echo -en 'SPARK_URL='$SPARK_URL'\n' >> config.sh +echo -en 'HADOOP_URL='$HADOOP_URL'\n\n' >> config.sh + + +echo -en '# Default port values\n' >> config.sh + +echo -en 'NAMENODE_PORT=9000\n' >> config.sh +echo -en 'NAMENODE_HTTP_ADDRESS=50070\n' >> config.sh +echo -en 'NAMENODE_SECONDARY_HTTP_ADDRESS=50090\n' >> config.sh +echo -en 'NAMENODE_SECONDARY_HTTPS_ADDRESS=50091\n\n' >> config.sh + +echo -en 'DATANODE_ADDRESS=50010\n' >> config.sh +echo -en 'DATANODE_HTTP_ADDRESS=50075\n' >> config.sh +echo -en 'DATANODE_IPC_ADDRESS=50020\n\n' >> config.sh + +echo -en 'MAPREDUCE_JOBHISTORY_ADDRESS=10020\n' >> config.sh +echo -en 'MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS=10039\n' >> config.sh +echo -en 'MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS=19883\n\n' >> config.sh + +echo -en 'RESOURCEMANAGER_SCHEDULER_ADDRESS=8034\n' >> config.sh +echo -en 'RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS=8039\n' >> config.sh +echo -en 'RESOURCEMANAGER_ADDRESS=8038\n' >> config.sh +echo -en 'RESOURCEMANAGER_ADMIN_ADDRESS=8033\n' >> config.sh +echo -en 'RESOURCEMANAGER_WEBAPP_ADDRESS=8089\n\n' >> config.sh + +echo -en 'NODEMANAGER_LOCALIZER_ADDRESS=8043\n' >> config.sh +echo -en 'NODEMANAGER_WEBAPP_ADDRESS=8045\n\n' >> config.sh +echo -en 'SPARKHISTORY_HTTP_ADDRESS=18080\n\n' >> config.sh + +echo -e 'Please check configuration (config.sh file) once before run (setup.sh file).' 
+echo -e 'You can modify hadoop or spark versions in config.sh file' +echo -e +chmod +x config.sh + diff --git a/conf/core-site.xml.template b/conf/core-site.xml.template index 64262ec..af9ffb2 100644 --- a/conf/core-site.xml.template +++ b/conf/core-site.xml.template @@ -17,13 +17,16 @@ - - fs.defaultFS - hdfs://namenode - Name node URL - - - hadoop.tmp.dir - file:/spark1/data/baidu/tmp - - + + + hadoop.tmp.dir + HADOOP_TMP_DIR + + + + fs.defaultFS + hdfs://MASTER:NAMENODE_PORT + Name node URL + + + \ No newline at end of file diff --git a/conf/hdfs-site.xml.template b/conf/hdfs-site.xml.template index 70bdf64..7281241 100644 --- a/conf/hdfs-site.xml.template +++ b/conf/hdfs-site.xml.template @@ -18,21 +18,59 @@ - -dfs.replication -3 - - - -dfs.namenode.name.dir -file:/data/madhu/hdfs-meta-data -Meta data dir - can be RAM FS only on Namename - - - -dfs.datanode.data.dir -file:/data/madhu/hdfs-data -Data dir - on all data nodes - + + dfs.replication + REPLICATION_VALUE + + + + + dfs.namenode.name.dir + NAMENODE_DIR + Meta data dir - can be RAM FS only on Namename + + + + dfs.namenode.http-address + 0.0.0.0:NAMENODE_HTTP_ADDRESS + The address and the base port where the dfs namenode web ui will listen on. + + + + dfs.namenode.secondary.http-address + 0.0.0.0:NAMENODE_SECONDARY_HTTP_ADDRESS + The secondary namenode http server address and port. + + + + dfs.namenode.secondary.https-address + 0.0.0.0:NAMENODE_SECONDARY_HTTPS_ADDRESS + The secondary namenode HTTPS server address and port. + + + + + dfs.datanode.data.dir + DATANODE_DIR + Data dir - on all data nodes + + + + dfs.datanode.address + 0.0.0.0:DATANODE_ADDRESS + The datanode server address and port for data transfer. + + + + dfs.datanode.http.address + 0.0.0.0:DATANODE_HTTP_ADDRESS + The datanode http server address and port. + + + + dfs.datanode.ipc.address + 0.0.0.0:DATANODE_IPC_ADDRESS + The datanode ipc server address and port. 
+ diff --git a/conf/mapred-site.xml.template b/conf/mapred-site.xml.template new file mode 100644 index 0000000..1bf8576 --- /dev/null +++ b/conf/mapred-site.xml.template @@ -0,0 +1,39 @@ + + + + + + + + + + mapreduce.jobhistory.address + 0.0.0.0:MAPREDUCE_JOBHISTORY_ADDRESS + MapReduce JobHistory Server IPC host:port + + + + mapreduce.jobhistory.admin.address + 0.0.0.0:MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS + The address of the History server admin interface. + + + + mapreduce.jobhistory.webapp.address + 0.0.0.0:MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS + MapReduce JobHistory Server Web UI host:port + + + diff --git a/conf/yarn-site.xml.template b/conf/yarn-site.xml.template index 37f6325..2bf8c80 100644 --- a/conf/yarn-site.xml.template +++ b/conf/yarn-site.xml.template @@ -16,55 +16,98 @@ - - yarn.resourcemanager.hostname - n001 - - - yarn.resourcemanager.webapp.address - 0.0.0.0:8088 - + + yarn.resourcemanager.hostname + MASTER + + + + yarn.resourcemanager.scheduler.address + 0.0.0.0:RESOURCEMANAGER_SCHEDULER_ADDRESS + The address of the scheduler interface. + + + + yarn.resourcemanager.resource-tracker.address + 0.0.0.0:RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS + + + + yarn.resourcemanager.address + 0.0.0.0:RESOURCEMANAGER_ADDRESS + The address of the RM web application. + + + + yarn.resourcemanager.admin.address + 0.0.0.0:RESOURCEMANAGER_ADMIN_ADDRESS + The address of the RM admin interface. + + + + yarn.resourcemanager.webapp.address + 0.0.0.0:RESOURCEMANAGER_WEBAPP_ADDRESS + The address of the applications manager interface in the RM. 
+ - - yarn.scheduler.minimum-allocation-mb - 128 - Min value for --executor-memory - - - yarn.scheduler.maximum-allocation-mb - 204800 - Max value for --executor-memory - - - yarn.scheduler.minimum-allocation-vcores - 1 - Min value for —executor-vcore - - - yarn.scheduler.maximum-allocation-vcores - 40 - Max value for —executor-vcore - + + yarn.scheduler.minimum-allocation-mb + YARN_SCHEDULER_MIN_ALLOCATION_MB + Min value for --executor-memory + + + + yarn.scheduler.maximum-allocation-mb + YARN_SCHEDULER_MAX_ALLOCATION_MB + Max value for --executor-memory + + + + yarn.scheduler.minimum-allocation-vcores + YARN_SCHEDULER_MIN_ALLOCATION_VCORES + Min value for —executor-vcore + + + + yarn.scheduler.maximum-allocation-vcores + YARN_SCHEDULER_MAX_ALLOCATION_VCORES + Max value for —executor-vcore + - - yarn.nodemanager.resource.cpu-vcores - 160 - Vcore capacity of this node - - - yarn.nodemanager.resource.memory-mb - 204800 - Memory Capacity of this node - - - yarn.nodemanager.vmem-check-enabled - false - - - yarn.nodemanager.pmem-check-enabled - false - + + yarn.nodemanager.resource.cpu-vcores + YARN_NODEMANAGER_RESOURCE_CPU_VCORES + Vcore capacity of this node + + + + yarn.nodemanager.resource.memory-mb + YARN_NODEMANAGER_RESOURCE_MEMORY_MB + Memory Capacity of this node + + + + yarn.nodemanager.vmem-check-enabled + false + + + + yarn.nodemanager.pmem-check-enabled + false + + + + yarn.nodemanager.localizer.address + 0.0.0.0:NODEMANAGER_LOCALIZER_ADDRESS + Address where the localizer IPC is. + + + + yarn.nodemanager.webapp.address + 0.0.0.0:NODEMANAGER_WEBAPP_ADDRESS + NM Webapp address. 
+ - + \ No newline at end of file diff --git a/hadoop/start-all.sh b/hadoop/start-all.sh index f9d66a5..c146d6f 100755 --- a/hadoop/start-all.sh +++ b/hadoop/start-all.sh @@ -6,3 +6,6 @@ $HADOOP_HOME/sbin/yarn-daemons.sh start nodemanager $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver $SPARK_HOME/sbin/start-history-server.sh + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +${DIR}/../utils/checkall.sh \ No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..dc021a1 --- /dev/null +++ b/setup.sh @@ -0,0 +1,434 @@ +#!/bin/bash -l + +# Need to create user manually +# Need to set JAVA_HOME in .bashrc files on all machines +# Need to complete ssh setup for all servers + +ul=`tput smul` +nul=`tput rmul` + +CURDIR=`pwd` # Inside hadoop-cluster-utils directory where run.sh is exist +WORKDIR=${HOME} # where hadoop and spark package will download + +current_time=$(date +"%Y.%m.%d.%S") + +if [ ! -d $CURDIR/logs ]; +then + mkdir logs +fi + +log=`pwd`/logs/hadoop_cluster_utils_$current_time.log +echo -e | tee -a $log +if [[ -n "$JAVA_HOME" ]] && [[ -x "$JAVA_HOME/bin/java" ]] +then + echo JAVA_HOME found on MASTER, java executable in $JAVA_HOME | tee $log + echo "---------------------------------------------" | tee -a $log +else + echo "JAVA_HOME not found in your environment, please set the JAVA_HOME variable in your environment then continue to run this script." | tee -a $log + exit 1 +fi + +grep '#case $- in' $HOME/.bashrc &>>/dev/null + if [ $? -ne 0 ] +then + echo 'Prerequisite not completed. 
Please comment below lines in .bashrc file' | tee -a $log + echo "# If not running interactively, don't do anything" | tee -a $log + echo "case \$- in" | tee -a $log + echo "*i*) ;;" | tee -a $log + echo "*) return;;" | tee -a $log + echo "esac" | tee -a $log + exit 1 +fi + +## Validation for config file + +if [ -f ${CURDIR}/config.sh ]; +then + ## First time permission set for config.sh file + chmod +x config.sh + source config.sh + + ## Checking config file for all required fields + + { cat ${CURDIR}/config.sh; echo; } | while read -r line; do + if [[ $line =~ "=" ]] ; + then + confvalue=`echo $line |grep = | cut -d "=" -f2` + if [[ -z "$confvalue" ]]; + then + echo "Configuration vlaue not set properly for $line, please check config.sh file" | tee -a $log + exit 1 + fi + fi + done + + ## Validation for hadoop port instances + + declare -a port_name=("NAMENODE_PORT" "NAMENODE_HTTP_ADDRESS" "NAMENODE_SECONDARY_HTTP_ADDRESS" "NAMENODE_SECONDARY_HTTPS_ADDRESS" "DATANODE_ADDRESS" "DATANODE_HTTP_ADDRESS" "DATANODE_IPC_ADDRESS" "MAPREDUCE_JOBHISTORY_ADDRESS" "MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS" "MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS" "RESOURCEMANAGER_SCHEDULER_ADDRESS" "RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS" "RESOURCEMANAGER_ADDRESS" "RESOURCEMANAGER_ADMIN_ADDRESS" "RESOURCEMANAGER_WEBAPP_ADDRESS" "NODEMANAGER_LOCALIZER_ADDRESS" "NODEMANAGER_WEBAPP_ADDRESS" "SPARKHISTORY_HTTP_ADDRESS") + + declare -a port_list=("$NAMENODE_PORT" "$NAMENODE_HTTP_ADDRESS" "$NAMENODE_SECONDARY_HTTP_ADDRESS" "$NAMENODE_SECONDARY_HTTPS_ADDRESS" "$DATANODE_ADDRESS" "$DATANODE_HTTP_ADDRESS" "$DATANODE_IPC_ADDRESS" "$MAPREDUCE_JOBHISTORY_ADDRESS" "$MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS" "$MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS" "$RESOURCEMANAGER_SCHEDULER_ADDRESS" "$RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS" "$RESOURCEMANAGER_ADDRESS" "$RESOURCEMANAGER_ADMIN_ADDRESS" "$RESOURCEMANAGER_WEBAPP_ADDRESS" "$NODEMANAGER_LOCALIZER_ADDRESS" "$NODEMANAGER_WEBAPP_ADDRESS" "$SPARKHISTORY_HTTP_ADDRESS") + + i=0 
+ for j in "${port_list[@]}"; + do + sudo netstat -pnlt | grep $j > /dev/null + if [ $? -eq 0 ]; + then + echo "${port_name[i]} running on port $j" >> temp + fi + i=$i+1 + done + + if [ -f temp ]; + then + cat temp + cat temp >> $log + echo "Kindly kill above running instance(s) else change port number in config.sh file, then continue to run this script." | tee -a $log + rm temp &>/dev/null + exit 1 + fi + + ## Adding slave machine names to slave file + cat ${CURDIR}/config.sh | grep SLAVES | grep -v "^#" |cut -d "=" -f2 | tr "%" "\n" | cut -d "," -f1 >${CURDIR}/conf/slaves + + + + SLAVES=`cat ${CURDIR}/config.sh | grep SLAVES | grep -v "^#" |cut -d "=" -f2` + + cat ${CURDIR}/config.sh | grep SLAVES | grep -v "^#" | tr "%" "\n" | grep -E ''$MASTER'|'$HOSTNAME'' &>>/dev/null + if [ $? -eq 0 ] + then + #if master is also used as data machine + SERVERS=$SLAVES + else + ## Getting details for Master machine + + freememory_master="$(free -m | awk '{print $4}'| head -2 | tail -1)" + memorypercent_master=$(awk "BEGIN { pc=80*${freememory_master}/100; i=int(pc); print (pc-i<0.5)?i:i+1 }") + ncpu_master="$(nproc --all)" + MASTER_DETAILS=''$HOSTNAME','$ncpu_master','$memorypercent_master'' + SERVERS=`echo ''$MASTER_DETAILS'%'$SLAVES''` + fi + + ## Validation for Slaves IPs + echo -e "${ul}Validation for slave IPs${nul}" | tee -a $log + while IFS= read -r ip; do + if ping -q -c2 "$ip" &>/dev/null; + then + echo "$ip is Pingable" | tee -a $log + else + echo "$ip Not Pingable" | tee -a $log + echo 'Please check your config.sh file. '$ip' is not pingalbe. \n' | tee -a $log + exit 1 + fi + done <${CURDIR}/conf/slaves + + + ## Download and install hadoop For Master machine installation + + echo "---------------------------------------------" | tee -a $log + echo "${ul}Downloading and installing hadoop...${nul}" | tee -a $log + echo -e | tee -a $log + cd ${WORKDIR} + if [ ! 
-f ${WORKDIR}/hadoop-${hadoopver}.tar.gz ]; + then + if curl --output /dev/null --silent --head --fail $HADOOP_URL + then + echo 'Hadoop file Downloading on Master- '$MASTER'' | tee -a $log + wget $HADOOP_URL | tee -a $log + else + echo "This URL Not Exist. Please check your hadoop version then continue to run this script." | tee -a $log + exit 1 + fi + fi + + + ## Copying hadoop tgz file , unzipping and exporting paths in the .bashrc file on all machines + + for i in `echo $SERVERS |cut -d "=" -f2 | tr "%" "\n" | cut -d "," -f1` + do + + if [ $i != $MASTER ] + then + echo 'Copying Hadoop setup file on '$i'' | tee -a $log + scp ${WORKDIR}/hadoop-${hadoopver}.tar.gz @$i:${WORKDIR} | tee -a $log + fi + echo 'Unzipping Hadoop setup file on '$i'' | tee -a $log + ssh $i "tar xf hadoop-${hadoopver}.tar.gz --gzip" + + echo 'Updating hadoop variables on '$i'' | tee -a $log + + export HADOOP_HOME="${WORKDIR}"/hadoop-${hadoopver} + echo "#StartHadoopEnv"> tmp_b + echo "export CURDIR="${CURDIR}"" >> tmp_b + echo "export PATH="${CURDIR}":"${CURDIR}"/hadoop:\$PATH" >> tmp_b + echo "export PATH="${CURDIR}":"${CURDIR}"/utils:\$PATH" >> tmp_b + echo "export HADOOP_HOME="${WORKDIR}"/hadoop-${hadoopver}" >> tmp_b + echo "export HADOOP_PREFIX=$HADOOP_HOME" >> tmp_b + echo "export HADOOP_MAPRED_HOME=$HADOOP_HOME" >> tmp_b + echo "export HADOOP_COMMON_HOME=$HADOOP_HOME" >> tmp_b + echo "export HADOOP_HDFS_HOME=$HADOOP_HOME" >> tmp_b + echo "export YARN_HOME=$HADOOP_HOME" >> tmp_b + echo "export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop" >> tmp_b + echo "export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop" >> tmp_b + echo "export PATH=$HADOOP_HOME/bin:\$PATH" >> tmp_b + echo "#StopHadoopEnv">> tmp_b + + scp tmp_b @$i:${WORKDIR} &>>/dev/null + + ssh $i "grep -q '#StartHadoopEnv' $HOME/.bashrc" + if [ $? 
-ne 0 ]; + then + ssh $i "cat tmp_b>>$HOME/.bashrc" + ssh $i "rm tmp_b" + else + ssh $i "sed -i '/#StartHadoopEnv/,/#StopHadoopEnv/d' $HOME/.bashrc" + ssh $i "cat tmp_b>>$HOME/.bashrc" + ssh $i "rm tmp_b" + fi + echo 'Sourcing updated .bashrc file on '$i'' | tee -a $log + ssh $i "source ~/.bashrc" &>>/dev/null + echo "---------------------------------------------" | tee -a $log + done + rm -rf tmp_b + + + ## Configuration changes in hadoop-clusterfor Core-site,hdfs-site and mapred-site xml + + echo 'Updating configuration properties in hadoop-cluster CURDIR for Core-site,hdfs-site and mapred-site xml ' | tee -a $log + + if [ ! -f ${CURDIR}/conf/core-site.xml ]; + then + #Copying xml templates for editing + cp ${CURDIR}/conf/core-site.xml.template ${CURDIR}/conf/core-site.xml + cp ${CURDIR}/conf/hdfs-site.xml.template ${CURDIR}/conf/hdfs-site.xml + cp ${CURDIR}/conf/mapred-site.xml.template ${CURDIR}/conf/mapred-site.xml + + + #core-site.xml configuration configuration properties + sed -i 's|HADOOP_TMP_DIR|'"$HADOOP_TMP_DIR"'|g' ${CURDIR}/conf/core-site.xml + sed -i 's|MASTER|'"$MASTER"'|g' ${CURDIR}/conf/core-site.xml + sed -i 's|NAMENODE_PORT|'"$NAMENODE_PORT"'|g' ${CURDIR}/conf/core-site.xml + + + # hdfs-site.xml configuration properties + sed -i 's|REPLICATION_VALUE|'"$REPLICATION_VALUE"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|NAMENODE_DIR|'"$NAMENODE_DIR"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|DATANODE_DIR|'"$DATANODE_DIR"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|NAMENODE_HTTP_ADDRESS|'"$NAMENODE_HTTP_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|NAMENODE_SECONDARY_HTTP_ADDRESS|'"$NAMENODE_SECONDARY_HTTP_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|NAMENODE_SECONDARY_HTTPS_ADDRESS|'"$NAMENODE_SECONDARY_HTTPS_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|DATANODE_ADDRESS|'"$DATANODE_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + sed -i 's|DATANODE_HTTP_ADDRESS|'"$DATANODE_HTTP_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + sed 
-i 's|DATANODE_IPC_ADDRESS|'"$DATANODE_IPC_ADDRESS"'|g' ${CURDIR}/conf/hdfs-site.xml + + + # mapred-site.xml configuration properties + sed -i 's|MAPREDUCE_JOBHISTORY_ADDRESS|'"$MAPREDUCE_JOBHISTORY_ADDRESS"'|g' ${CURDIR}/conf/mapred-site.xml + sed -i 's|MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS|'"$MAPREDUCE_JOBHISTORY_ADMIN_ADDRESS"'|g' ${CURDIR}/conf/mapred-site.xml + sed -i 's|MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS|'"$MAPREDUCE_JOBHISTORY_WEBAPP_ADDRESS"'|g' ${CURDIR}/conf/mapred-site.xml + + fi + + + echo "---------------------------------------------" | tee -a $log + + ## yarn-site.xml configuration properties and hadoop-env.sh file updates for all machines + + for i in `echo $SERVERS |cut -d "=" -f2 | tr "%" "\n" ` + do + + memorypercent=`echo $i| cut -d "," -f3` + ncpu=`echo $i| cut -d "," -f2` + slaveip=`echo $i| cut -d "," -f1` + + echo 'Updating configuration properties for yarn-sites and hadoop.env.sh for '$slaveip'' | tee -a $log + + cp ${CURDIR}/conf/yarn-site.xml.template ${CURDIR}/conf/yarn-site.xml + + sed -i 's|MASTER|'"$MASTER"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_SCHEDULER_MIN_ALLOCATION_MB|'"$YARN_SCHEDULER_MIN_ALLOCATION_MB"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_SCHEDULER_MAX_ALLOCATION_MB|'"$memorypercent"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_SCHEDULER_MIN_ALLOCATION_VCORES|'"$YARN_SCHEDULER_MIN_ALLOCATION_VCORES"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_SCHEDULER_MAX_ALLOCATION_VCORES|'"$ncpu"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_NODEMANAGER_RESOURCE_CPU_VCORES|'"$ncpu"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|YARN_NODEMANAGER_RESOURCE_MEMORY_MB|'"$memorypercent"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|RESOURCEMANAGER_SCHEDULER_ADDRESS|'"$RESOURCEMANAGER_SCHEDULER_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS|'"$RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 
's|RESOURCEMANAGER_ADDRESS|'"$RESOURCEMANAGER_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|RESOURCEMANAGER_ADMIN_ADDRESS|'"$RESOURCEMANAGER_ADMIN_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|RESOURCEMANAGER_WEBAPP_ADDRESS|'"$RESOURCEMANAGER_WEBAPP_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|NODEMANAGER_LOCALIZER_ADDRESS|'"$NODEMANAGER_LOCALIZER_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + sed -i 's|NODEMANAGER_WEBAPP_ADDRESS|'"$NODEMANAGER_WEBAPP_ADDRESS"'|g' ${CURDIR}/conf/yarn-site.xml + + + scp ${CURDIR}/conf/*site.xml @$slaveip:$HADOOP_HOME/etc/hadoop | tee -a $log + + ## Updating java version in hadoop-env.sh file on all machines + + JAVA_HOME_SLAVE=$(ssh $slaveip 'grep JAVA_HOME ~/.bashrc | grep -v "PATH" | cut -d"=" -f2') + echo "sed -i 's|"\${JAVA_HOME}"|"${JAVA_HOME_SLAVE}"|g' $HADOOP_HOME/etc/hadoop/hadoop-env.sh" | ssh $slaveip bash + echo "---------------------------------------------" | tee -a $log + + done + rm -rf ${CURDIR}/conf/*site.xml + + ##Updating the slave file on master + + cp ${CURDIR}/conf/slaves ${HADOOP_HOME}/etc/hadoop + +else + echo "Config file does not exist. Please check README.md for installation steps." | tee -a $log + exit 1 +fi + +##Spark installation + +echo "${ul}Downloading and installing Spark...${nul}" | tee -a $log + +cd ${WORKDIR} + +if [ ! -f ${WORKDIR}/spark-${sparkver}-bin-hadoop${hadoopver:0:3}.tgz ]; +then + if curl --output /dev/null --silent --head --fail $SPARK_URL + then + echo 'SPARK file Downloading on Master - '$MASTER'' | tee -a $log + wget $SPARK_URL | tee -a $log + else + echo "This URL Not Exist. Please check your spark version then continue to run this script." 
| tee -a $log + exit 1 + fi +echo "***********************************************" +fi + +## Exporting SPARK_HOME to the PATH and Add scripts to the PATH + +for i in `echo $SERVERS |cut -d "=" -f2 | tr "%" "\n" | cut -d "," -f1` +do + if [ $i != $MASTER ] + then + echo 'Copying Spark setup file on '$i'' | tee -a $log + scp ${WORKDIR}/spark-${sparkver}-bin-hadoop${hadoopver:0:3}.tgz @$i:${WORKDIR} | tee -a $log + fi + echo 'Unzipping Spark setup file on '$i'' | tee -a $log + ssh $i "tar xf spark*.tgz --gzip" | tee -a $log + + echo 'Updating .bashrc file on '$i' with Spark variables ' + echo '#StartSparkEnv' >tmp_b + echo "export SPARK_HOME="${WORKDIR}"/spark-"${sparkver}"-bin-hadoop"${hadoopver:0:3}"" >>tmp_b + echo "export PATH=\$SPARK_HOME/bin:\$PATH">>tmp_b + echo '#StopSparkEnv'>>tmp_b + + scp tmp_b @$i:${WORKDIR}&>>/dev/null + + ssh $i "grep -q "SPARK_HOME" ~/.bashrc" + if [ $? -ne 0 ]; + then + ssh $i "cat tmp_b>>$HOME/.bashrc" + ssh $i "rm tmp_b" + else + ssh $i "sed -i '/#StartSparkEnv/,/#StopSparkEnv/ d' $HOME/.bashrc" + ssh $i "cat tmp_b>>$HOME/.bashrc" + ssh $i "rm tmp_b" + fi + + ssh $i "source $HOME/.bashrc" + +done +rm -rf tmp_b +echo "---------------------------------------------" | tee -a $log + +## updating Slave file for Spark folder +echo 'Updating Slave file for Spark setup'| tee -a $log + +cp spark-${sparkver}-bin-hadoop${hadoopver:0:3}/conf/slaves.template spark-${sparkver}-bin-hadoop${hadoopver:0:3}/conf/slaves +sed -i 's|localhost||g' spark-${sparkver}-bin-hadoop${hadoopver:0:3}/conf/slaves +cat ${CURDIR}/conf/slaves>>spark-${sparkver}-bin-hadoop${hadoopver:0:3}/conf/slaves + +echo -e "Configuring Spark history server" | tee -a $log + +cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf +grep -q "#StartSparkconf" $SPARK_HOME/conf/spark-defaults.conf +if [ $? 
-ne 0 ]; +then + echo "#StartSparkconf" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.enabled true" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.dir /tmp/spark-events" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.compress true" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.history.fs.logDirectory /tmp/spark-events-history" >> $SPARK_HOME/conf/spark-defaults.conf + echo "#StopSparkconf">> $SPARK_HOME/conf/spark-defaults.conf +else + sed -i '/#StartSparkconf/,/#StopSparkconf/ d' $SPARK_HOME/conf/spark-defaults.conf + echo "#StartSparkconf" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.enabled true" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.dir /tmp/spark-events" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.eventLog.compress true" >> $SPARK_HOME/conf/spark-defaults.conf + echo "spark.history.fs.logDirectory /tmp/spark-events-history" >> $SPARK_HOME/conf/spark-defaults.conf + echo "#StopSparkconf">> $SPARK_HOME/conf/spark-defaults.conf +fi + +CP $SPARK_HOME/conf/spark-defaults.conf $SPARK_HOME/conf &>/dev/null + +echo -e "Spark installation done..!!\n" | tee -a $log + + +source ${HOME}/.bashrc + +##to start hadoop setup + +if [ ! 
-d "$HADOOP_TMP_DIR" ] +then + # Creating directories + AN "mkdir -p $HADOOP_TMP_DIR" &>/dev/null + AN "mkdir -p $DFS_NAMENODE_NAME_DIR" &>/dev/null + AN "mkdir -p $DFS_DATANODE_NAME_DIR" &>/dev/null + AN "mkdir -p /tmp/spark-events" &>/dev/null + AN "mkdir -p /tmp/spark-events-history" &>/dev/null + echo "Finished creating directories" +fi + +echo 'Formatting NAMENODE'| tee -a $log + +$HADOOP_PREFIX/bin/hdfs namenode -format mycluster >> $log +echo -e | tee -a $log +$CURDIR/hadoop/start-all.sh | tee -a $log +echo -e | tee -a $log +$CURDIR/utils/checkall.sh | tee -a $log + +## use stop-all.sh for stopping + +echo -e | tee -a $log +echo "${ul}Web URL link${nul}" | tee -a $log +echo "HDFS web address : http://"$MASTER":"$NAMENODE_HTTP_ADDRESS"" | tee -a $log +echo "Resource Manager : http://"$MASTER":"$RESOURCEMANAGER_WEBAPP_ADDRESS"/cluster" | tee -a $log +echo "SPARK history server : http://"$MASTER":"$SPARKHISTORY_HTTP_ADDRESS"" | tee -a $log +echo -e | tee -a $log + +echo "---------------------------------------------" | tee -a $log +echo "${ul}Ensure SPARK running correctly using following command${nul}" | tee -a $log +echo "${SPARK_HOME}/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --driver-memory 1024M --num-executors 2 --executor-memory 1g --executor-cores 1 ${SPARK_HOME}/examples/jars/spark-examples_2.11-2.0.1.jar 10" | tee -a $log +echo -e +read -p "Do you wish to run above command ? [y/N] " prompt + + +if [[ $prompt == "y" || $prompt == "Y" || $prompt == "yes" || $prompt == "Yes" ]] +then + ${SPARK_HOME}/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn-client --driver-memory 1024M --num-executors 2 --executor-memory 1g --executor-cores 1 ${SPARK_HOME}/examples/jars/spark-examples_2.11-2.0.1.jar 10 &>> $log + +else + echo "Thanks for your response" +fi + +echo -e | tee -a $log +echo "---------------------------------------------" | tee -a $log +grep -r 'Pi is roughly' ${log} +if [ $? 
-eq 0 ]; +then + echo 'Spark services running.' | tee -a $log + echo 'Please check log file '$log' for more details.' + +else + echo 'Expected output not found.' | tee -a $log + echo 'Please check log file '$log' for more details' +fi + diff --git a/utils/checkall.sh b/utils/checkall.sh new file mode 100755 index 0000000..fc83068 --- /dev/null +++ b/utils/checkall.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +RED='\033[0;31m' +YEL='\033[1;33m' +CYAN='\033[1;36m' +GRE='\033[0;32m' +NC='\033[0m' + +namenode=`hostname` +echo -en "Check Services on NameNode ${YEL}($namenode)${NC} .. " +dlist=`jps` +error=0 +errmsg="" +echo -e $dlist | grep "NameNode" >/dev/null +if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg NameNode," +fi +echo -e $dlist | grep "ResourceManager" >/dev/null +if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg ResourceManager," +fi +echo -e $dlist | grep "JobHistoryServer" >/dev/null +if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg JobHistoryServer," +fi +echo -e $dlist | grep "HistoryServer" >/dev/null +if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg HistoryServer," +fi + +if [[ $error == 1 ]]; then + echo -e "${RED}NOT OK ${NC}" + echo -e "${CYAN}$errmsg${NC} not active in $namenode" +else + echo -e "${GRE}OK${NC}" +fi + +error=0 +errmsg="" +for userhost in `cat ${HADOOP_HOME}/etc/hadoop/slaves | grep -v ^#` +do + echo -en "Check Services on DataNode ${YEL}($userhost)${NC} .. " + dlist=`ssh $userhost jps ` + + echo -e $dlist | grep "DataNode" >/dev/null + if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg DataNode," + fi + + echo -e $dlist | grep "NodeManager" >/dev/null + if [[ $? -ne 0 ]]; then + error=1 + errmsg="$errmsg NodeManager," + fi + if [[ $error == 1 ]]; then + echo -e "${RED}NOT OK ${NC}" + echo -e "${CYAN}$errmsg${NC} not active in $userhost" + else + echo -e "${GRE}OK${NC}" + fi +done \ No newline at end of file