Skip to content

Commit b01c431

Browse files
committed
Enable development on other branches
1 parent 51f8f01 commit b01c431

File tree

3 files changed

+42
-40
lines changed

3 files changed

+42
-40
lines changed

README.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,26 @@ You can simply use the post-install script that you can find [here](https://gith
5353
#Load AWS Parallelcluster environment variables
5454
. /etc/parallelcluster/cfnconfig
5555

56-
#get git-hib repo to clone and the installation script
57-
github_repo=$(echo ${cfn_postinstall_args}| cut -d ',' -f 1 )
58-
setup_command=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
59-
monitoring_dir_name=$(basename -s .git ${github_repo})
56+
#get the monitoring tarball URL, the target directory name, and the installation script
57+
monitoring_url=$(echo ${cfn_postinstall_args}| cut -d ',' -f 1 )
58+
monitoring_dir_name=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
59+
monitoring_tarball="${monitoring_dir_name}.tar.gz"
60+
setup_command=$(echo ${cfn_postinstall_args}| cut -d ',' -f 3 )
61+
monitoring_home="/home/${cfn_cluster_user}/${monitoring_dir_name}"
6062

6163
case ${cfn_node_type} in
6264
MasterServer)
63-
cd /home/$cfn_cluster_user/
64-
git clone ${github_repo}
65+
wget ${monitoring_url} -O ${monitoring_tarball}
66+
mkdir -p ${monitoring_home}
67+
tar xvf ${monitoring_tarball} -C ${monitoring_home} --strip-components 1
6568
;;
6669
ComputeFleet)
6770

6871
;;
6972
esac
7073

7174
#Execute the monitoring installation script
72-
bash -x "/home/${cfn_cluster_user}/${monitoring_dir_name}/parallelcluster-setup/${setup_command}" >/tmp/monitoring-setup.log 2>&1
75+
bash -x "${monitoring_home}/parallelcluster-setup/${setup_command}" >/tmp/monitoring-setup.log 2>&1
7376
exit $?
7477
```
7578
The proposed post-install script will take care of installing and configuring everything for you through the [install-monitoring.sh](https://github.com/aws-samples/aws-parallelcluster-monitoring/blob/main/parallelcluster-setup/install-monitoring.sh) script. However, a few additional parameters are needed in the AWS ParallelCluster config file: the post_install_args, additional IAM policies, a security group, and a tag. You can find an AWS ParallelCluster template [here](https://github.com/aws-samples/aws-parallelcluster-monitoring/blob/main/parallelcluster-setup/pcluster-template.config). Please note that, at the moment, the installation script has only been tested using [Amazon Linux 2](https://aws.amazon.com/amazon-linux-2/).
@@ -79,7 +82,7 @@ base_os = alinux2
7982

8083
post_install = s3://<my-bucket-name>/post-install.sh
8184

82-
post_install_args = https://github.com/aws-samples/aws-parallelcluster-monitoring.git,install-monitoring.sh
85+
post_install_args = https://github.com/aws-samples/aws-parallelcluster-monitoring/tarball/main,aws-parallelcluster-monitoring,install-monitoring.sh
8386

8487
additional_iam_policies = arn:aws:iam::aws:policy/CloudWatchFullAccess,arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess,arn:aws:iam::aws:policy/AmazonSSMFullAccess,arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
8588

parallelcluster-setup/install-monitoring.sh

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,12 @@ usermod -a -G docker $cfn_cluster_user
1818
curl -L "https://github.com/docker/compose/releases/download/1.27.4/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
1919
chmod +x /usr/local/bin/docker-compose
2020

21-
github_repo=$(echo ${cfn_postinstall_args}| cut -d ',' -f 1 )
22-
setup_command=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
23-
monitoring_dir_name=$(basename -s .git ${github_repo})
21+
monitoring_dir_name=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
22+
monitoring_home="/home/${cfn_cluster_user}/${monitoring_dir_name}"
2423

2524
case "${cfn_node_type}" in
2625
MasterServer)
2726

28-
2927
#cfn_efs=$(cat /etc/chef/dna.json | grep \"cfn_efs\" | awk '{print $2}' | sed "s/\",//g;s/\"//g")
3028
#cfn_cluster_cw_logging_enabled=$(cat /etc/chef/dna.json | grep \"cfn_cluster_cw_logging_enabled\" | awk '{print $2}' | sed "s/\",//g;s/\"//g")
3129
cfn_fsx_fs_id=$(cat /etc/chef/dna.json | grep \"cfn_fsx_fs_id\" | awk '{print $2}' | sed "s/\",//g;s/\"//g")
@@ -37,39 +35,39 @@ case "${cfn_node_type}" in
3735
cluster_config_version=$(cat /etc/chef/dna.json | grep \"cluster_config_version\" | awk '{print $2}' | sed "s/\",//g;s/\"//g")
3836
log_group_names="\/aws\/parallelcluster\/$(echo ${stack_name} | cut -d "-" -f2-)"
3937

40-
aws s3api get-object --bucket $cluster_s3_bucket --key $cluster_config_s3_key --region $cfn_region --version-id $cluster_config_version /home/${cfn_cluster_user}/${monitoring_dir_name}/parallelcluster-setup/cluster-config.json
38+
aws s3api get-object --bucket $cluster_s3_bucket --key $cluster_config_s3_key --region $cfn_region --version-id $cluster_config_version ${monitoring_home}/parallelcluster-setup/cluster-config.json
4139

4240
yum -y install golang-bin
4341

4442
chown $cfn_cluster_user:$cfn_cluster_user -R /home/$cfn_cluster_user
45-
chmod +x /home/${cfn_cluster_user}/${monitoring_dir_name}/custom-metrics/*
43+
chmod +x ${monitoring_home}/custom-metrics/*
4644

47-
cp -rp /home/${cfn_cluster_user}/${monitoring_dir_name}/custom-metrics/* /usr/local/bin/
48-
mv /home/${cfn_cluster_user}/${monitoring_dir_name}/prometheus-slurm-exporter/slurm_exporter.service /etc/systemd/system/
45+
cp -rp ${monitoring_home}/custom-metrics/* /usr/local/bin/
46+
mv ${monitoring_home}/prometheus-slurm-exporter/slurm_exporter.service /etc/systemd/system/
4947

5048
(crontab -l -u $cfn_cluster_user; echo "*/1 * * * * /usr/local/bin/1m-cost-metrics.sh") | crontab -u $cfn_cluster_user -
5149
(crontab -l -u $cfn_cluster_user; echo "*/60 * * * * /usr/local/bin/1h-cost-metrics.sh") | crontab -u $cfn_cluster_user -
5250

5351

5452
# replace tokens
55-
sed -i "s/_S3_BUCKET_/${s3_bucket}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/ParallelCluster.json
56-
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/ParallelCluster.json
57-
sed -i "s/__FSX_ID__/${cfn_fsx_fs_id}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/ParallelCluster.json
58-
sed -i "s/__AWS_REGION__/${cfn_region}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/ParallelCluster.json
53+
sed -i "s/_S3_BUCKET_/${s3_bucket}/g" ${monitoring_home}/grafana/dashboards/ParallelCluster.json
54+
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" ${monitoring_home}/grafana/dashboards/ParallelCluster.json
55+
sed -i "s/__FSX_ID__/${cfn_fsx_fs_id}/g" ${monitoring_home}/grafana/dashboards/ParallelCluster.json
56+
sed -i "s/__AWS_REGION__/${cfn_region}/g" ${monitoring_home}/grafana/dashboards/ParallelCluster.json
5957

60-
sed -i "s/__AWS_REGION__/${cfn_region}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/logs.json
61-
sed -i "s/__LOG_GROUP__NAMES__/${log_group_names}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/logs.json
58+
sed -i "s/__AWS_REGION__/${cfn_region}/g" ${monitoring_home}/grafana/dashboards/logs.json
59+
sed -i "s/__LOG_GROUP__NAMES__/${log_group_names}/g" ${monitoring_home}/grafana/dashboards/logs.json
6260

63-
sed -i "s/__Application__/${stack_name}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/prometheus/prometheus.yml
61+
sed -i "s/__Application__/${stack_name}/g" ${monitoring_home}/prometheus/prometheus.yml
6462

65-
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/master-node-details.json
66-
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/compute-node-list.json
67-
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/grafana/dashboards/compute-node-details.json
63+
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" ${monitoring_home}/grafana/dashboards/master-node-details.json
64+
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" ${monitoring_home}/grafana/dashboards/compute-node-list.json
65+
sed -i "s/__INSTANCE_ID__/${master_instance_id}/g" ${monitoring_home}/grafana/dashboards/compute-node-details.json
6866

69-
sed -i "s/__MONITORING_DIR__/${monitoring_dir_name}/g" /home/${cfn_cluster_user}/${monitoring_dir_name}/docker-compose/docker-compose.master.yml
67+
sed -i "s/__MONITORING_DIR__/${monitoring_dir_name}/g" ${monitoring_home}/docker-compose/docker-compose.master.yml
7068

7169
#Generate selfsigned certificate for Nginx over ssl
72-
nginx_dir="/home/${cfn_cluster_user}/${monitoring_dir_name}/nginx"
70+
nginx_dir="${monitoring_home}/nginx"
7371
nginx_ssl_dir="${nginx_dir}/ssl"
7472
mkdir -p ${nginx_ssl_dir}
7573
echo -e "\nDNS.1=$(ec2-metadata -p | awk '{print $2}')" >> "${nginx_dir}/openssl.cnf"
@@ -79,26 +77,24 @@ case "${cfn_node_type}" in
7977
chown -R $cfn_cluster_user:$cfn_cluster_user "${nginx_ssl_dir}/nginx.key"
8078
chown -R $cfn_cluster_user:$cfn_cluster_user "${nginx_ssl_dir}/nginx.crt"
8179

82-
/usr/local/bin/docker-compose --env-file /etc/parallelcluster/cfnconfig -f /home/${cfn_cluster_user}/${monitoring_dir_name}/docker-compose/docker-compose.master.yml -p grafana-master up -d
80+
/usr/local/bin/docker-compose --env-file /etc/parallelcluster/cfnconfig -f ${monitoring_home}/docker-compose/docker-compose.master.yml -p monitoring-master up -d
8381

8482
# Download and build prometheus-slurm-exporter
8583
##### Please note this software package is under the GPLv3 License #####
8684
# More info here: https://github.com/vpenso/prometheus-slurm-exporter/blob/master/LICENSE
87-
cd /home/${cfn_cluster_user}/${monitoring_dir_name}
85+
cd ${monitoring_home}
8886
git clone https://github.com/vpenso/prometheus-slurm-exporter.git
8987
cd prometheus-slurm-exporter
9088
GOPATH=/root/go-modules-cache HOME=/root go mod download
9189
GOPATH=/root/go-modules-cache HOME=/root go build
92-
mv /home/${cfn_cluster_user}/${monitoring_dir_name}/prometheus-slurm-exporter/prometheus-slurm-exporter /usr/bin/prometheus-slurm-exporter
90+
mv ${monitoring_home}/prometheus-slurm-exporter/prometheus-slurm-exporter /usr/bin/prometheus-slurm-exporter
9391

9492
systemctl daemon-reload
9593
systemctl enable slurm_exporter
9694
systemctl start slurm_exporter
9795
;;
9896

9997
ComputeFleet)
100-
101-
/usr/local/bin/docker-compose -f /home/${cfn_cluster_user}/${monitoring_dir_name}/docker-compose/docker-compose.compute.yml -p grafana-compute up -d
102-
98+
/usr/local/bin/docker-compose -f ${monitoring_home}/docker-compose/docker-compose.compute.yml -p monitoring-compute up -d
10399
;;
104100
esac

post-install.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,23 @@
1010
. /etc/parallelcluster/cfnconfig
1111

1212
#get the monitoring tarball URL, the target directory name, and the installation script
13-
github_repo=$(echo ${cfn_postinstall_args}| cut -d ',' -f 1 )
14-
setup_command=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
15-
monitoring_dir_name=$(basename -s .git ${github_repo})
13+
monitoring_url=$(echo ${cfn_postinstall_args}| cut -d ',' -f 1 )
14+
monitoring_dir_name=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
15+
monitoring_tarball="${monitoring_dir_name}.tar.gz"
16+
setup_command=$(echo ${cfn_postinstall_args}| cut -d ',' -f 3 )
17+
monitoring_home="/home/${cfn_cluster_user}/${monitoring_dir_name}"
1618

1719
case ${cfn_node_type} in
1820
MasterServer)
19-
cd /home/$cfn_cluster_user/
20-
git clone ${github_repo}
21+
wget ${monitoring_url} -O ${monitoring_tarball}
22+
mkdir -p ${monitoring_home}
23+
tar xvf ${monitoring_tarball} -C ${monitoring_home} --strip-components 1
2124
;;
2225
ComputeFleet)
2326

2427
;;
2528
esac
2629

2730
#Execute the monitoring installation script
28-
bash -x "/home/${cfn_cluster_user}/${monitoring_dir_name}/parallelcluster-setup/${setup_command}" >/tmp/monitoring-setup.log 2>&1
31+
bash -x "${monitoring_home}/parallelcluster-setup/${setup_command}" >/tmp/monitoring-setup.log 2>&1
2932
exit $?

0 commit comments

Comments
 (0)