Skip to content

Commit 655d263

Browse files
lukeseawalker authored and tilne committed
Separate Ganglia installation from configuration
Ganglia is now managed by two recipes: one for the installation and one for the configuration. The installation recipe is executed at AMI build time, while the configuration recipe is executed at cluster creation time if Ganglia is enabled through the ['ganglia_enabled'] variable. This fixes a regression introduced by #563, where the Ganglia installation recipe wasn't executed at runtime. Signed-off-by: Luca Carrogu <[email protected]>
1 parent ec3d04d commit 655d263

File tree

6 files changed

+138
-110
lines changed

6 files changed

+138
-110
lines changed

recipes/_compute_base_config.rb

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,7 @@
5959
end
6060

6161
# Configure Ganglia
62-
if node['cfncluster']['ganglia_enabled'] == "yes"
63-
template '/etc/ganglia/gmond.conf' do
64-
source 'gmond.conf.erb'
65-
owner 'root'
66-
group 'root'
67-
mode '0644'
68-
end
69-
70-
service node['cfncluster']['ganglia']['gmond_service'] do
71-
supports restart: true
72-
action %i[enable restart]
73-
end
74-
end
62+
include_recipe 'aws-parallelcluster::ganglia_config'
7563

7664
# Setup cluster user
7765
user node['cfncluster']['cfn_cluster_user'] do

recipes/_ganglia_install.rb

Lines changed: 0 additions & 65 deletions
This file was deleted.

recipes/_master_base_config.rb

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -142,37 +142,8 @@
142142
# Setup RAID array on master node
143143
include_recipe 'aws-parallelcluster::setup_raid_on_master'
144144

145-
# Configure Ganglia on the Master
146-
if node['cfncluster']['ganglia_enabled'] == 'yes'
147-
template '/etc/ganglia/gmetad.conf' do
148-
source 'gmetad.conf.erb'
149-
owner 'root'
150-
group 'root'
151-
mode '0644'
152-
end
153-
154-
template '/etc/ganglia/gmond.conf' do
155-
source 'gmond.conf.erb'
156-
owner 'root'
157-
group 'root'
158-
mode '0644'
159-
end
160-
161-
service "gmetad" do
162-
supports restart: true
163-
action %i[enable restart]
164-
end
165-
166-
service node['cfncluster']['ganglia']['gmond_service'] do
167-
supports restart: true
168-
action %i[enable restart]
169-
end
170-
171-
service node['cfncluster']['ganglia']['httpd_service'] do
172-
supports restart: true, reload: true
173-
action %i[enable restart]
174-
end
175-
end
145+
# Configure Ganglia
146+
include_recipe 'aws-parallelcluster::ganglia_config'
176147

177148
# Setup cluster user
178149
user node['cfncluster']['cfn_cluster_user'] do

recipes/base_install.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@
203203
end
204204

205205
# Install Ganglia
206-
include_recipe "aws-parallelcluster::_ganglia_install"
206+
include_recipe "aws-parallelcluster::ganglia_install"
207207

208208
# Install NVIDIA and CUDA
209209
include_recipe "aws-parallelcluster::_nvidia_install"

recipes/ganglia_config.rb

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# frozen_string_literal: true
2+
3+
#
4+
# Cookbook Name:: aws-parallelcluster
5+
# Recipe:: ganglia_config
6+
#
7+
# Copyright 2013-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
8+
#
9+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
10+
# License. A copy of the License is located at
11+
#
12+
# http://aws.amazon.com/apache2.0/
13+
#
14+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
15+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
if node['cfncluster']['ganglia_enabled'] == 'yes'
19+
case node['cfncluster']['cfn_node_type']
20+
when 'MasterServer'
21+
case node['platform']
22+
when "redhat", "centos", "amazon", "scientific" # ~FC024
23+
cookbook_file 'ganglia-webfrontend.conf' do
24+
path '/etc/httpd/conf.d/ganglia.conf'
25+
user 'root'
26+
group 'root'
27+
mode '0644'
28+
end
29+
when "ubuntu"
30+
directory '/var/lib/ganglia/rrds' do
31+
owner 'ganglia'
32+
group 'ganglia'
33+
mode 0755
34+
recursive true
35+
action :create
36+
end
37+
38+
# Setup ganglia-web.conf apache config
39+
execute "copy ganglia apache conf" do
40+
command "cp /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf"
41+
not_if "test -f /etc/apache2/sites-enabled/ganglia.conf"
42+
end
43+
end
44+
45+
template '/etc/ganglia/gmetad.conf' do
46+
source 'gmetad.conf.erb'
47+
owner 'root'
48+
group 'root'
49+
mode '0644'
50+
end
51+
52+
service "gmetad" do
53+
supports restart: true
54+
action %i[enable restart]
55+
end
56+
57+
service node['cfncluster']['ganglia']['httpd_service'] do
58+
supports restart: true, reload: true
59+
action %i[enable restart]
60+
end
61+
end
62+
63+
# For ComputeFleet and MasterServer
64+
65+
if node['platform_family'] == 'rhel' && node['platform_version'].to_i == 7 || node['platform'] == 'amazon' && node['platform_version'].to_i == 2
66+
# Fix circular dependency: multi-user.target -> cloud-init -> gmond -> multi-user.target
67+
# gmond is started by chef during cloud-init, but gmond service is configured to start after multi-user.target
68+
# which isn't reached until the cloud-init run has finished. So the gmond service gets stuck in the starting state,
69+
# which keeps chef hanging until the 600s timeout.
70+
replace_or_add "change gmond service dependency" do
71+
path "/usr/lib/systemd/system/gmond.service"
72+
pattern "After=multi-user.target"
73+
line "After=network.target"
74+
replace_only true
75+
end
76+
end
77+
78+
template '/etc/ganglia/gmond.conf' do
79+
source 'gmond.conf.erb'
80+
owner 'root'
81+
group 'root'
82+
mode '0644'
83+
end
84+
85+
service node['cfncluster']['ganglia']['gmond_service'] do
86+
supports restart: true
87+
action %i[enable restart]
88+
end
89+
end

recipes/ganglia_install.rb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# frozen_string_literal: true
2+
3+
#
4+
# Cookbook Name:: aws-parallelcluster
5+
# Recipe:: ganglia_install
6+
#
7+
# Copyright 2013-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
8+
#
9+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
10+
# License. A copy of the License is located at
11+
#
12+
# http://aws.amazon.com/apache2.0/
13+
#
14+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
15+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
case node['cfncluster']['cfn_node_type']
19+
when 'MasterServer', nil
20+
case node['platform']
21+
when "redhat", "centos", "amazon", "scientific" # ~FC024
22+
package %w[ganglia ganglia-gmond ganglia-gmetad ganglia-web httpd php php-gd rrdtool] do
23+
retries 3
24+
retry_delay 5
25+
end
26+
when "ubuntu"
27+
package %w[ganglia-monitor rrdtool gmetad ganglia-webfrontend] do
28+
retries 3
29+
retry_delay 5
30+
end
31+
end
32+
when 'ComputeFleet'
33+
case node['platform']
34+
when "redhat", "centos", "amazon", "scientific" # ~FC024
35+
package %w[ganglia-gmond] do
36+
retries 3
37+
retry_delay 5
38+
end
39+
when "ubuntu"
40+
package %w[ganglia-monitor] do
41+
retries 3
42+
retry_delay 5
43+
end
44+
end
45+
end

0 commit comments

Comments
 (0)