Skip to content

Commit 4578f8e

Browse files
Rex Chen authored and fnubalaj committed
Changes to Support RAID
- Added defaults for RAID-related parameters in attributes/default.rb
- Added the mdadm package to the list of base packages in attributes/default.rb
- Added recipe setup_raid_on_master.rb to handle RAID array setup on the master, including parsing RAID-related parameters and executing RAID mounting instructions
- Changed _master_base_config.rb to include setup_raid_on_master
- Changed _compute_base_config.rb to parse RAID-related info and mount the RAID array via NFS

Signed-off-by: Rex Chen <[email protected]>
1 parent 1eaeddf commit 4578f8e

File tree

4 files changed

+162
-4
lines changed

4 files changed

+162
-4
lines changed

attributes/default.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,13 @@
7373
libXmu-devel hwloc-devel db4-devel tcl-devel automake autoconf pyparted libtool
7474
httpd boost-devel redhat-lsb mlocate mpich-devel openmpi-devel R atlas-devel
7575
blas-devel fftw-devel libffi-devel openssl-devel dkms mysql-devel libedit-devel
76-
libical-devel postgresql-devel postgresql-server sendmail]
76+
libical-devel postgresql-devel postgresql-server sendmail mdadm]
7777
if node['platform_version'].to_i >= 7
7878
default['cfncluster']['base_packages'] = %w[vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel
7979
libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf pyparted libtool
8080
httpd boost-devel redhat-lsb mlocate lvm2 mpich-devel openmpi-devel R atlas-devel
8181
blas-devel fftw-devel libffi-devel openssl-devel dkms mariadb-devel libedit-devel
82-
libical-devel postgresql-devel postgresql-server sendmail libxml2-devel libglvnd-devel]
82+
libical-devel postgresql-devel postgresql-server sendmail libxml2-devel libglvnd-devel mdadm]
8383
end
8484
default['cfncluster']['kernel_devel_pkg']['name'] = "kernel-lt-devel" if node['platform'] == 'centos' && node['platform_version'].to_i >= 6 && node['platform_version'].to_i < 7
8585
default['cfncluster']['rhel']['extra_repo'] = 'rhui-REGION-rhel-server-releases-optional' if node['platform'] == 'redhat' && node['platform_version'].to_i >= 6 && node['platform_version'].to_i < 7
@@ -90,7 +90,7 @@
9090
libXmu-devel hwloc-devel db4-devel tcl-devel automake autoconf pyparted libtool
9191
httpd boost-devel redhat-lsb mlocate mpich-devel openmpi-devel R atlas-devel fftw-devel
9292
libffi-devel openssl-devel dkms mysql-devel libedit-devel postgresql-devel postgresql-server
93-
sendmail cmake byacc libglvnd-devel]
93+
sendmail cmake byacc libglvnd-devel mdadm]
9494
end
9595

9696
default['cfncluster']['ganglia']['apache_user'] = 'apache'
@@ -107,7 +107,7 @@
107107
tcl-dev automake autoconf python-parted libtool librrd-dev libapr1-dev libconfuse-dev
108108
apache2 libboost-dev libdb-dev tcsh libssl-dev libncurses5-dev libpam0g-dev libxt-dev
109109
libmotif-dev libxmu-dev libxft-dev libhwloc-dev man-db lvm2 libmpich-dev libopenmpi-dev
110-
r-base libatlas-dev libblas-dev libfftw3-dev libffi-dev libssl-dev libxml2-dev]
110+
r-base libatlas-dev libblas-dev libfftw3-dev libffi-dev libssl-dev libxml2-dev mdadm]
111111
if Chef::VersionConstraint.new('< 16.04').include?(node['platform_version'])
112112
default['cfncluster']['kernel_devel_pkg']['name'] = "linux-image-extra"
113113
default['cfncluster']['kernel_devel_pkg']['version'] = node['kernel']['release']
@@ -159,3 +159,5 @@
159159
default['cfncluster']['cfn_cluster_user'] = 'ec2-user'
160160
default['cfncluster']['custom_node_package'] = nil
161161
default['cfncluster']['custom_awsbatchcli_package'] = nil
162+
default['cfncluster']['cfn_raid_parameters'] = "NONE"
163+
default['cfncluster']['cfn_raid_vol_ids'] = nil

recipes/_compute_base_config.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,27 @@
2929
# Mount EFS directory with efs_mount recipe
3030
include_recipe 'aws-parallelcluster::efs_mount'
3131

32+
# Parse the RAID shared directory: the first comma-separated field of cfn_raid_parameters
33+
raid_shared_dir = node['cfncluster']['cfn_raid_parameters'].split(',')[0]
34+
35+
if raid_shared_dir != "NONE"
36+
# Create the RAID shared mount point
37+
directory raid_shared_dir do
38+
mode '1777'
39+
owner 'root'
40+
group 'root'
41+
action :create
42+
end
43+
44+
# Mount RAID directory over NFS
45+
mount raid_shared_dir do
46+
device "#{nfs_master}:#{raid_shared_dir}"
47+
fstype 'nfs'
48+
options 'hard,intr,noatime,vers=3,_netdev'
49+
action %i[mount enable]
50+
end
51+
end
52+
3253
# Mount /home over NFS
3354
mount '/home' do
3455
device "#{nfs_master}:/home"

recipes/_master_base_config.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@
115115
options ['no_root_squash']
116116
end
117117

118+
# Setup RAID array on master node
119+
include_recipe 'aws-parallelcluster::setup_raid_on_master'
120+
118121
# Configure Ganglia on the Master
119122
if node['cfncluster']['ganglia_enabled'] == 'yes'
120123
template '/etc/ganglia/gmetad.conf' do

recipes/setup_raid_on_master.rb

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#
2+
# Cookbook Name:: aws-parallelcluster
3+
# Recipe:: setup_raid_on_master
4+
#
5+
# Copyright 2013-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
8+
# License. A copy of the License is located at
9+
#
10+
# http://aws.amazon.com/apache2.0/
11+
#
12+
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
13+
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# RAID RELATED
17+
# Parse the RAID shared directory: the first comma-separated field of cfn_raid_parameters
18+
raid_shared_dir = node['cfncluster']['cfn_raid_parameters'].split(',')[0]
19+
20+
if raid_shared_dir != "NONE"
21+
22+
# Parse and determine RAID type (cast into integer)
23+
raid_type = node['cfncluster']['cfn_raid_parameters'].split(',')[1].strip.to_i
24+
25+
# Parse volume info into an array
26+
raid_vol_array = node['cfncluster']['cfn_raid_vol_ids'].split(',')
27+
raid_vol_array.each_with_index do |vol, index|
28+
raid_vol_array[index] = vol.strip
29+
end
30+
31+
# Attach each volume
32+
raid_dev_path = []
33+
raid_vol_array.each_with_index do |volumeid, index|
34+
raid_dev_path[index] = "/dev/disk/by-ebs-volumeid/#{volumeid}"
35+
36+
# Attach RAID EBS volume
37+
execute "attach_raid_volume_#{index}" do
38+
command "/usr/local/sbin/attachVolume.py #{volumeid}"
39+
creates raid_dev_path[index]
40+
end
41+
42+
# Wait for the attached volume's block device to appear
43+
ruby_block "sleeping_for_raid_volume_#{index}" do
44+
block do
45+
wait_for_block_dev(raid_dev_path[index])
46+
puts "Attached index: #{index}, VolID: #{volumeid}"
47+
end
48+
action :nothing
49+
subscribes :run, "execute[attach_raid_volume_#{index}]", :immediately
50+
end
51+
end
52+
53+
raid_dev = "/dev/md0"
54+
55+
# Create RAID device with mdadm
56+
mdadm "MY_RAID" do
57+
raid_device raid_dev
58+
level raid_type
59+
devices raid_dev_path
60+
end
61+
62+
# Wait for RAID to initialize
63+
ruby_block "sleeping_for_raid_block" do
64+
block do
65+
wait_for_block_dev(raid_dev)
66+
end
67+
action :nothing
68+
subscribes :run, "mdadm[MY_RAID]", :immediately
69+
end
70+
71+
# Setup RAID disk, create ext4 filesystem on RAID array
72+
execute "setup_raid_disk" do
73+
command "sudo mkfs.ext4 #{raid_dev}"
74+
action :nothing
75+
subscribes :run, "ruby_block[sleeping_for_raid_block]", :immediately
76+
end
77+
78+
79+
# Append the RAID array definition (mdadm --detail --scan) to the mdadm configuration file, so the RAID array is reassembled automatically on boot
80+
if node['cfncluster']['cfn_base_os'] != "ubuntu1404"
81+
execute "create_raid_config" do
82+
command "sudo mdadm --detail --scan | sudo tee -a /etc/mdadm.conf"
83+
action :nothing
84+
subscribes :run, "execute[setup_raid_disk]", :immediately
85+
end
86+
87+
else
88+
# Put config file in /etc/mdadm/mdadm.conf, Ubuntu1404 specific
89+
execute "create_raid_config" do
90+
command "sudo mdadm --detail --scan | sudo tee -a /etc/mdadm/mdadm.conf"
91+
action :nothing
92+
subscribes :run, "execute[setup_raid_disk]", :immediately
93+
end
94+
# Update initramfs to contain mdadm.conf settings, Ubuntu1404 specific
95+
execute "update_raid_config" do
96+
command "sudo update-initramfs -u"
97+
action :nothing
98+
subscribes :run, "execute[setup_raid_disk]", :immediately
99+
end
100+
end
101+
102+
# Create the shared directory
103+
directory raid_shared_dir do
104+
owner 'root'
105+
group 'root'
106+
mode '1777'
107+
recursive true
108+
action :create
109+
end
110+
111+
# Mount the RAID device on the shared directory and add it to /etc/fstab
112+
mount raid_shared_dir do
113+
device "/dev/md0"
114+
fstype "ext4"
115+
options "defaults,nofail,_netdev"
116+
action %i[mount enable]
117+
end
118+
119+
# Make sure shared directory permissions are correct
120+
directory raid_shared_dir do
121+
owner 'root'
122+
group 'root'
123+
mode '1777'
124+
end
125+
126+
# Export RAID directory via nfs
127+
nfs_export raid_shared_dir do
128+
network node['cfncluster']['ec2-metadata']['vpc-ipv4-cidr-block']
129+
writeable true
130+
options ['no_root_squash']
131+
end
132+
end

0 commit comments

Comments
 (0)