Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
25ff302
Updating.
kbooker79 Oct 9, 2023
6ca0acb
Updating with new AMI information relevant to the ACIO sandbox.
kbooker79 Oct 10, 2023
b060a75
Updating image to use AWS Parallelcluster base rather than vanilla ub…
kbooker79 Oct 10, 2023
3e87ed9
feature/add-acio-jumphost, adding Cloudformation template for ACIO Ju…
kbooker79 Dec 4, 2023
e90980c
feature/add-acio-jumphost, adding new IAM policies and public key fro…
kbooker79 Dec 5, 2023
e8b410e
feature/add-acio-jumphost, updating to allow new private key generation.
kbooker79 Dec 5, 2023
de11903
Update README.md
Byte-Code-Connoisseur Jan 24, 2024
42a57b2
Create image_build_container.sh
Byte-Code-Connoisseur Jan 24, 2024
7718551
Delete image_build_container.sh
Byte-Code-Connoisseur Jan 24, 2024
7b6feed
Create image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
291c33d
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
85ae74f
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
acab771
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
aa1e03f
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
69a167f
correct miniconda3 modulefile local variable
natalie-perlin Jan 24, 2024
035d6dc
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 24, 2024
e4311b6
Update srw-cluster-start-script.sh
Byte-Code-Connoisseur Jan 25, 2024
31c859c
Update cluster_start_script_v3.sh
Byte-Code-Connoisseur Jan 25, 2024
3a1bd0e
Update image_build_container_ams24.sh
Byte-Code-Connoisseur Jan 30, 2024
0b074d3
Update srwcluster_nodeconfig_v1.yaml
Byte-Code-Connoisseur Mar 4, 2024
3c9c7d8
Update generateClusters.sh
Byte-Code-Connoisseur Mar 4, 2024
19e8d9f
Merge branch 'main' into feature/add-acio-jumphost
Byte-Code-Connoisseur Apr 25, 2024
3bfffcd
Create test.py
Byte-Code-Connoisseur Jun 17, 2024
66d00ed
Update srwcluster_nodeconfig_v1.yaml
kevenmblackman Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ packer build srw-cluster.pkr.hcl -var "date=4May2023"
### AWS LandDA configuration:
vi srw-cluster.pkr.hcl #edit line 141: from srw-cluster-start-script.sh to landda-cluster-container-start-script.sh
packer build srw-cluster.pkr.hcl -var "date=4May2023"

### Required Software
Version 3.7.1 of AWS ParallelCluster
1,023 changes: 1,023 additions & 0 deletions scripts/bastion-jumphost.template.yaml

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions scripts/deployment/cluster_start_script_v3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,6 @@ chown -R ubuntu /home/ubuntu/ufs-srweather-app

echo 'Deleting crontab entries'
crontab -u ubuntu -r

echo 'Installing Bastion Key'
# Read the public key stored in the SSM parameter "bastion_public_key"
# (us-east-1) and append it to the authorized_keys of the user running this
# script.
#
# The value is captured first instead of being piped straight into the file:
#   * jq -r prints the literal string "null" when .Parameter.Value is absent,
#     which would otherwise be appended as a bogus key line;
#   * the append fails outright when ~/.ssh does not exist yet.
# A missing key is reported on stderr but does not abort the script, matching
# the previous best-effort behaviour.
ssh_dir=~/.ssh
bastion_key="$(aws ssm get-parameter --region us-east-1 --name bastion_public_key | jq -r .Parameter.Value)"
if [ -n "${bastion_key}" ] && [ "${bastion_key}" != "null" ]; then
  # -m 700 only applies when the directory is newly created.
  mkdir -p -m 700 "${ssh_dir}"
  printf '%s\n' "${bastion_key}" >> "${ssh_dir}/authorized_keys"
else
  echo 'WARNING: bastion_public_key could not be read from SSM; no key installed' >&2
fi
4 changes: 2 additions & 2 deletions scripts/deployment/generateClusters.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
for i in $(seq 1 1 1)
do
pcluster create-cluster --region us-east-1 --cluster-name srwv2-cluster-$i --cluster-configuration srwcluster_nodeconfig_v1.yaml --rollback-on-failure false --debug
done
pcluster create-cluster --region us-east-1 --cluster-name srwv22-cluster-$i --cluster-configuration srwcluster_nodeconfig_v1.yaml --rollback-on-failure false --debug
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env bash
#
# Installs, in order: Go, Singularity CE, the SRW App sandbox container image,
# Lmod (with Lua 5.3), Ruby, Miniconda3 with the SRW workflow environments,
# Rocoto (plus a modulefile for it) and the SRW release data archives.
#
# Requirements visible from the commands below: an "ubuntu" user with home
# /home/ubuntu, sudo rights, network access, wget/git/tar/make, and a
# writable /scratch directory.
#
# Error handling: only directory changes abort the script (every following
# command depends on being in the right place). Other steps are not run under
# `set -e` because the Lmod init script and `module` are third-party shell
# code.

home_dir="/home/ubuntu"
go_version="1.21.6"
singularity_version="3.11.0"
lmod_version="8.6"
conda_version="4.12.0"
rocoto_version="1.3.6"
data_url="https://noaa-ufs-srw-pds.s3.amazonaws.com/current_srw_release_data"

###Install go###
# Unpack inside ${home_dir} so the toolchain really ends up in the directory
# that PATH/GOPATH/GOBIN below point to, whatever directory the script was
# started from.
cd "${home_dir}" || exit 1
wget "https://go.dev/dl/go${go_version}.linux-amd64.tar.gz"
tar -xvf "go${go_version}.linux-amd64.tar.gz"
export PATH="${PATH}:${home_dir}/go/bin"
export GOPATH="${home_dir}/go"
export GOBIN="${home_dir}/go/bin"

###Install singularity###
cd "${home_dir}" || exit 1
wget "https://github.com/sylabs/singularity/releases/download/v${singularity_version}/singularity-ce-${singularity_version}.tar.gz"
tar -xzf "singularity-ce-${singularity_version}.tar.gz"
cd "singularity-ce-${singularity_version}/" || exit 1
# Refresh the package index before installing anything, and pass -y so the
# installs do not stop at a confirmation prompt.
sudo apt-get update
sudo apt-get install -y libseccomp-dev
sudo apt-get install -y libglib2.0-dev
./mconfig && make -C ./builddir && sudo make -C ./builddir install

###Build the container image###
cd "${home_dir}" || exit 1
sudo singularity build --sandbox ubuntu20.04-intel-srwapp \
  docker://noaaepic/ubuntu20.04-intel-srwapp:release-public-v2.2.0

###Upgrade lmod/Lua###
cd "${home_dir}" || exit 1
sudo apt-get install -y lua5.3
sudo apt-get remove -y lua5.2
wget "https://sourceforge.net/projects/lmod/files/Lmod-${lmod_version}.tar.bz2"
tar xvfj "Lmod-${lmod_version}.tar.bz2"
cd "Lmod-${lmod_version}" || exit 1
./configure --prefix=/opt/apps
sudo make install
# Lmod's bash init script; the `module` commands further down rely on it.
source /opt/apps/lmod/lmod/init/bash

###Install ruby and ruby-dev###
cd "${home_dir}" || exit 1
sudo apt-get install -y ruby
sudo apt-get install -y ruby-dev

###Install miniconda###
cd "${home_dir}" || exit 1
git clone -b feature/ufs_srw_public_2.2.0 https://github.com/NOAA-EPIC/miniconda3.git
cd miniconda3/ || exit 1
# Replace every "lustre" in the modulefile template with "home/ubuntu".
sed -i "s|lustre|home\/ubuntu|g" miniconda3template.lua
# Base install first, then the three workflow environments; each script takes
# the install prefix and the conda version.
for conda_script in \
  miniconda3_install.sh \
  miniconda3_regional_workflow_env.sh \
  miniconda3_workflow_tools_env.sh \
  miniconda3_regional_workflow_cmaq_env.sh; do
  "./${conda_script}" "${home_dir}/miniconda3" "${conda_version}"
done
# Load the module:
module use "${home_dir}/miniconda3/modulefiles"
module load "miniconda3/${conda_version}"
cd "${home_dir}/miniconda3/${conda_version}/lib/" || exit 1
# Move conda's libtinfo out of the way.
# NOTE(review): presumably so the system libtinfo is picked up instead - confirm.
mv libtinfo.so.6 libtinfo.so.6_bac

###Install rocoto###
PREFIX="${home_dir}/rocoto"
mkdir -p "${PREFIX}"
cd "${PREFIX}" || exit 1
git clone -b "${rocoto_version}" https://github.com/christopherwharrop/rocoto.git "${rocoto_version}"
cd "${rocoto_version}" || exit 1
./INSTALL 2>&1 | tee "rocoto-${rocoto_version}.install.log"
# Prepare a modulefile for rocoto
cd "${PREFIX}" || exit 1
export ROCOTOBIN="${PREFIX}/${rocoto_version}/bin"
export ROCOTOLIB="${PREFIX}/${rocoto_version}/lib"
# -p keeps a re-run from failing on the already existing directories; the
# redirect below creates the .lua file, so no separate touch is needed.
mkdir -p "${PREFIX}/modulefiles/rocoto"
# Unquoted EOF on purpose: $ROCOTOBIN/$ROCOTOLIB are expanded while writing.
cat > "${PREFIX}/modulefiles/rocoto/${rocoto_version}.lua" << EOF
help([[
Set environment variables for rocoto workflow manager
]])

-- Make sure another version of the same package is not already loaded
conflict("rocoto")

-- Set environment variables
prepend_path("PATH","$ROCOTOBIN")
prepend_path("LD_LIBRARY_PATH","$ROCOTOLIB")
EOF
# Verify the module could be loaded (PREFIX is already absolute, so no extra
# leading slash):
module use "${PREFIX}/modulefiles"
module load "rocoto/${rocoto_version}"

###Add needed data###
cd /scratch || exit 1
for archive in fix_data.tgz gst_data.tgz; do
  wget "${data_url}/${archive}"
  tar xfz "${archive}"
done
# After untarring the archives, the directories ./fix and ./input_model_data
# are expected under /scratch.
13 changes: 7 additions & 6 deletions scripts/deployment/srwcluster_nodeconfig_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
Region: us-east-1
Image:
Os: ubuntu2004
CustomAmi: ami-030cf29e6d5d8724a #Must modify
CustomAmi: ami-08100890884d98d9d #Must modify
HeadNode:
InstanceType: c5.2xlarge
Networking:
SubnetId: ami-030cf29e6d5d8724a #Must modify
SubnetId: subnet-078043a467c391dfd #Must modify (public subnet)
Ssh:
KeyName: epic_workshop
LocalStorage:
Expand All @@ -18,10 +18,11 @@ HeadNode:
Throughput: 1000
Iam:
AdditionalIamPolicies:
- Policy: arn:aws:iam::aws:policy/AmazonS3FullAccess
- Policy: arn:aws:iam::aws:policy/AmazonS3FullAccess
- Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
CustomActions:
OnNodeConfigured:
Script: s3://epic.sandbox.srw/cluster_start_script_v3.sh #Must modify
Script: s3://epic-srw-sandbox/cluster_start_script_v3.sh #Must modify
Scheduling:
Scheduler: slurm
SlurmSettings:
Expand All @@ -45,13 +46,13 @@ Scheduling:
MaxCount: 2
Networking:
SubnetIds:
- subnet-04d911e4b55853ef7 #Must modify
- subnet-07bd52a747ba1e525 #Must modify
AssignPublicIp: true
PlacementGroup:
Enabled: true
CustomActions:
OnNodeConfigured:
Script: s3://epic.sandbox.srw/cluster_start_script_v3.sh #Must modify
Script: s3://epic-srw-sandbox/cluster_start_script_v3.sh #Must modify
SharedStorage:
- MountDir: /scratch
Name: ebs
Expand Down
91 changes: 91 additions & 0 deletions scripts/srw-cluster-start-script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,94 @@ cd /opt/ufs-srweather-app/build
cmake -DCMAKE_CXX_COMPILER=mpiicpc -DCMAKE_C_COMPILER=mpiicc -DCMAKE_FC_COMPILER=mpiifort -DCMAKE_INSTALL_PREFIX=.. ..
make -j 8
echo "UFS SRW App build completed!"
# ---------------------------------------------------------------------------
# Container tool stack: Go, Singularity CE, the SRW App sandbox image, Lmod,
# Ruby, Miniconda3, Rocoto and the SRW release data.
# Only failed directory changes abort the script; all other steps keep the
# original run-to-the-end behaviour.
# ---------------------------------------------------------------------------
###install go###
echo 'Installing Go'
# The preceding build step leaves the shell in /opt/ufs-srweather-app/build.
# Unpack Go in /home/ubuntu instead, because PATH/GOPATH/GOBIN below point to
# /home/ubuntu/go.
cd /home/ubuntu || exit 1
wget https://go.dev/dl/go1.21.6.linux-amd64.tar.gz
tar -xvf go1.21.6.linux-amd64.tar.gz
export PATH="$PATH:/home/ubuntu/go/bin"
export GOPATH=/home/ubuntu/go
# Same directory as the PATH entry above, so installed Go binaries are found.
export GOBIN=/home/ubuntu/go/bin
###Install singularity###
echo 'Installing Singularity'
cd /home/ubuntu || exit 1
wget https://github.com/sylabs/singularity/releases/download/v3.11.0/singularity-ce-3.11.0.tar.gz
tar -xzf singularity-ce-3.11.0.tar.gz
cd singularity-ce-3.11.0/ || exit 1
# Refresh the package index before installing anything, and pass -y so the
# installs do not stop at a confirmation prompt.
sudo apt-get update
sudo apt-get install -y libseccomp-dev
sudo apt-get install -y libglib2.0-dev
./mconfig && make -C ./builddir && sudo make -C ./builddir install
###Build the container image###
echo 'Installing Container'
cd /home/ubuntu || exit 1
sudo singularity build --sandbox ubuntu20.04-intel-srwapp \
  docker://noaaepic/ubuntu20.04-intel-srwapp:release-public-v2.2.0
###Upgrade lmod/Lua###
echo 'Installing Lmod/Lua'
cd /home/ubuntu || exit 1
sudo apt-get install -y lua5.3
sudo apt-get remove -y lua5.2
wget https://sourceforge.net/projects/lmod/files/Lmod-8.6.tar.bz2
tar xvfj Lmod-8.6.tar.bz2
cd Lmod-8.6 || exit 1
./configure --prefix=/opt/apps
sudo make install
# Lmod's bash init script; the `module` commands further down rely on it.
source /opt/apps/lmod/lmod/init/bash
###Install ruby and ruby-dev###
echo 'Installing Ruby'
cd /home/ubuntu || exit 1
sudo apt-get install -y ruby
sudo apt-get install -y ruby-dev
###Install miniconda###
echo 'Installing Miniconda'
cd /home/ubuntu || exit 1
git clone -b feature/ufs_srw_public_2.2.0 https://github.com/NOAA-EPIC/miniconda3.git
cd miniconda3/ || exit 1
# Replace every "lustre" in the modulefile template with "home/ubuntu".
sed -i "s|lustre|home\/ubuntu|g" miniconda3template.lua
# Base install first, then the three workflow environments; each script takes
# the install prefix and the conda version.
./miniconda3_install.sh /home/ubuntu/miniconda3 4.12.0
./miniconda3_regional_workflow_env.sh /home/ubuntu/miniconda3 4.12.0
./miniconda3_workflow_tools_env.sh /home/ubuntu/miniconda3 4.12.0
./miniconda3_regional_workflow_cmaq_env.sh /home/ubuntu/miniconda3 4.12.0
# Load the module:
module use /home/ubuntu/miniconda3/modulefiles
module load miniconda3/4.12.0
cd /home/ubuntu/miniconda3/4.12.0/lib/ || exit 1
# Move conda's libtinfo out of the way.
# NOTE(review): presumably so the system libtinfo is picked up instead - confirm.
mv libtinfo.so.6 libtinfo.so.6_bac
###Install rocoto###
echo 'Installing rocoto'
PREFIX="/home/ubuntu/rocoto"
mkdir -p "$PREFIX"
cd "$PREFIX" || exit 1
git clone -b 1.3.6 https://github.com/christopherwharrop/rocoto.git 1.3.6
cd 1.3.6 || exit 1
./INSTALL 2>&1 | tee rocoto-1.3.6.install.log
# Prepare a modulefile for rocoto
cd "$PREFIX" || exit 1
export ROCOTOBIN="$PREFIX/1.3.6/bin"
export ROCOTOLIB="$PREFIX/1.3.6/lib"
# -p keeps a re-run from failing on the already existing directories; the
# redirect below creates the .lua file, so no separate touch is needed.
mkdir -p "$PREFIX/modulefiles/rocoto"
# Unquoted EOF on purpose: $ROCOTOBIN/$ROCOTOLIB are expanded while writing.
cat > "$PREFIX/modulefiles/rocoto/1.3.6.lua" << EOF
help([[
Set environment variables for rocoto workflow manager
]])

-- Make sure another version of the same package is not already loaded
conflict("rocoto")

-- Set environment variables
prepend_path("PATH","$ROCOTOBIN")
prepend_path("LD_LIBRARY_PATH","$ROCOTOLIB")
EOF
# Verify the module could be loaded (PREFIX is already absolute, so no extra
# leading slash):
module use "$PREFIX/modulefiles"
module load rocoto/1.3.6
###Add needed data###
echo 'Installing Data'
# NOTE(review): the archives are unpacked into /data; confirm that directory
# exists on the image being built.
cd /data || exit 1
wget https://noaa-ufs-srw-pds.s3.amazonaws.com/current_srw_release_data/fix_data.tgz
tar xfz fix_data.tgz
wget https://noaa-ufs-srw-pds.s3.amazonaws.com/current_srw_release_data/gst_data.tgz
tar xfz gst_data.tgz
# After untarring the archives, the directories ./fix and ./input_model_data
# are expected under /data.
15 changes: 10 additions & 5 deletions srw-cluster.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Variables for AWS builders
###

# Build-time values shared by the sources below.
# `now` is the result of timestamp() rendered as YYYYMMDD-hhmmss; it is
# interpolated into the AMI name and the Name tag of the amazon-ebs source.
locals {
now = formatdate("YYYYMMDD-hhmmss", timestamp())
}

#Add multiple regions: default = ["us-east-1","us-east-2"]
variable "aws_ami_regions" {
description = "List of regions to copy the AMIs to. Tags and attributes are copied along with the AMIs"
Expand Down Expand Up @@ -46,9 +50,9 @@ variable "aws_source_ami_filter_ubuntu_2004_hvm" {
owners = list(string)
})
default = {
name = "ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"
name = "aws-parallelcluster-3.7.1-ubuntu-2004-lts-hvm-x86_64-202309151532 2023-09-15T15-36-35.608Z"
owners = [
"099720109477"
"247102896272"
]
}
}
Expand All @@ -59,6 +63,7 @@ variable "aws_temporary_security_group_source_cidrs" {
default = ["0.0.0.0/0"]
}


###
# Variables for Azure builders
###
Expand Down Expand Up @@ -88,7 +93,7 @@ variable "root_volume_size" {
###

source "amazon-ebs" "base" {
ami_name = "srw-cluster-{{date}}.x86_64-gp3"
ami_name = "srw-cluster-${local.now}.x86_64-gp3"
ami_regions = var.aws_ami_regions
ami_users = var.aws_ami_users
ami_groups = var.aws_ami_groups
Expand All @@ -111,8 +116,8 @@ source "amazon-ebs" "base" {
ssh_pty = true
ssh_timeout = "60m"
ssh_username = var.aws_ssh_username
subnet_id = "subnet-04bae583ce498ab48"
tags = { Name = "SRW-Cluster-{{date}}" }
subnet_id = "subnet-04d911e4b55853ef7"
tags = { Name = "SRW-Cluster-${local.now}" }
temporary_security_group_source_cidrs = var.aws_temporary_security_group_source_cidrs
}

Expand Down
1 change: 1 addition & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import pygrib