diff --git a/Capfile b/Capfile index 2c768ee408..723e6c3b84 100644 --- a/Capfile +++ b/Capfile @@ -28,11 +28,6 @@ require 'capistrano/rails/migrations' require 'capistrano/data_migrate' require 'capistrano/yarn' require 'capistrano/puma' -install_plugin Capistrano::Puma # Default puma tasks -install_plugin Capistrano::Puma::Systemd - -install_plugin Capistrano::Sidekiq -install_plugin Capistrano::Sidekiq::Systemd # Load custom tasks from `lib/capistrano/tasks` if you have any defined Dir.glob('lib/capistrano/tasks/*.rake').each { |r| import r } diff --git a/Gemfile b/Gemfile index 9daaf6754e..ed0a6fb18a 100644 --- a/Gemfile +++ b/Gemfile @@ -43,7 +43,7 @@ gem 'aws-sdk-s3', '~> 1.208' gem 'aws-sdk-sns' gem 'bootsnap', require: false gem 'charlock_holmes', '~> 0.7.9' -gem 'commonmarker', '~> 2.4' +gem 'commonmarker', '~> 2.6.3' gem 'concurrent-ruby', '~> 1.3.4' gem 'daemons', '~> 1.4.1' gem 'database_cleaner', '~> 2.0.1' diff --git a/Gemfile.lock b/Gemfile.lock index ef898aff72..1ef5d9f310 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -207,10 +207,9 @@ GEM execjs coffee-script-source (1.12.2) colorize (1.1.0) - commonmarker (2.4.1) - rb_sys (~> 0.9) - commonmarker (2.4.1-x86_64-darwin) - commonmarker (2.4.1-x86_64-linux) + commonmarker (2.6.3-arm64-darwin) + commonmarker (2.6.3-x86_64-darwin) + commonmarker (2.6.3-x86_64-linux) concurrent-ruby (1.3.5) connection_pool (2.5.3) crack (1.0.0) @@ -544,13 +543,10 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.2.1) - rake-compiler-dock (1.9.1) rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) rb-readline (0.5.5) - rb_sys (0.9.117) - rake-compiler-dock (= 1.9.1) rdoc (6.1.2.1) react-rails (2.6.2) babel-transpiler (>= 0.7.0) @@ -803,7 +799,7 @@ DEPENDENCIES charlock_holmes (~> 0.7.9) coffee-rails (~> 5.0) colorize - commonmarker (~> 2.4) + commonmarker (~> 2.6.3) concurrent-ruby (~> 1.3.4) daemons (~> 1.4.1) data_migrate diff --git a/app/services/stash_engine/deep_storage_file_validation_service.rb 
b/app/services/stash_engine/deep_storage_file_validation_service.rb index e3107d381d..1114f33a20 100644 --- a/app/services/stash_engine/deep_storage_file_validation_service.rb +++ b/app/services/stash_engine/deep_storage_file_validation_service.rb @@ -16,8 +16,14 @@ def validate_file storage = head.storage_class # Request restore if Glacier/Deep Archive + if storage == 'GLACIER_IR' + # Validate checksums and size + process_file + return + end + begin - if %w[GLACIER DEEP_ARCHIVE GLACIER_IR].include?(storage) + if %w[GLACIER DEEP_ARCHIVE].include?(storage) puts "Requesting restore for #{@key} (Deep Archive)" @s3.restore_object( bucket: @bucket, @@ -32,7 +38,7 @@ def validate_file # Wait until restoration is complete wait_for_restore - # Generate checksums + # Validate checksums and size process_file end diff --git a/config/deploy.rb b/config/deploy.rb index 904dfef406..5af4f2a938 100644 --- a/config/deploy.rb +++ b/config/deploy.rb @@ -6,21 +6,30 @@ set :rails_env, ENV['RAILS_ENV'] || 'production' set :repo_url, ENV['REPO_URL'] || 'https://github.com/datadryad/dryad-app.git' set :branch, ENV['BRANCH'] || 'main' +set :role, ENV['ROLE'] || 'app' set :application, 'dryad' set :default_env, { path: "$PATH" } # Gets the current Git tag and revision set :version_number, `git describe --tags` +set :migration_role, fetch(:role) +set :log_level, :debug -set :migration_role, :app +if fetch(:role).to_s == 'worker' + # disable asset compilation + Rake::Task["deploy:assets:precompile"].clear -set :log_level, :debug + # disable migrations + Rake::Task["deploy:migrate"].clear +end # this copies these files over from shared, but only the files that exist on that machine set :optional_shared_files, %w{ config/master.key } +set :puma_service_unit_name, 'puma' +set :puma_systemctl_user, :system set :sidekiq_systemctl_user, :system # Default value for linked_dirs is [] @@ -41,20 +50,15 @@ after :deploy, 'git:version' after :deploy, 'cleanup:remove_example_configs' after 
'deploy:symlink:linked_dirs', 'deploy:files:optional_copied_files' - after 'deploy:published', 'sidekiq:restart' end -set :puma_service_unit_name, 'puma' -set :puma_systemctl_user, :system - -namespace :puma do - after :restart, :index_help_center -end +after :deploy, 'puma:restart_if_exists' +after :deploy, 'sidekiq:restart_if_exists' namespace :git do desc "Add the version file so that we can display the git version in the footer" task :version do - on roles(:app), wait: 1 do + on roles(:app, :worker), wait: 1 do execute "touch #{release_path}/.version" execute "echo '#{fetch :version_number}' >> #{release_path}/.version" end @@ -64,7 +68,7 @@ namespace :deploy do namespace :files do task :optional_copied_files do - on roles(:app), wait: 1 do + on roles(:app, :worker), wait: 1 do optional_shared_files = fetch(:optional_shared_files, []) optional_shared_files.flatten.each do |file| if test "[ -f #{shared_path}/#{file} ]" @@ -76,11 +80,32 @@ end end +namespace :puma do + task :restart_if_exists do + on roles(:app) do + service = fetch(:puma_service_unit_name, "puma") + + if test("[ -f /etc/systemd/system/#{service}.service ]") || + test("systemctl list-unit-files | grep -q #{service}.service") + execute :sudo, :systemctl, :restart, "#{service}.service" + else + info "Puma service #{service} not found, skipping restart" + end + end + end +end + namespace :sidekiq do - task :restart do - on roles(:app), in: :sequence, wait: 5 do - if test("systemctl list-unit-files | grep sidekiq.service") - execute :sudo, :systemctl, :restart, "sidekiq" + task :restart_if_exists do + on roles(:app) do + service = fetch(:sidekiq_service_unit_name, "sidekiq") + + if test("[ -f /etc/systemd/system/#{service}.service ]") || + test("systemctl list-unit-files | grep -q #{service}.service") + execute :sudo, :systemctl, :restart, "#{service}.service" + after :deploy, :index_help_center + else + info "Sidekiq service #{service} not found, skipping restart" end end end @@ -89,7 +114,7 @@ 
namespace :cleanup do desc "Remove all of the example config files" task :remove_example_configs do - on roles(:app), wait: 1 do + on roles(:app, :worker), wait: 1 do execute "rm -f #{release_path}/config/*.yml.sample" execute "rm -f #{release_path}/config/initializers/*.rb.example" end @@ -97,7 +122,7 @@ end task :index_help_center do - desc "Index help center" + desc "Index help center" on roles(:app) do sleep 10 within release_path do diff --git a/cron/README.md b/cron/README.md index 14dc994622..7296093ec1 100644 --- a/cron/README.md +++ b/cron/README.md @@ -10,6 +10,7 @@ The files deploy with the code (in `/home/ec2-user/deploy/current`), but the log Cron jobs run on one of the following schedules: - every_5.sh <-- Every 5 minutes - daily.sh <-- Every day at 12:00 +- eu_daily.sh <-- Every day at 12:00 - only for EU servers - weekly.sh <-- Every Sunday at 21:00 - monthly.sh <- On the 20th at 19:00 diff --git a/cron/eu_daily.sh b/cron/eu_daily.sh new file mode 100755 index 0000000000..470d14b750 --- /dev/null +++ b/cron/eu_daily.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +: ${1:?"Need to pass in environment (e.g. 
development, stage, production)"} + +cd /home/ec2-user/deploy/current/ +export RAILS_ENV="$1" + +# Spot check files digests for secondary storage +bundle exec rails checksums:spot_check_secondary_storage_files >> /home/ec2-user/deploy/shared/log/spot_check_secondary_storage_files.log 2>&1 diff --git a/cron/files/eu/cron_daily.service b/cron/files/eu/cron_daily.service new file mode 100644 index 0000000000..de2166f319 --- /dev/null +++ b/cron/files/eu/cron_daily.service @@ -0,0 +1,10 @@ +[Unit] +Description=Cron service - daily +Wants=cron_daily.timer + +[Service] +Type=oneshot +ExecStart=/bin/bash -c "/home/ec2-user/deploy/current/cron/eu_daily.sh production >> /home/ec2-user/deploy/shared/log/cron.log 2>&1" + +[Install] +WantedBy=multi-user.target diff --git a/cron/files/cron_daily.timer b/cron/files/eu/cron_daily.timer similarity index 100% rename from cron/files/cron_daily.timer rename to cron/files/eu/cron_daily.timer diff --git a/cron/files/cron_5min.service b/cron/files/us/cron_5min.service similarity index 100% rename from cron/files/cron_5min.service rename to cron/files/us/cron_5min.service diff --git a/cron/files/cron_5min.timer b/cron/files/us/cron_5min.timer similarity index 100% rename from cron/files/cron_5min.timer rename to cron/files/us/cron_5min.timer diff --git a/cron/files/cron_daily.service b/cron/files/us/cron_daily.service similarity index 99% rename from cron/files/cron_daily.service rename to cron/files/us/cron_daily.service index 0440869ddf..3ff301d4a4 100644 --- a/cron/files/cron_daily.service +++ b/cron/files/us/cron_daily.service @@ -1,7 +1,7 @@ [Unit] Description=Cron service - daily Wants=cron_daily.timer - + [Service] Type=oneshot ExecStart=/bin/bash -c "/home/ec2-user/deploy/current/cron/daily.sh production >> /home/ec2-user/deploy/shared/log/cron.log 2>&1" diff --git a/cron/files/us/cron_daily.timer b/cron/files/us/cron_daily.timer new file mode 100644 index 0000000000..f08a1d32a0 --- /dev/null +++ b/cron/files/us/cron_daily.timer 
@@ -0,0 +1,10 @@ +[Unit] +Description=Cron timer - daily +Requires=cron_daily.service + +[Timer] +OnCalendar=*-*-* 12:00:00 +Unit=cron_daily.service + +[Install] +WantedBy=timers.target diff --git a/cron/files/cron_hourly.service b/cron/files/us/cron_hourly.service similarity index 100% rename from cron/files/cron_hourly.service rename to cron/files/us/cron_hourly.service diff --git a/cron/files/cron_hourly.timer b/cron/files/us/cron_hourly.timer similarity index 100% rename from cron/files/cron_hourly.timer rename to cron/files/us/cron_hourly.timer diff --git a/cron/files/cron_monthly.service b/cron/files/us/cron_monthly.service similarity index 100% rename from cron/files/cron_monthly.service rename to cron/files/us/cron_monthly.service diff --git a/cron/files/cron_monthly.timer b/cron/files/us/cron_monthly.timer similarity index 100% rename from cron/files/cron_monthly.timer rename to cron/files/us/cron_monthly.timer diff --git a/cron/files/cron_weekly.service b/cron/files/us/cron_weekly.service similarity index 100% rename from cron/files/cron_weekly.service rename to cron/files/us/cron_weekly.service diff --git a/cron/files/cron_weekly.timer b/cron/files/us/cron_weekly.timer similarity index 100% rename from cron/files/cron_weekly.timer rename to cron/files/us/cron_weekly.timer diff --git a/cron/weekly.sh b/cron/weekly.sh index 58296aba5b..d1e1fd9914 100755 --- a/cron/weekly.sh +++ b/cron/weekly.sh @@ -32,6 +32,3 @@ fi bundle exec rails cleanup:delete_orphan_records >> /home/ec2-user/deploy/shared/log/delete_orphan_records.log 2>&1 bundle exec rails certbot:check_and_notify >> /home/ec2-user/deploy/shared/log/certbot.log 2>&1 bundle exec rails s3_policies:deleted_files_check >> /home/ec2-user/deploy/shared/log/s3_policies_deleted_files_check.log 2>&1 - -# Spot check files digests for secondary storage -bundle exec rails checksums:spot_check_secondary_storage_files >> /home/ec2-user/deploy/shared/log/spot_check_secondary_storage_files.log 2>&1 diff --git 
a/documentation/dryad_install.md b/documentation/dryad_install.md index 942ca5e5f3..44f10f4a01 100644 --- a/documentation/dryad_install.md +++ b/documentation/dryad_install.md @@ -144,7 +144,7 @@ of configuration values. For most installations, the value will be `local`. Encrypted credentials: Many of Dryad's configuration files read credentials from the Rails credentials file. Before Rails will run, you must do one of two steps: -- (for Dryad development team) Obtain the credentials encryption key from a Dryad developer and place it in `config/master.key` +- (for Dryad development team) Obtain the credentials encryption key from a Dryad developer and place it in `~/deploy/shared/config/master.key` - (for non-Dryad users of the code) In all files `config/*.yml`, replace the `Rails.application.credentials` statements with your own credentials. diff --git a/documentation/external_services/amazon_aws_ec2_setup.md b/documentation/external_services/amazon_aws_ec2_setup.md index 7121b4020a..5217444286 100644 --- a/documentation/external_services/amazon_aws_ec2_setup.md +++ b/documentation/external_services/amazon_aws_ec2_setup.md @@ -27,6 +27,16 @@ sudo dnf install mysql80-community-release-el9-5.noarch.rpm -y sudo dnf install mysql-community-server -y sudo yum install mysql-devel ``` + +- if `mysql-community-server` or `mysql-devel` can not be found, try this: +``` +sudo rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022 +wget http://dev.mysql.com/get/mysql80-community-release-el9-5.noarch.rpm +sudo yum localinstall -y mysql80-community-release-el9-5.noarch.rpm +sudo yum install -y mysql-community-server +sudo yum install -y mysql-devel +``` + - check out the Dryad code ``` git clone https://github.com/datadryad/dryad-app.git @@ -56,7 +66,7 @@ bundle install - update the credentials and deploy script for the specified environment ``` mkdir -p ~/deploy/shared/config/credentials/ -# if using a stage or prod environment, put the key in the appropriate place 
(REPLACE the "stage" with the approppriate key name) +# if using a dev, stage or prod environment, put the key in the appropriate place (REPLACE the "stage" with the appropriate key name) cp stage.key ~/deploy/shared/config/credentials/ cp ~/dryad-app/script/server-utils/deploy_dryad.sh ~/bin/ # EDIT the deploy_dryad.sh to use correct environment name @@ -120,7 +130,7 @@ mysql_stg.sh < myfile.sql AWS RDS database engine update ===================================== -This can be done in 2 ways: +This can be done in two ways: ## Direct update from AWS console - Will update the database engine version to a new version on current database instance diff --git a/lib/tasks/file_validate.rake b/lib/tasks/file_validate.rake index 484c923328..972b2c3b48 100644 --- a/lib/tasks/file_validate.rake +++ b/lib/tasks/file_validate.rake @@ -127,21 +127,23 @@ namespace :checksums do desc 'Spot checks of secondary file replicas' task spot_check_secondary_storage_files: :environment do + files_to_check = 10 # per day + max_size = 10_000_000_000 today = Time.now.utc puts '' - puts "Validating secondary storage checksums for 5 random files #{today}" - # 5 random files which - # have a digest - # are not withdrawn - # are under 1GB - # resource is not in progress + puts "Validating secondary storage checksums for #{files_to_check} random files #{today}" + # 10 random files which + # have a digest + # are not withdrawn + # are under 10GB + # resource is not in progress StashEngine::DataFile .where(file_state: 'created', file_deleted_at: nil) .where.not(digest: nil) - .where(upload_file_size: ..1_000_000_000) + .where(upload_file_size: ..max_size) .joins(resource: :identifier) .where.not(identifier: { pub_state: 'withdrawn' }) - .order(Arel.sql('RAND()')).limit(50) + .order(Arel.sql('RAND()')).limit(files_to_check) .each do |file| next if !file.resource || file.resource.current_state != 'submitted' diff --git a/script/server-utils/deploy_dryad.sh b/script/server-utils/deploy_dryad.sh index 
14996c6324..738057f2cd 100755 --- a/script/server-utils/deploy_dryad.sh +++ b/script/server-utils/deploy_dryad.sh @@ -10,8 +10,7 @@ function errexit { } # Arg for git reference (branch or tag) required -if [ $# -ne 1 ]; then errexit "Usage: $(basename $0) "; fi - +if [ $# -lt 1 ] || [ $# -gt 2 ]; then errexit "Usage: $(basename $0) <branch> [role]"; fi WORKING_TREE=/home/ec2-user/dryad-app CAP_STAGE="stage" @@ -20,6 +19,7 @@ RAILS_ENV="stage" REPO_URL="https://github.com/datadryad/dryad-app.git" BUNDLE="/home/ec2-user/.rbenv/shims/bundle" BRANCH=$1 +ROLE="${2:-app}" cd $WORKING_TREE git pull origin $BRANCH @@ -27,5 +27,5 @@ $BUNDLE config set --local path '.' $BUNDLE config set --local without 'pgsql' $BUNDLE config set --local clean 'true' $BUNDLE install -echo $BUNDLE exec cap --trace $CAP_STAGE deploy BRANCH=$BRANCH REPO_URL=$REPO_URL RAILS_ENV=$RAILS_ENV DEPLOY_TO=$DEPLOY_TO SERVER_HOSTS='localhost' -$BUNDLE exec cap --trace $CAP_STAGE deploy BRANCH=$BRANCH REPO_URL=$REPO_URL RAILS_ENV=$RAILS_ENV DEPLOY_TO=$DEPLOY_TO SERVER_HOSTS='localhost' +echo $BUNDLE exec cap --trace $CAP_STAGE deploy BRANCH=$BRANCH REPO_URL=$REPO_URL RAILS_ENV=$RAILS_ENV DEPLOY_TO=$DEPLOY_TO SERVER_HOSTS='localhost' ROLE=$ROLE +$BUNDLE exec cap --trace $CAP_STAGE deploy BRANCH=$BRANCH REPO_URL=$REPO_URL RAILS_ENV=$RAILS_ENV DEPLOY_TO=$DEPLOY_TO SERVER_HOSTS='localhost' ROLE=$ROLE