diff --git a/.github/workflows/build-test-lint.yaml b/.github/workflows/build-test-lint.yaml index b790df2e..9b982eec 100644 --- a/.github/workflows/build-test-lint.yaml +++ b/.github/workflows/build-test-lint.yaml @@ -22,23 +22,22 @@ on: jobs: build: - uses: notch8/actions/.github/workflows/build.yaml@v1.0.4 + uses: notch8/actions/.github/workflows/build.yaml@v1.0.7 secrets: inherit with: - platforms: "linux/amd64" webTarget: hyku-web workerTarget: hyku-worker test: needs: build - uses: notch8/actions/.github/workflows/test.yaml@v1.0.4 + uses: notch8/actions/.github/workflows/test.yaml@v1.0.7 with: confdir: '/app/samvera/hyrax-webapp/solr/conf' rspec_cmd: "cd .. && gem install semaphore_test_boosters && bundle && rspec_booster --job $CI_NODE_INDEX/$CI_NODE_TOTAL" lint: needs: build - uses: notch8/actions/.github/workflows/lint.yaml@v1.0.4 + uses: notch8/actions/.github/workflows/lint.yaml@v1.0.7 with: webTarget: hyku-web workerTarget: hyku-worker diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index d54c6e66..a63463d9 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -19,5 +19,5 @@ on: jobs: deploy: - uses: notch8/actions/.github/workflows/deploy.yaml@v1.0.4 + uses: notch8/actions/.github/workflows/deploy.yaml@v1.0.7 secrets: inherit diff --git a/.gitignore b/.gitignore index da76e460..059d1162 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /log/*.log /pkg/ /tmp/ +/store/ scratch.md *~undo-tree~ .env.* diff --git a/Dockerfile b/Dockerfile index ca5bed00..ecb6274b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,8 @@ FROM ghcr.io/samvera/hyrax/hyrax-base:$HYRAX_IMAGE_VERSION AS hyku-web USER root RUN git config --system --add safe.directory \* +ENV PATH="/app/samvera/bin:${PATH}" + USER app ENV LD_PRELOAD=/usr/lib/libjemalloc.so.2 ENV MALLOC_CONF='dirty_decay_ms:1000,narenas:2,background_thread:true' diff --git a/README.md b/README.md index 1547cf33..3eccdfab 100644 --- a/README.md +++ 
b/README.md @@ -127,7 +126,6 @@ Below is an example of our Adventist Knapsack submodule. [submodule "hyrax-webapp"] path = hyrax-webapp url = https://github.com/samvera/hyku.git - branch = adventist_dev ``` When you want to bring down an updated version of your Hyku submodule, use the following:
diff --git a/bin/bundle b/bin/bundle deleted file mode 100755 index 58115ecf..00000000 --- a/bin/bundle +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__) -load Gem.bin_path('bundler', 'bundle') diff --git a/bin/csv_from_oai.rb b/bin/csv_from_oai.rb deleted file mode 100755 index 30647184..00000000 --- a/bin/csv_from_oai.rb +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env ruby - -require 'csv' -require 'oai' -require 'byebug' -require_relative "../lib/oai/client_decorator" - -class CsvFromOai - attr_accessor :email - - def initialize(email:) - @email = email - end - - def client - # OAI client setup - @client ||= OAI::Client.new( - "http://oai.adventistdigitallibrary.org/OAI-script", - headers: { from: email }, - parser: 'libxml' - ) - end - - def sets - @sets ||= [ - "adl:thesis", - "adl:periodical", - "adl:issue", - "adl:article", - "adl:image", - "adl:book", - "adl:other" - ] - end - - def opts - @opts ||= { - metadata_prefix: "oai_adl" - } - end - - def urls_for(record, metadata_label) - urls = record.metadata.first.find(metadata_label) || [ ] - urls.map(&:content).map { |url| url.split(';') }.flatten - end - - def url_is_original? - ->(u) { - u.end_with?('.ARCHIVAL.pdf') || - (u.match(/\.OBJ\./) && !u.match(/\.X\d+\./)) - } - end - - def url_is_text? - ->(u) { u.end_with?('.RAW.txt') } - end - - def url_is_reader?
- ->(u) { - (!u.end_with?('.ARCHIVAL.pdf') && u.end_with?('.pdf')) || - u.match(/\.X\d+/) - } - end - - def process_related_urls(urls) - original = text = reader = nil - other_files = [] - urls.each do |url| - case url - when url_is_original? - original = url - when url_is_text? - text = url - when url_is_reader? - reader = url - else - other_files << url - end - end - { 'original' => original, 'text' => text, 'reader' => reader, 'other_files' => other_files } - end - - - def csv_headers - @csv_headers ||= ["oai_set","aark_id", "original", "text", "reader", "thumbnail", "other_files"] - end - - def build_csv - CSV.open('csv_from_oai.csv', - 'wb', - write_headers: true, - headers: csv_headers - ) do |csv| - # Write the headers to the CSV file - sets.each do |set| - records = client.list_records(opts.merge(set: set)) - # For the full set of records. - record_set = ENV.fetch('FULL', nil) ? records.full : records - record_set.each_with_index do |r, i| - puts "== Record #{i} of Set #{set}" - # For the first 25 records, comment out previous line and comment in the following line. - # records.each_with_index do |r| - row = { 'oai_set' => set } - row['aark_id'] = r.header.identifier - thumbnail_urls = urls_for(r, 'thumbnail_url') - related_urls = urls_for(r, 'related_url') - - row.merge!(process_related_urls(related_urls)) - row['thumbnail'] = thumbnail_urls.first - csv << csv_headers.map { |h| row[h] }.flatten - end - end - end - end - - def build_csv_resume(page: ) - set = "adl:issue" - - CSV.open("csv_from_oai#{page}.csv", - 'wb', - write_headers: true, - headers: csv_headers - ) do |csv| - # Write the headers to the CSV file - records = client.list_records(opts.merge(set: set, resumption_token: "adl:issue|#{page}")) - # For the full set of records. 
- record_set = records.full - record_set.each_with_index do |r, i| - puts "== Record #{i} of Set #{set} - Page #{record_set.instance_variable_get("@response").resumption_token}" - # For the first 25 records, comment out previous line and comment in the following line. - # records.each_with_index do |r| - row = { 'oai_set' => set } - row['aark_id'] = r.header.identifier - thumbnail_urls = urls_for(r, 'thumbnail_url') - related_urls = urls_for(r, 'related_url') - - row.merge!(process_related_urls(related_urls)) - row['thumbnail'] = thumbnail_urls.first - csv << csv_headers.map { |h| row[h] }.flatten - end - end - end - -end - -email = ENV.fetch('CSV_EMAIL', nil) -unless email - email = ARGV.detect { |arg| arg.match(/^--email/) } - email = email.gsub('--email=', '').strip -end - -page = ARGV.detect { |arg| arg.match(/^--page/) } -if page - page = page.gsub('--page=', '').strip - CsvFromOai.new(email: email).build_csv_resume(page: page) -else - CsvFromOai.new(email: email).build_csv -end diff --git a/bin/decrypt-secrets b/bin/decrypt-secrets deleted file mode 100755 index 8d1f5813..00000000 --- a/bin/decrypt-secrets +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby - -# require 'byebug' - -parent_dir = File.dirname(__dir__) -Dir.chdir(File.join(parent_dir, 'ops', 'provision')) -[ - ".env.*.enc", - "kube_config.enc.yml", - ".backend.enc", - "k8s/*-values.enc.yaml" -].each do |files| - Dir.glob(files).each do |file| - cmd = "sops --decrypt #{file} > #{file.gsub(/\.enc/, '')}" - puts cmd - %x{#{cmd}} - end -end diff --git a/bin/encrypt-secrets b/bin/encrypt-secrets deleted file mode 100755 index 3770c443..00000000 --- a/bin/encrypt-secrets +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env ruby - -# require 'byebug' - -parent_dir = File.dirname(__dir__) -Dir.chdir(File.join(parent_dir, 'ops', 'provision')) -[ - ".env.*", - "kube_config.yml", - ".backend", - "k8s/*-values.yaml" -].each do |files| - Dir.glob(files).each do |file| - next if file.match(/$enc/) - output_file = 
file.split('/') - file_name = output_file[-1] - if file_name.match(/^\./) - file_name = "#{file_name}.enc" - else - file_name = file_name.split('.') - file_name.insert(-2, 'enc') - file_name = file_name.join('.') - end - - output_file[-1] = file_name - output_file = output_file.join('/') - cmd = "sops --encrypt #{file} > #{output_file}" - puts cmd - %x{#{cmd}} - end -end diff --git a/bin/errors_from_spacestone.rb b/bin/errors_from_spacestone.rb deleted file mode 100755 index 4a7154c3..00000000 --- a/bin/errors_from_spacestone.rb +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env ruby - -require 'csv' -require 'httparty' -require 'aws-sdk-sqs' - -SERVERLESS_ALTO_DLQ = ENV.fetch('SERVERLESS_ALTO_DLQ') -SERVERLESS_COPY_DLQ = ENV.fetch('SERVERLESS_COPY_DLQ') -SERVERLESS_OCR_DLQ = ENV.fetch('SERVERLESS_OCR_DLQ') -SERVERLESS_PLAIN_TEXT_DLQ = ENV.fetch('SERVERLESS_PLAIN_TEXT_DLQ') -SERVERLESS_OCR_THUMB_DLQ = ENV.fetch('SERVERLESS_OCR_THUMB_DLQ') -SERVERLESS_THUMBNAIL_DLQ = ENV.fetch('SERVERLESS_THUMBNAIL_DLQ') -SERVERLESS_WORD_DLQ = ENV.fetch('SERVERLESS_WORD_DLQ') - -## -# This class is responsible for looping through a CSV and enqueing those records based on some -# business logic. 
-# -# @example -# ErrorsFromSpacestone.new.insert_into_spacestone -class ErrorsFromSpacestone - CSV_NAME = 'csv_from_oai.csv' - def initialize - @client = Aws::SQS::Client.new(region: 'us-east-1') - @serverless_alto_dlq = ENV.fetch('SERVERLESS_ALTO_DLQ') - @serverless_copy_dlq = ENV.fetch('SERVERLESS_COPY_DLQ') - @serverless_ocr_dlq = ENV.fetch('SERVERLESS_OCR_DLQ') - @serverless_plain_text_dlq = ENV.fetch('SERVERLESS_PLAIN_TEXT_DLQ') - @serverless_ocr_thumb_dlq = ENV.fetch('SERVERLESS_OCR_THUMB_DLQ') - @serverless_thumbnail_dlq = ENV.fetch('SERVERLESS_THUMBNAIL_DLQ') - @serverless_word_dlq = ENV.fetch('SERVERLESS_WORD_DLQ') - end - attr_reader :client - - def download_errors - %w[alto copy ocr plain_text ocr_thumb thumbnail word].each do |type| - queue_url = self.instance_variable_get("@serverless_#{type}_dlq") - - resp = client.get_queue_attributes({ - queue_url: queue_url, - attribute_names: ['ApproximateNumberOfMessages'], - }) - count = resp.attributes['ApproximateNumberOfMessages'].to_i - ((count / 10) + 2).times do - response = client.receive_message( - queue_url: queue_url, - max_number_of_messages: 10 - ) - File.open("#{type}-dlg.json", 'a') do |file| - response.messages.each do |message| - file.puts(message.body) - end - end - end - end - end -end - -loader = ErrorsFromSpacestone.new.download_errors diff --git a/bin/extract_mods_to_spreadsheet b/bin/extract_mods_to_spreadsheet deleted file mode 100755 index 302cbaf5..00000000 --- a/bin/extract_mods_to_spreadsheet +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env ruby - -# this script is intented to produce a sample spreadsheet from -# Stanford provided bagit bags that happen to contain MODS files. 
-# The directory should contain the druid in the form: -# 'base_path/druid_wg827ks1643/**/descMetadata.xml' -# -# Usage example: -# ./bin/extract_mods_to_spreadsheet ../hyku-bags/ out.csv - -base_path = ARGV[0] -output_path = ARGV[1] -unless base_path - $stderr.puts "Provide a base_path" - exit(1) -end - -unless output_path - $stderr.puts "Provide an output file name" - exit(1) -end - -puts 'Loading environment...' -require File.expand_path('../../config/environment', __FILE__) -require 'importer' -puts 'Starting import...' - -# Assumption: filename will contain the druid. Example: -# hyku-metadata/GSE/druid_wg827ks1643/data/metadata/descMetadata.xml -data = Dir.glob("#{base_path}/**/descMetadata.xml").map do |filename| - id = filename.gsub(%r{.*/druid_(\w+)/.*}, '\1') - {id: [id], type: ['ETD']}.merge(Importer::ModsParser.new(filename).attributes) - end - -# Process the contributor into a flat field and drop 'notes_attributes' and 'collection' -data.each do |record| - record[:contributor] = record[:contributor].flat_map { |c| c[:name] } - record[:date_created] = record.delete(:created_attributes).flat_map { |c| c[:start] } - record[:file] = [nil] - record.delete(:notes_attributes) - record.delete(:citation) - record.delete(:record_origin) - record.delete(:institution) - record.delete(:collection) # our data doesn't have collection in it. -end - -headers = data.map do |record| - record.each_with_object({}) { |(k, v), o| o[k] = v.count } -end - -uniq_headers = headers.map(&:keys).flatten.uniq - -# How many columns does each field take up. Look for the widest one. -cols_for_attribute = uniq_headers.each_with_object({}) do |k, h| - h[k] = headers.map{ |header| header[k] }.max -end - -# Discard fields that aren't used. 
-used_fields = cols_for_attribute.select { |k, width| width > 0 || k == :file }.keys - -CSV.open(output_path, "wb") do |csv| - csv << used_fields.map { |k| Array.new(cols_for_attribute[k]) { k } }.flatten.map(&:to_s) - data.each do |record| - derp = used_fields.flat_map do |k| - record[k] + Array.new(cols_for_attribute[k] - record[k].length) { nil } - end - csv << derp - end -end - -puts 'import complete.' diff --git a/bin/git-cleanup b/bin/git-cleanup deleted file mode 100755 index 42fda076..00000000 --- a/bin/git-cleanup +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -`git fetch -ap` -branches = `git branch --merged main`.split("\n").collect { |b| b.delete('*').strip } - -branches -= ['staging', 'main'] - -branches.each do |branch| - puts `git branch -d #{branch}` -end diff --git a/bin/graph b/bin/graph deleted file mode 100644 index 2fb8e9e3..00000000 --- a/bin/graph +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -# This script creates png images to visualize a Hyrax workflow -# Usage: bin/graph -# -# Example: bin/graph one_step_mediated_deposit -# -# This script requires that you have Graphviz installed and the -# ruby-graphviz gem. -# -# gem install 'ruby-graphviz' -# -# - -require 'graphviz' - -puts 'Loading environment...' 
-require File.expand_path('../../config/environment', __FILE__) - -workflow_name = ARGV[0] - -unless workflow_name - puts "You must provide a workflow_name as an argument" - exit(1) -end - -workflow = Sipity::Workflow.find_by(name: workflow_name) - -unless workflow - puts "Workflow not found" - exit(1) -end - -class GraphWriter - attr_reader :workflow, :nodes, :initial_state - - def initialize(workflow) - @workflow = workflow - end - - # Create a new graph - def graph - @g ||= GraphViz.new(:G, type: :digraph) - end - - def attach_nodes - @nodes = workflow.workflow_states.each_with_object({}) do |state, h| - h[state.id] = graph.add_nodes(state.name) - end - @initial_state = workflow.initial_workflow_state - @nodes[initial_state.id] = graph.add_nodes(@initial_state.name) - end - - def write_edge(initial, terminal, label) - graph.add_edges(nodes[initial], nodes[terminal], label:) - end - - def attach_edges - # Create edges - workflow.workflow_actions.each do |wa| - terminal = wa.resulting_workflow_state_id - if wa.workflow_state_actions.count == 0 - # Initial action - write_edge(initial_state.id, terminal, wa.name) - end - wa.workflow_state_actions.each do |wsa| - initial = wsa.originating_workflow_state_id - if terminal.nil? - write_edge(initial, initial, wa.name) - else - write_edge(initial, terminal, wa.name) - end - end - end - end - - def write - attach_nodes - attach_edges - # Generate output image - filename = "#{workflow.name}.png" - graph.output(png: filename) - - puts "Created: #{filename}" - end -end - -GraphWriter.new(workflow).write diff --git a/bin/helm_delete b/bin/helm_delete deleted file mode 100755 index 98667a97..00000000 --- a/bin/helm_delete +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -# This script wraps up helm deletion. It is meant as a clear starting point for -# commandline deployment or CI based deployment.
-# - -if [ -z "$1" ] || [ -z "$2" ] -then - echo './bin/delete RELEASE_NAME NAMESPACE' - exit 1 -fi -release_name="${1}" -namespace="${2}" - -echo "Deleting release: $release_name..." -helm --namespace "$namespace" delete "$release_name" diff --git a/bin/import_from_csv b/bin/import_from_csv deleted file mode 100755 index f3fd50db..00000000 --- a/bin/import_from_csv +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env ruby - -def validate_hostname!(hostname) - return if hostname - usage - $stderr.puts 'Please provide the hostname to import to.' - exit(1) -end - -def validate_csv_file!(csv_file) - return if csv_file && File.exist?(csv_file) - usage - $stderr.puts 'Please provide a metadata file to import.' - exit(1) -end - -def validate_files_directory!(files_directory) - return if files_directory - $stderr.puts 'Files directory was left blank. No files will be ingested' -end - -def load_rails - puts 'Loading environment...' - require File.expand_path('../../config/environment', __FILE__) - require 'importer' - puts 'Starting import...' -end - -def main(hostname, csv_file, files_directory) - validate_hostname!(hostname) - validate_csv_file!(csv_file) - validate_files_directory!(files_directory) - load_rails - - AccountElevator.switch!(hostname) - size = Importer::CSVImporter.new(csv_file, files_directory).import_all - - puts "Imported #{size} records." -end - -def logger - Rails.logger -end - -def usage - $stderr.puts "Usage: #{$PROGRAM_NAME} " -end - -main(ARGV[0], ARGV[1], ARGV[2]) diff --git a/bin/import_from_purl b/bin/import_from_purl deleted file mode 100755 index 23ce3b38..00000000 --- a/bin/import_from_purl +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env ruby - -def validate_hostname!(hostname) - return if hostname - usage - $stderr.puts 'Please provide the hostname to import to.' - exit(1) -end - -def validate_druid_file!(druid_file) - return if druid_file && File.exist?(druid_file) - usage - $stderr.puts 'Please provide a file of druids you want to import.' 
- exit(1) -end - -def validate_username!(username) - user = User.find_by_user_key(username) - return user if user - usage - $stderr.puts 'username was left blank.' - exit(1) -end - -def load_rails(hostname) - puts 'Loading environment...' - require File.expand_path('../../config/environment', __FILE__) - require 'stanford' - AccountElevator.switch!(hostname) -end - -def main(hostname, username, druid_file) - validate_hostname!(hostname) - validate_druid_file!(druid_file) - load_rails(hostname) - user = validate_username!(username) - - count = druids(druid_file) do |druid| - log = Hyrax::Operation.create!(user: user, operation_type: "Import Purl Metadata") - ImportWorkFromPurlJob.perform_later(user, druid, log) - end - - puts "Enqueued #{count} import jobs." -end - -def druids(druid_file) - count = 0 - File.foreach(druid_file) do |line| - # Split in two parts on the first comma - (element, _) = line.split(/,/, 2) - # remove any namespacing - if md = /(druid:)?(.*)/.match(element) - yield(md[2]) - count += 1 - end - end - count -end - -def usage - $stderr.puts "Usage: #{$PROGRAM_NAME} " -end - -main(ARGV[0], ARGV[1], ARGV[2]) diff --git a/bin/import_mods_files b/bin/import_mods_files deleted file mode 100755 index 1bff5db7..00000000 --- a/bin/import_mods_files +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env ruby - -def validate_hostname!(hostname) - return if hostname - usage - $stderr.puts 'Please provide the hostname to import to.' - exit(1) -end - -def validate_mods_directory!(mods_directory) - if mods_directory - unless File.exist?(mods_directory) - $stderr.puts "Directory doesn't exist #{mods_directory}" - exit(1) - end - - unless File.directory?(mods_directory) - $stderr.puts "#{mods_directory} is not a directory" - exit(1) - end - else - usage - $stderr.puts 'Please provide the a file path you want to import.' - exit(1) - end -end - -def validate_imagepath!(imagepath) - return if imagepath - usage - $stderr.puts 'Image directory was left blank. 
No images will be ingested' - exit(1) -end - -def load_rails - puts 'Loading environment...' - require File.expand_path('../../config/environment', __FILE__) - require 'importer' - puts 'Starting import...' -end - - -def main(hostname, mods_directory, imagepath) - validate_hostname!(hostname) - validate_mods_directory!(mods_directory) - validate_imagepath!(imagepath) - load_rails - - AccountElevator.switch!(hostname) - size = Importer::ModsImporter.new(imagepath, mods_directory).import_all - - puts "Imported #{size} records." -end - -def usage - $stderr.puts "Usage: #{$PROGRAM_NAME} " -end - - -main(ARGV[0], ARGV[1], ARGV[2]) diff --git a/bin/knapsacker b/bin/knapsacker index b177eabc..648e34a2 100755 --- a/bin/knapsacker +++ b/bin/knapsacker @@ -1,6 +1,8 @@ #!/usr/bin/env ruby -wU +# frozen_string_literal: true + -# The purpose of this script is to quickly assess which specs should: +# The purpose of this script is to quickly assess which files should: # # - be removed (e.g. duplicate in Knapsack and Hyku) # - be reviewed (e.g. differences between Knapsack and Hyku) @@ -11,7 +13,6 @@ config = { yours: './', upstream: './hyrax-webapp', patterns_to_check: ["spec/** command_name = File.basename(__FILE__) - nlp = new_line_padding = " " * 37 optparse = OptionParser.new do |options| # This banner is the first line of your help documentation. @@ -65,10 +66,11 @@ end # TODO: This is not generally working. dir_glob_patterns_to_check.each do |pattern| - Dir.glob(pattern).each do |path| - hyku_path = File.join(upstream, path) - if File.exist?(hyku_path) - results = `diff #{path} #{hyku_path}`.strip + Dir.glob(File.join(yours, pattern)).each do |your_path| + path = your_path.sub(File.join(yours, '/'), '') + upstream_path = File.join(upstream, path) + if File.exist?(upstream_path) + results = `diff #{your_path} #{upstream_path}`.strip if results.empty?
duplicates << path else @@ -80,10 +82,9 @@ dir_glob_patterns_to_check.each do |pattern| end end - -puts "-"*72 +puts "-" * 72 puts "Knapsacker run context:" -puts "-"*72 +puts "-" * 72 puts "- Working Directory: #{FileUtils.pwd}" puts "- Your Dir: #{yours}" puts "- Upstream Dir: #{upstream}" @@ -94,20 +95,18 @@ end puts "" puts "------------------------------------------------------------------" -puts "Files in your Hyku that are exact duplicates of upstream Hyku file" +puts "Files in \"yours\" that are exact duplicates of \"upstream\" files" puts "They are prefixed with a `='" puts "------------------------------------------------------------------" duplicates.each do |path| puts "= #{path}" - if ENV['RM_DUPS'] - File.unlink(path) - end + File.unlink(path) if ENV['RM_DUPS'] end puts "" puts "----------------------------------------------------" -puts "Files that are in your Hyku but not in upstream Hyku" +puts "Files that are in \"yours\" but not in \"upstream\" " puts "They are prefixed with a `+'" puts "----------------------------------------------------" in_yours_but_not_in_upstream.each do |path| @@ -117,7 +116,7 @@ end puts "" puts "-------------------------------------------------------------" -puts "Files that are changed in your Hyku relative to upstream Hyku" +puts "Files that are changed in \"yours\" relative to \"upstream\"" puts "They are prefixed with a `Δ'" puts "-------------------------------------------------------------" changed_in_yours.each do |path| diff --git a/bin/load_spacestone_from_csv.rb b/bin/load_spacestone_from_csv.rb deleted file mode 100755 index b8f34c6c..00000000 --- a/bin/load_spacestone_from_csv.rb +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env ruby - -require 'csv' -require 'httparty' -require 'aws-sdk-sqs' - -SERVERLESS_COPY_URL = ENV.fetch('SERVERLESS_COPY_URL') -SERVERLESS_S3_URL = ENV.fetch('SERVERLESS_S3_URL') -SERVERLESS_TEMPLATE = ENV.fetch('SERVERLESS_TEMPLATE') -SERVERLESS_SPLIT_SQS_URL = 
ENV.fetch('SERVERLESS_SPLIT_SQS_URL') -SERVERLESS_OCR_SQS_URL = ENV.fetch('SERVERLESS_OCR_SQS_URL') -SERVERLESS_THUMBNAIL_SQS_URL = ENV.fetch('SERVERLESS_THUMBNAIL_SQS_URL') -SERVERLESS_COPY_SQS_URL = ENV.fetch('SERVERLESS_COPY_SQS_URL') -SERVERLESS_BATCH_SIZE = ENV.fetch('SERVERLESS_BATCH_SIZE', 10).to_i - -## -# This class is responsible for looping through a CSV and enqueing those records based on some -# business logic. -# -# @example -# LoadSpaceStoneFromCsv.new.insert_into_spacestone -class LoadSpaceStoneFromCsv - CSV_NAME = 'csv_from_oai.csv' - def initialize - @csv = CSV.read(CSV_NAME, headers: true) - @client = Aws::SQS::Client.new(region: 'us-west-2') - end - attr_reader :csv, :client - - def insert_into_spacestone - csv.each do |row| - puts row.inspect - needs_thumbnail = !has_thumbnail?(row) - copy_original(row, needs_thumbnail) - copy_access(row, needs_thumbnail) - end - # When we are done processing each row, we need to handle whatever remains in the queue. - # Without this line, we could have CSV rows (or partial rows) that we buffered into the queue - # but never submitted. - send_remainder_of_queue! - end - - ## - # For an original file: - # - # - Copy the original file to the SpaceStone location - # - When it is a PDF, enqueue splitting it - # - When it does not have a thumbnail, enqueue creating a thumbnail - # - # @param row [CSV::Row] - # @param needs_thumbnail [Boolean] when true we will need to enqueue a thumbnail generation job. - def copy_original(row, needs_thumbnail) - original_extension = File.extname(row['original']) - jobs = [original_destination(row)] - - # TODO: In the case of PDF, Split; in the case of images, OCR. In all cases thumbnail. 
- if original_extension == '.pdf' - jobs << enqueue_destination(row, key: 'original', url: SERVERLESS_SPLIT_SQS_URL) - else - jobs << enqueue_destination(row, key: 'original', url: SERVERLESS_OCR_SQS_URL) - end - if needs_thumbnail - jobs << enqueue_destination(row, key: 'original', url: SERVERLESS_THUMBNAIL_SQS_URL) - end - - post_to_sqs_copy({ row['original'] => jobs }) - end - - def copy_access(row, needs_thumbnail) - return if row['reader'].to_s.strip.empty? - original_extension = File.extname(row['original']) - return unless original_extension == '.pdf' - - jobs = [original_destination(row, key: 'reader')] - - if needs_thumbnail - jobs << enqueue_destination(row, key: 'reader', url: SERVERLESS_THUMBNAIL_SQS_URL) - end - - post_to_sqs_copy({ row['reader'] => jobs }) - end - - def thumbnail_destination(row, key: 'original') - # We might have multiple periods in the filename, remove the extension. - thumbnail_name = File.basename(row[key]).sub(/\.[^\.]*\z/, ".thumbnail.jpeg") - "#{SERVERLESS_S3_URL}#{row['aark_id']}/#{thumbnail_name}" - end - - def enqueue_destination(row, url:, key: 'original') - basename = File.basename(row[key]) - "#{url}#{row['aark_id']}/#{basename}?template=#{SERVERLESS_S3_URL}#{SERVERLESS_TEMPLATE}" - end - - def original_destination(row, key: 'original') - "#{SERVERLESS_S3_URL}#{row['aark_id']}/#{File.basename(row[key])}" - end - - def has_thumbnail?(row) - return false if row['thumbnail'].to_s.strip.empty? - return false unless row['thumbnail'].to_s.match(/^https?/) - - # Regardless of the original's type, if we have a thumbnail copy it. - post_to_sqs_copy({ row['thumbnail'] => [thumbnail_destination(row)] }) - - # We'll only repurpose the thumbnail if the reader is a PDF. - if !row['reader'].to_s.strip.empty? 
&& row['reader'].end_with?('.pdf') - post_to_sqs_copy({ row['thumbnail'] => thumbnail_destination(row, key: 'reader') }) - end - - true - end - - ## - # SQS related methods - def post_to_sqs_copy(workload) - @queue ||= [] - @queue << { id: SecureRandom.uuid, message_body: workload.to_json } - if (@queue.size % SERVERLESS_BATCH_SIZE) == 0 - send_batch(@queue) - @queue = [] - end - end - - def send_remainder_of_queue! - return unless defined?(@queue) - return if @queue.empty? - send_batch(@queue) - end - - def send_batch(batch) - puts "\t#{SERVERLESS_COPY_SQS_URL}\n\t#{batch.inspect}" - client.send_message_batch({ - queue_url: SERVERLESS_COPY_SQS_URL, - entries: batch - }) unless ENV['DRY_RUN'] - end -end - -loader = LoadSpaceStoneFromCsv.new.insert_into_spacestone diff --git a/bin/rails b/bin/rails index dd027b40..ce59f528 100755 --- a/bin/rails +++ b/bin/rails @@ -1,5 +1,14 @@ #!/usr/bin/env ruby + # frozen_string_literal: true -APP_PATH = File.expand_path('../config/application', __dir__) -require_relative '../config/boot' -require 'rails/commands' + +# This command will automatically be run when you run "rails" with Rails gems +# installed from the root of your application. + +begin + load File.join(File.expand_path('../../', __FILE__), 'hyrax-webapp/bin/rails') +rescue LoadError => load_error + warn "No Rails application found! 
\n" \ + " git submodule update --init\n\n" \ + "#{load_error}" +end diff --git a/bin/rake b/bin/rake deleted file mode 100755 index 609af747..00000000 --- a/bin/rake +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -require_relative '../config/boot' -require 'rake' -Rake.application.run diff --git a/bin/rspec b/bin/rspec deleted file mode 100755 index 6e670921..00000000 --- a/bin/rspec +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby -begin - load File.expand_path('../spring', __FILE__) -rescue LoadError => e - raise unless e.message.include?('spring') -end -require 'bundler/setup' -load Gem.bin_path('rspec-core', 'rspec') diff --git a/bin/setup b/bin/setup deleted file mode 100755 index ffca742a..00000000 --- a/bin/setup +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -require 'pathname' -require 'fileutils' -include FileUtils - -# path to your application root. -APP_ROOT = Pathname.new File.expand_path('../../', __FILE__) - -def system!(*args) - system(*args) || abort("\n== Command #{args} failed ==") -end - -chdir APP_ROOT do - # This script is a starting point to setup your application. - # Add necessary setup steps to this file. - - puts '== Installing dependencies ==' - system! 'gem install bundler --conservative' - system('bundle check') || system!('bundle install') - - # puts "\n== Copying sample files ==" - # unless File.exist?('config/database.yml') - # cp 'config/database.yml.sample', 'config/database.yml' - # end - - puts "\n== Preparing database ==" - system! 'bin/rails db:setup' - - puts "\n== Removing old logs and tempfiles ==" - system! 'bin/rails log:clear tmp:clear' - - puts "\n== Restarting application server ==" - system! 
'bin/rails restart' -end diff --git a/bin/spring b/bin/spring deleted file mode 100755 index 99c5cacd..00000000 --- a/bin/spring +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -# This file loads spring without using Bundler, in order to be fast. -# It gets overwritten when you run the `spring binstub` command. - -unless defined?(Spring) - require 'rubygems' - require 'bundler' - - if (match = Bundler.default_lockfile.read.match(/^GEM$.*?^ (?: )*spring \((.*?)\)$.*?^$/m)) - Gem.paths = { 'GEM_PATH' => [Bundler.bundle_path.to_s, *Gem.path].uniq.join(Gem.path_separator) } - gem 'spring', match[1] - require 'spring/binstub' - end -end diff --git a/bin/update b/bin/update deleted file mode 100755 index 3f81e549..00000000 --- a/bin/update +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -require 'pathname' -require 'fileutils' -include FileUtils - -# path to your application root. -APP_ROOT = Pathname.new File.expand_path('../../', __FILE__) - -def system!(*args) - system(*args) || abort("\n== Command #{args} failed ==") -end - -chdir APP_ROOT do - # This script is a way to update your development environment automatically. - # Add necessary update steps to this file. - - puts '== Installing dependencies ==' - system! 'gem install bundler --conservative' - system('bundle check') || system!('bundle install') - - puts "\n== Updating database ==" - system! 'bin/rails db:migrate' - - puts "\n== Removing old logs and tempfiles ==" - system! 'bin/rails log:clear tmp:clear' - - puts "\n== Restarting application server ==" - system! 'bin/rails restart' -end diff --git a/bin/web b/bin/web deleted file mode 100755 index 9e3cde74..00000000 --- a/bin/web +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -`echo "$GOOGLE_OAUTH_PRIVATE_KEY_VALUE" | base64 -d > prod-cred.p12` unless ENV['GOOGLE_OAUTH_PRIVATE_KEY_VALUE'].to_s.strip.empty? 
- -# Ensure the directory exists and create the symbolic link -`mkdir -p /app/samvera/hyrax-webapp/public` -`mkdir -p /app/samvera/branding` -`ln -snf /app/samvera/branding /app/samvera/hyrax-webapp/public/branding` - -exec "bundle exec puma -v -b tcp://0.0.0.0:3000" diff --git a/bin/worker b/bin/worker deleted file mode 100755 index 601d172d..00000000 --- a/bin/worker +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true -`echo "$GOOGLE_OAUTH_PRIVATE_KEY_VALUE" | base64 -d > prod-cred.p12` unless ENV['GOOGLE_OAUTH_PRIVATE_KEY_VALUE'].to_s.strip.empty? - -if ENV['DATABASE_URL'].to_s.strip.empty? - puts 'DATABASE_URL not set, no pool change needed' -else - ENV['DATABASE_URL'] = ENV['DATABASE_URL'].gsub('pool=5', 'pool=30') -end - -queue = ENV.fetch('HYRAX_ACTIVE_JOB_QUEUE', 'sidekiq') -use_queue = ENV.fetch('HYKU_QUEUED_RUNNER', 'false') - -case queue -when 'sidekiq' - exec "echo $DATABASE_URL && bundle exec sidekiq" -when 'good_job' - exec "echo $DATABASE_URL && HYKU_USE_QUEUED_INDEX=#{use_queue} bundle exec good_job start" -end diff --git a/bundler.d/example.rb b/bundler.d/example.rb index efe23a3e..634792e3 100644 --- a/bundler.d/example.rb +++ b/bundler.d/example.rb @@ -8,6 +8,3 @@ # NOTE: these injected gems are very sticky... it appears that you must rebuild # your docker container and rebundle to get rid of an injected gem. 
- -ensure_gem 'derivative-rodeo', '~> 0.5', '>= 0.5.3' -ensure_gem 'valkyrie-shrine', branch: 'main', git: 'https://github.com/samvera-labs/valkyrie-shrine.git' diff --git a/docker-compose.production.yml b/docker-compose.production.yml new file mode 100644 index 00000000..e696b4be --- /dev/null +++ b/docker-compose.production.yml @@ -0,0 +1,251 @@ +x-app: &app + user: root + image: ghcr.io/notch8/adventist_knapsack:${TAG:-latest} + env_file: + - .env.production + environment: + - MAGICK_CONFIGURE_PATH=/etc/ImageMagick-7 + volumes: + - .:/app/samvera + - /store/keep/derivatives:/app/samvera/derivatives + - /store/keep/branding:/app/samvera/branding + - /store/keep/exports:/app/samvera/hyrax-webapp/tmp/exports/ + - /store/keep/imports:/app/samvera/hyrax-webapp/tmp/imports/ + - /store/tmp/network-files:/app/samvera/hyrax-webapp/tmp/network-files/ + - /store/tmp/public-assets:/app/samvera/hyrax-webapp/public/assets + #- /store/tmp/public-uv:/app/samvera/hyrax-webapp/public/uv + - /store/tmp/uploads:/app/samvera/uploads + - /store/tmp/uploads:/app/samvera/hyrax-webapp/tmp/uploads + - /store/keep/public-system:/app/samvera/hyrax-webapp/public/system + - /store/keep/public-uploads:/app/samvera/hyrax-webapp/public/uploads + - /store/tmp/ruby-tmp-dir:/tmp + - /cache:/app/samvera/hyrax-webapp/tmp/cache + networks: + internal: + logging: + driver: "json-file" + options: + max-size: "3g" + +x-app-worker: &app-worker + <<: *app + environment: + - HYRAX_FLEXIBLE=false + - MAGICK_CONFIGURE_PATH=/etc/ImageMagick-7 + image: ghcr.io/notch8/adventist_knapsack/worker:${TAG:-latest} + # Uncomment command to access container with out starting bin/worker. 
Useful for debugging or updating Gemfile.lock + # command: sleep infinity + depends_on: + check_volumes: + condition: service_completed_successfully + initialize_app: + condition: service_completed_successfully + db: + condition: service_started + solr: + condition: service_started + fcrepo: + condition: service_started + redis: + condition: service_started + zoo: + condition: service_started + restart: unless-stopped + +networks: + internal: + +services: + fits: + image: ghcr.io/samvera/fitsservlet:1.6.0 + ports: + - 9090:8080 + networks: + internal: + + zoo: + image: zookeeper:3.6.2 + restart: unless-stopped + environment: + - ZOO_MY_ID=1 + - ZOO_SERVERS=server.1=zoo:2888:3888;2181 + - ZOO_4LW_COMMANDS_WHITELIST=mntr,srvr,ruok,conf + expose: + - 2181 + - 2888 + - 3888 + - 7000 + ports: + - 2181:2181 + - 7001:7000 + volumes: + - /store/keep/zoo_data:/data + - /store/keep/zoo_datalog:/datalog + networks: + internal: + aliases: + - zookeeper_cluster + healthcheck: + test: + [ + "CMD-SHELL", + "echo 'ruok' | nc -w 2 -q 2 localhost 2181 | grep imok || exit 1", + ] + interval: "10s" + timeout: "8s" + + solr: + image: ghcr.io/samvera/hyku/solr:latest + env_file: + - .env.production + environment: + - OOM=script + - SOLR_ADMIN_USER=${SOLR_ADMIN_USER} + - SOLR_ADMIN_PASSWORD=${SOLR_ADMIN_PASSWORD} + - SOLR_COLLECTION_NAME=${SOLR_COLLECTION_NAME} + - SOLR_CLOUD_BOOTSTRAP=yes + - SOLR_ENABLE_CLOUD_MODE=yes + - SOLR_ENABLE_AUTHENTICATION=yes + - ZK_HOST=zoo:2181 + - SOLR_HEAP=6G + user: root + command: bash -c " + chown -R 8983:8983 /var/solr + && ./bin/solr zk cp file:/var/solr/data/security.json zk:/security.json + && runuser -u solr -- solr-foreground" + expose: + - 8983 + volumes: + - /store/keep/solr-data:/var/solr + #- /solr/security.json:/var/solr/data/security.json + ports: + - 8983:8983 + networks: + internal: + healthcheck: + test: curl -sf http://$$SOLR_ADMIN_USER:$$SOLR_ADMIN_PASSWORD@solr:8983/solr/admin/cores?action=STATUS || exit 1 + start_period: 3s + 
interval: 5s + timeout: 5s + retries: 6 + depends_on: + zoo: + condition: service_healthy + restart: unless-stopped + + fcrepo: + image: ghcr.io/notch8/docker-fcrepo:4.7.6-s3-streaming + volumes: + - /store/keep/fcrepo_data:/data:cached + - /store/keep/fcrepo_data/jetty-tmp:/var/lib/jetty/tmp + - /store/keep/fcrepo_data/jetty-work:/var/lib/jetty/work + - /store/keep/fcrepo_data/jetty-webapps-tmp:/var/lib/jetty/webapps/tmp + env_file: + - .env.production + environment: + - TMPDIR=/data/tmp + - JAVA_OPTIONS=-Daws.bucket=samvera-fcrepo -Daws.accessKeyId=${FCREPO_AWS_KEY} -Daws.secretKey=${FCREPO_AWS_SECRET} -Dfcrepo.postgresql.host=db -Dfcrepo.postgresql.username=fcrepo -Dfcrepo.postgresql.password=${FCREPO_DB_PASSWORD} -Dfcrepo.object.directory=/data/objects -Djava.io.tmpdir=/data/tmp -XX:ConcGCThreads=4 -XX:MaxGCPauseMillis=200 -XX:ParallelGCThreads=20 -XX:+UseG1GC -XX:+UseCompressedOops -XX:-UseLargePagesIndividualAllocation -XX:MaxPermSize=512M -Xms16g -Xmx16g + - MODESHAPE_CONFIG=classpath:/config/jdbc-postgresql-s3/repository.json + - FCREPO_DB_PASSWORD=${FCREPO_DB_PASSWORD} + expose: + - 8080 + networks: + internal: + + db: + image: postgres:14 + env_file: + - .env.production + environment: + - POSTGRES_DB=${DB_NAME} + - POSTGRES_PASSWORD=${DB_PASSWORD} + - POSTGRES_USER=${DB_USER} + volumes: + - /store/keep/db_data/pg14:/var/lib/postgresql/data + - /store/keep/db_data/dumps:/dumps:ro + networks: + internal: + + web: + <<: *app + depends_on: + db: + condition: service_started + solr: + condition: service_started + fcrepo: + condition: service_started + redis: + condition: service_started + zoo: + condition: service_started + check_volumes: + condition: service_started + worker: + condition: service_started + initialize_app: + condition: service_completed_successfully + expose: + - 3000 + ports: + - 3000:3000 + restart: unless-stopped + + worker: + <<: *app-worker + + worker_aux: + <<: *app-worker + environment: + - AUX_WORKER="true" + - 
AUXILIARY_QUEUE_TENANTS="sdapi" + - AWS_S3_BUCKET=space-stone-production-preprocessedbucketf21466dd-15sun4xy658nh + - GOOD_JOB_CLEANUP_DISCARDED_JOBS=false + - GOOD_JOB_CLEANUP_INTERVAL_SECONDS=86400 + - GOOD_JOB_CLEANUP_PRESERVED_JOBS_BEFORE_SECONDS_AGO=604800 + - HYRAX_ACTIVE_JOB_QUEUE=good_job + - MAGICK_CONFIGURE_PATH=/etc/ImageMagick-7 + + # Do not recurse through all of tmp. derivatives will make booting + # very slow and eventually just time out as data grows + check_volumes: + <<: *app + user: root + entrypoint: ["sh", "-x", "-c"] + command: + - > + chown -R app:app /app/samvera/hyrax-webapp/public/uploads && + chown -R app:app /app/samvera/hyrax-webapp/public/assets && + chown -R app:app /app/samvera/hyrax-webapp/tmp/cache + + initialize_app: + <<: *app + environment: + - CONFDIR=/app/samvera/hyrax-webapp/solr/conf + entrypoint: ["sh", "-c"] + command: + - > + /app/samvera/bin/solrcloud-upload-configset.sh /app/samvera/hyrax-webapp/solr/conf && + /app/samvera/bin/solrcloud-assign-configset.sh && + SOLR_COLLECTION_NAME=hydra-test /app/samvera/bin/solrcloud-assign-configset.sh && + bundle && + /app/samvera/bin/db-migrate-seed.sh + depends_on: + db: + condition: service_started + solr: + condition: service_healthy + fcrepo: + condition: service_started + redis: + condition: service_started + + redis: + image: redis:5 + env_file: + - .env.production + command: sh -c 'redis-server --requirepass "$REDIS_PASSWORD"' + volumes: + - /store/keep/redis_data:/data + networks: + internal: \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 4fdbeab2..85c8c9ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,6 +69,7 @@ services: extends: file: hyrax-webapp/docker-compose.yml service: adminer + zoo: extends: file: hyrax-webapp/docker-compose.yml @@ -90,6 +91,7 @@ services: service: fits db: + image: postgres:14 extends: file: hyrax-webapp/docker-compose.yml service: db @@ -109,8 +111,7 @@ services: <<: *app-worker 
environment: - AUX_WORKER="true" - - AUXILIARY_QUEUE_TENANTS="sdapi" # This line is what makes the knapsack include use the local code instead of the remote gem - - AWS_REGION=us-east-1 + - AUXILIARY_QUEUE_TENANTS="sdapi" - AWS_S3_BUCKET=space-stone-production-preprocessedbucketf21466dd-15sun4xy658nh - GOOD_JOB_CLEANUP_DISCARDED_JOBS=false - GOOD_JOB_CLEANUP_INTERVAL_SECONDS=86400 diff --git a/provision/.gitignore b/provision/.gitignore new file mode 100644 index 00000000..54c9d37b --- /dev/null +++ b/provision/.gitignore @@ -0,0 +1,11 @@ +.aider* +.env +files/ghcr_token +files/id_rsa +files/id_rsa.pub +files/staging.key +files/staging.cer +files/prod.key +files/prod.cer +files/ansible_become_password +files/cloudflare.ini \ No newline at end of file diff --git a/provision/README.md b/provision/README.md new file mode 100644 index 00000000..8d3c5549 --- /dev/null +++ b/provision/README.md @@ -0,0 +1,51 @@ +# Adventist Server Setup + +This repository contains Ansible playbooks to automate the setup of the Adventist server. + +## Prerequisites + +1. Clone the keep volume +2. Clone the tmp volume +3. NAD Cloud to generate new instance and give Notch8 access via ssh to setup ansible provision of infrastructure. +4. NAD Cloud to provision and attach the volumes to the instance: + - tmp: 800 GiB + - keep: 4000 GiB + - 100 GiB +5. Set up 1password cli (`op`) OR place required files in the `files/` directory: + - SSL certificate: `b2_adventistdigitallibrary_org_2024_complete.cer` + - SSL private key: `b2.adventistdigitallibrary.org.2024.key` + - Nginx config: `nginx-default` + - Deploy key: `id_rsa` + +## Installation + +1. Install Ansible dependencies: +```bash +ansible-galaxy collection install community.docker +ansible-galaxy install -r requirements.yml +``` + +2. Update the inventory file with your server's IP address + +3. 
Run the playbook: +```bash +ansible-playbook -i inventory.yml site.yml +``` + +## What Gets Installed + +The playbook will: +- Create and mount storage directories +- Install Docker, Nginx, and other required packages +- Configure SSL certificates +- Set up Nginx with bad bot blocker +- Create user accounts with SSH access +- Configure Docker registry access +- Set up deployment keys + +## Post-Installation + +For working with Docker Compose, use: +```bash +alias dc='dotenv -e .env.production docker-compose -f docker-compose.production.yml' +``` diff --git a/provision/bin/run b/provision/bin/run new file mode 100755 index 00000000..25570743 --- /dev/null +++ b/provision/bin/run @@ -0,0 +1,58 @@ +#!/bin/bash +# ============================================================================= +# Adventist Knapsack Deployment Script +# Handles staging and production environments. +# Fetches secrets from 1Password, sets up env, runs Ansible, and optionally starts Docker. +# ============================================================================= +set -euo pipefail + +# ------------------------------- +# 0. 1Password Setup Instructions +# ------------------------------- +# If secrets have not been uploaded to 1Password yet, run these commands once: +# +# op document create files/id_rsa --title DEPLOY_ID_RSA --vault "ADVENTIST" +# op document create files/ghcr_token --title GHCR_TOKEN --vault "ADVENTIST" +# op document create files/ansible_become_password --title ANSIBLE_BECOME_PASSWORD --vault "ADVENTIST" +# op document create files/cloudflare.ini --title CLOUDFLARE_INI --vault "ADVENTIST" +# +# Make sure you are logged in via: `eval $(op signin)` + +# ------------------------------- +# 1. Determine environment +# ------------------------------- +ENV=${1:-staging} # Default to staging if no argument +echo "Deploying environment: $ENV" + +# ------------------------------- +# 2. 
Ensure secrets exist locally (fetch from 1Password if missing) +# ------------------------------- +mkdir -p files + +if [ ! -f files/id_rsa ]; then + echo "Fetching deploy key..." + op document get DEPLOY_ID_RSA --out-file files/id_rsa +fi + +if [ ! -f files/ghcr_token ]; then + echo "Fetching GHCR token..." + op document get GHCR_TOKEN --out-file files/ghcr_token +fi + +if [ ! -f files/ansible_become_password ]; then + echo "Fetching Ansible become password..." + op document get ANSIBLE_BECOME_PASSWORD --out-file files/ansible_become_password +fi + +if [ ! -f files/cloudflare.ini ]; then + echo "Fetching Cloudflare credentials..." + op document get CLOUDFLARE_INI --out-file files/cloudflare.ini +fi + +# ------------------------------- +# 3. Run Ansible playbook +# ------------------------------- +echo "Running Ansible playbook for $ENV..." +ansible-playbook -i inventory.yml site.yml -e "env=${ENV}" --limit "${ENV}" + +echo "Deployment for $ENV completed successfully!" diff --git a/provision/files/.keep b/provision/files/.keep new file mode 100644 index 00000000..e69de29b diff --git a/provision/files/nginx-default.j2 b/provision/files/nginx-default.j2 new file mode 100644 index 00000000..73ddce18 --- /dev/null +++ b/provision/files/nginx-default.j2 @@ -0,0 +1,77 @@ +upstream rails_app { + server {{ upstream_app_host }}:{{ upstream_app_port }}; +} + +map $status $loggable { + ~^444 0; + default 1; +} + +error_log /var/log/nginx/error.log warn; + +real_ip_header X-Forwarded-For; +real_ip_recursive on; + +server { + server_name {{ server_name }}; + root {{ web_root }}; + index index.html; + + client_body_in_file_only clean; + client_body_buffer_size 32K; + client_max_body_size 0; + access_log /var/log/nginx/access.log ; + + sendfile on; + send_timeout 300s; + + include /etc/nginx/bots.d/ddos.conf; + include /etc/nginx/bots.d/blockbots.conf; + + location ~ (\.php|\.aspx|\.asp) { + return 404; + } + + location ~ /\. 
{ + deny all; + } + + location ~* ^.+\.(rb|log)$ { + deny all; + } + + location ~ ^/(assets|packs|fonts|images|javascripts|stylesheets|swfs|system)/ { + try_files $uri @rails; + gzip_static on; + expires max; + add_header Cache-Control public; + add_header Last-Modified ""; + add_header ETag ""; + break; + } + + location / { + try_files $uri @rails; + } + + location @rails { + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header Host $http_host; + proxy_redirect off; + proxy_pass http://rails_app; + } + + listen 443 ssl; + ssl_certificate {{ ssl_certificate }}; + ssl_certificate_key {{ ssl_certificate_key }}; +} + +{% for host in environments[env].hostnames %} +server { + listen 80; + server_name {{ host }}; + return 301 https://$host$request_uri; +} +{% endfor %} \ No newline at end of file diff --git a/provision/inventory.yml b/provision/inventory.yml new file mode 100644 index 00000000..273b9dae --- /dev/null +++ b/provision/inventory.yml @@ -0,0 +1,18 @@ +--- +all: + children: + adventist_servers: + hosts: + staging: + ansible_host: 192.168.147.200 + ansible_user: adlquantum6269 + ansible_ssh_private_key_file: files/id_rsa + ansible_python_interpreter: /usr/bin/python3.10 + production: + ansible_host: 192.168.147.100 + ansible_user: adlquantum6269 + ansible_ssh_private_key_file: files/id_rsa + ansible_python_interpreter: /usr/bin/python3.10 + +# ssh-copy-id -i /Users/aprilrieger/n8/adventist_knapsack/provision/files/id_rsa.pub adlquantum6269@192.168.147.200 +# ssh-copy-id -i /Users/aprilrieger/n8/adventist_knapsack/provision/files/id_rsa.pub adlquantum6269@192.168.147.100 \ No newline at end of file diff --git a/provision/requirements.yml b/provision/requirements.yml new file mode 100644 index 00000000..788fab5f --- /dev/null +++ b/provision/requirements.yml @@ -0,0 +1,5 @@ +--- +roles: + - name: nginx-badbot-blocker + src: 
https://github.com/Sharlyll/Ansible-role-nginx-ultimate-bad-bot-blocker + version: main diff --git a/provision/roles/base_setup/meta/main.yml b/provision/roles/base_setup/meta/main.yml new file mode 100644 index 00000000..59d436ff --- /dev/null +++ b/provision/roles/base_setup/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Notch8 + description: Base setup for Adventist Hyku servers + license: MIT + min_ansible_version: "2.9" + platforms: + - name: Ubuntu + versions: + - focal + - jammy + +dependencies: [] diff --git a/provision/roles/base_setup/tasks/main.yml b/provision/roles/base_setup/tasks/main.yml new file mode 100644 index 00000000..eb3449d8 --- /dev/null +++ b/provision/roles/base_setup/tasks/main.yml @@ -0,0 +1,44 @@ +--- +- name: Ensure storage directories exist + file: + path: "{{ item.path }}" + state: directory + mode: "0755" + loop: "{{ environments[env].mounts }}" + loop_control: + label: "{{ item.path }}" + +- name: Mount volumes and persist (UUID-based) + ansible.posix.mount: + path: "{{ item.path }}" + src: "UUID={{ item.uuid }}" + fstype: "{{ item.fstype | default('xfs') }}" + opts: "{{ item.opts | default('defaults,noatime') }}" + state: mounted + loop: "{{ environments[env].mounts }}" + loop_control: + label: "{{ item.path }}" + +- name: Update apt cache + apt: + update_cache: yes + +- name: Install required packages + apt: + name: + - docker.io + - nginx + - python3-dotenv-cli + state: present + +- name: Install Docker Compose + get_url: + url: "https://github.com/docker/compose/releases/latest/download/docker-compose-{{ ansible_system }}-{{ ansible_architecture }}" + dest: /usr/local/bin/docker-compose + mode: '0755' + +- name: Add adlquantum6269 user to docker group + user: + name: adlquantum6269 + groups: docker + append: yes diff --git a/provision/site.yml b/provision/site.yml new file mode 100644 index 00000000..4b094a32 --- /dev/null +++ b/provision/site.yml @@ -0,0 +1,319 @@ +--- +- name: Setup Adventist Server + hosts: 
adventist_servers + become: true + vars_files: + - vars/main.yml + + pre_tasks: + - name: Configure DNS servers (uncomment and set DNS) + replace: + path: /etc/systemd/resolved.conf + regexp: '^#?DNS=.*' + replace: 'DNS=1.1.1.1 8.8.8.8' + notify: restart systemd-resolved + + - name: Configure DNS servers (uncomment and set FallbackDNS) + replace: + path: /etc/systemd/resolved.conf + regexp: '^#?FallbackDNS=.*' + replace: 'FallbackDNS=9.9.9.9 1.0.0.1' + notify: restart systemd-resolved + + - name: Flush handlers to apply DNS changes immediately + meta: flush_handlers + + roles: + - role: base_setup + tags: base + + tasks: + - name: Ensure storage directories exist + file: + path: "{{ item.path }}" + state: directory + mode: "0755" + loop: "{{ environments[env].mounts }}" + loop_control: + label: "{{ item.path }}" + + - name: Mount volumes and persist (UUID-based) + ansible.posix.mount: + path: "{{ item.path }}" + src: "UUID={{ item.uuid }}" + fstype: "{{ item.fstype | default('xfs') }}" + opts: "{{ item.opts | default('defaults,noatime') }}" + state: mounted + loop: "{{ environments[env].mounts }}" + loop_control: + label: "{{ item.path }}" + + - name: Create shared data group + group: + name: adventist-data + state: present + + - name: Add SSH users to shared group + user: + name: "{{ item.name }}" + groups: adventist-data + append: yes + loop: "{{ ssh_users }}" + + - name: Set permissions on mounted storage directories + file: + path: "{{ item.path }}" + owner: root + group: adventist-data + mode: '2775' # setgid + rwxrwsr-x + loop: "{{ environments[env].mounts }}" + + - name: Update apt cache + apt: + + - name: Install required packages + apt: + name: + - certbot + - docker.io + - git + - nginx + - python3-dotenv-cli + - python3-pip + state: present + update_cache: yes + + - name: Install certbot-dns-cloudflare via pip + pip: + name: certbot-dns-cloudflare + state: present + tags: certbot + + - name: Create Cloudflare credentials directory + file: + path: 
/root/.secrets/certbot + state: directory + mode: '0700' + owner: root + group: root + tags: certbot + + - name: Copy Cloudflare credentials file + copy: + src: files/cloudflare.ini + dest: /root/.secrets/certbot/cloudflare.ini + mode: '0600' + owner: root + group: root + tags: certbot + + - name: Generate Let's Encrypt certificate with Cloudflare DNS + command: > + certbot certonly --dns-cloudflare + --dns-cloudflare-credentials /root/.secrets/certbot/cloudflare.ini + --non-interactive --agree-tos + -m admin@adventistdigitallibrary.org + -d {{ environments[env].hostnames | join(' -d ') }} + args: + creates: "/etc/letsencrypt/live/{{ environments[env].hostnames[0] }}/fullchain.pem" + tags: certbot + + - name: Setup Certbot auto-renewal cron job + cron: + name: "Certbot renewal" + minute: "0" + hour: "3" + job: "certbot renew --quiet && systemctl reload nginx" + state: present + tags: certbot + + - name: Set environment-specific variables for templates + set_fact: + hyku_admin_host: "{{ environments[env].hyku_admin_host }}" + hyku_default_host: "{{ environments[env].hyku_admin_host | replace(environments[env].hyku_admin_host.split('.')[0], '%{tenant}') }}" + hyku_root_host: "{{ environments[env].hyku_admin_host }}" + + - name: Deploy Nginx configuration + template: + src: files/nginx-default.j2 + dest: /etc/nginx/sites-enabled/default + mode: '0644' + notify: restart nginx + tags: [nginx, certbot] + + - name: Ensure adventist_knapsack directory exists + file: + path: /store/keep/adventist_knapsack + state: directory + mode: '0755' + owner: root + group: root + + - name: Check if git repository already exists + stat: + path: /store/keep/adventist_knapsack/.git + register: git_repo_check + + - name: Clone git repository if not already cloned + git: + repo: "{{ git_repo_url }}" + dest: /store/keep/adventist_knapsack + version: main + update: no + accept_hostkey: yes + when: not git_repo_check.stat.exists + + - name: Set environment-specific variables for templates + 
set_fact: + hyku_admin_host: "{{ environments[env].hyku_admin_host }}" + hyku_default_host: "{{ environments[env].hyku_admin_host | replace(environments[env].hyku_admin_host.split('.')[0], '%{tenant}') }}" + hyku_root_host: "{{ environments[env].hyku_admin_host }}" + + - name: Render environment file for Docker + template: + src: files/.env.j2 + dest: /store/keep/adventist_knapsack/.env.production + owner: root + group: root + mode: '0600' + + - name: Ensure Docker Compose (Compose V2) is installed + block: + + - name: Ensure apt cache is up-to-date + apt: + update_cache: yes + when: ansible_facts['pkg_mgr'] == 'apt' + + - name: Check if 'docker-compose-plugin' package is available + command: apt-cache policy docker-compose-plugin + register: compose_policy + failed_when: false + changed_when: false + when: ansible_facts['pkg_mgr'] == 'apt' + + - name: Install docker-compose-plugin via apt if available + apt: + name: docker-compose-plugin + state: present + update_cache: no + when: + - ansible_facts['pkg_mgr'] == 'apt' + - compose_policy is defined + - "'Candidate:' in compose_policy.stdout and 'Candidate: (none)' not in compose_policy.stdout" + register: compose_pkg_install + failed_when: false + + - name: Set Compose plugin install path + set_fact: + compose_plugin_path: /usr/local/lib/docker/cli-plugins/docker-compose + + - name: Determine compose download architecture (single-line, no newlines) + set_fact: + compose_download_arch: >- + {{ ('linux-x86_64' + if ansible_architecture in ['x86_64','amd64'] + else ('linux-aarch64' if ansible_architecture in ['aarch64','arm64'] + else ansible_architecture)) }} + + - name: Create CLI plugins dir (fallback install) + file: + path: "{{ compose_plugin_path | dirname }}" + state: directory + mode: '0755' + when: compose_pkg_install is not defined or compose_pkg_install is failed or compose_pkg_install is skipped + + - name: Download Docker Compose (CLI plugin binary) as fallback + get_url: + url: 
"https://github.com/docker/compose/releases/latest/download/docker-compose-{{ compose_download_arch }}" + dest: "{{ compose_plugin_path }}" + mode: '0755' + force: no + validate_certs: yes + when: compose_pkg_install is not defined or compose_pkg_install is failed or compose_pkg_install is skipped + + - name: Ensure plugin owned and executable + file: + path: "{{ compose_plugin_path }}" + owner: root + group: root + mode: '0755' + when: compose_pkg_install is not defined or compose_pkg_install is failed or compose_pkg_install is skipped + + - name: Verify 'docker compose' is available + command: docker compose version + register: compose_verify + failed_when: compose_verify.rc != 0 + changed_when: false + + become: true + + - name: Add adlquantum6269 user to docker group + user: + name: adlquantum6269 + groups: docker + append: yes + + - name: Create user accounts + user: + name: "{{ item.name }}" + state: present + create_home: yes + groups: adm,sudo,docker + loop: "{{ ssh_users }}" + + - name: Configure sudo without password + copy: + dest: "/etc/sudoers.d/{{ item.name }}" + content: "{{ item.name }} ALL=(ALL) NOPASSWD:ALL" + mode: '0440' + validate: /usr/sbin/visudo -cf %s + loop: "{{ ssh_users }}" + + - name: Fetch GitHub SSH keys on remote host + ansible.builtin.uri: + url: "https://github.com/{{ item.github }}.keys" + return_content: yes + status_code: [200, 404] + validate_certs: yes + register: github_key + loop: "{{ ssh_users }}" + loop_control: + label: "{{ item.name }} ← {{ item.github }}" + tags: github_keys + + - name: Install fetched GitHub keys into users' authorized_keys (remote) + authorized_key: + user: "{{ item.item.name }}" + state: present + key: "{{ item.content | default('') }}" + manage_dir: yes + when: item.status == 200 and (item.content is defined) and (item.content | length > 0) + loop: "{{ github_key.results }}" + loop_control: + label: "{{ item.item.name }} ← {{ item.item.github }}" + tags: github_keys + + - name: Configure Docker 
login + docker_login: + registry_url: ghcr.io + username: orangewolf + password: "{{ ghcr_token }}" + + handlers: + - name: restart systemd-resolved + systemd: + name: systemd-resolved + state: restarted + when: ansible_facts['service_mgr'] == 'systemd' + + - name: restart nginx + service: + name: nginx + state: restarted + + post_tasks: + - name: Run nginx-badbot-blocker + import_role: + name: nginx-badbot-blocker + tags: [nginx, badbot] \ No newline at end of file diff --git a/provision/vars/main.yml b/provision/vars/main.yml new file mode 100644 index 00000000..631250fc --- /dev/null +++ b/provision/vars/main.yml @@ -0,0 +1,64 @@ +--- +# existing global variables +root_group: root +ssh_users: + - name: april + github: aprilrieger + - name: max + github: maxkadel +ghcr_token: "{{ lookup('file', 'files/ghcr_token') | trim }}" +ansible_become_password: "{{ lookup('file', 'files/ansible_become_password') | trim }}" +git_repo_url: "https://orangewolf:{{ ghcr_token | urlencode }}@github.com/notch8/adventist_knapsack.git" + +environments: + staging: + hyku_admin_host: s3.adventistdigitallibrary.org + hostnames: + - s3.adventistdigitallibrary.org + - '*.s3.adventistdigitallibrary.org' + web_root: /store/keep/adventist_knapsack/public + ssl_cert_dest: /etc/letsencrypt/archive/s3.adventistdigitallibrary.org/fullchain1.pem + ssl_key_dest: /etc/letsencrypt/archive/s3.adventistdigitallibrary.org/privkey1.pem + upstream_app_host: 192.168.147.200 + upstream_app_port: 3000 + # To get the UUID for each volume on the server run: lsblk -o NAME,UUID,MOUNTPOINT,FSTYPE + mounts: + - { path: "/store/keep", uuid: "89e4213c-6100-4a72-8d52-2a21dfaedf1a", fstype: "xfs" } + - { path: "/store/tmp", uuid: "dc3dc4e8-b2d1-4e22-a741-7288dc1b893e", fstype: "xfs" } + - { path: "/var/lib/docker", uuid: "3083181c-ffdd-4dce-9894-6f36237fc255", fstype: "xfs" } + - { path: "/store/keep/db_data", uuid: "7ca298c8-f8f3-4976-ae78-3147ed33c819", fstype: "xfs" } + - { path: "/store/keep/fcrepo_data", uuid: 
"f528b510-5da5-4668-8d05-2a798f3cd29c", fstype: "xfs" } + - { path: "/store/keep/redis_data", uuid: "a4d4c2c5-b518-4c52-985a-71d3f3f1bd7a", fstype: "xfs" } + - { path: "/store/keep/solr-data", uuid: "8b03667d-d6c8-4654-92ac-047bf2c12063", fstype: "xfs" } + - { path: "/store/keep/zoo_data", uuid: "b1c16f24-6b47-4735-b085-106083f5fbe7", fstype: "xfs" } + + production: + hyku_admin_host: b3.adventistdigitallibrary.org + hostnames: + - b3.adventistdigitallibrary.org + - '*.b3.adventistdigitallibrary.org' + web_root: /store/keep/adventist_knapsack/public + ssl_cert_dest: /etc/letsencrypt/archive/b3.adventistdigitallibrary.org/fullchain1.pem + ssl_key_dest: /etc/letsencrypt/archive/b3.adventistdigitallibrary.org/privkey1.pem + upstream_app_host: 192.168.147.100 + upstream_app_port: 3000 + # To get the UUID for each volume on the server run: lsblk -o NAME,UUID,MOUNTPOINT,FSTYPE + mounts: + - { path: "/store/keep", uuid: "79a6a2d2-2ae4-4bd6-b227-6ae03bb5fbf3", fstype: "xfs" } + - { path: "/store/tmp", uuid: "c1a9605c-e81a-4478-8f1a-420fafc73d64", fstype: "xfs" } + - { path: "/var/lib/docker", uuid: "c96601c8-1fff-4540-ac35-e8f0257c5fb0", fstype: "xfs" } + - { path: "/store/keep/db_data", uuid: "0fed6c6e-865d-4b9e-80a8-20c39e23aba5", fstype: "xfs" } + - { path: "/store/keep/fcrepo_data", uuid: "274f387c-4578-49fa-bba9-f7802488d561", fstype: "xfs" } + - { path: "/store/keep/redis_data", uuid: "70ddbea4-2703-4a92-aa22-db2582c191d2", fstype: "xfs" } + - { path: "/store/keep/solr-data", uuid: "90a5a23f-1909-4bbe-9507-95abf0d32980", fstype: "xfs" } + - { path: "/store/keep/zoo_data", uuid: "f48d11f1-25da-4433-8507-460a3c588ddf", fstype: "xfs" } + +# convenience variables +server_name: "{{ environments[env].hostnames | join(' ') }}" +web_root: "{{ environments[env].web_root }}" +ssl_certificate: "{{ environments[env].ssl_cert_dest }}" +ssl_certificate_key: "{{ environments[env].ssl_key_dest }}" +upstream_app_host: "{{ environments[env].upstream_app_host }}" +upstream_app_port: 
"{{ environments[env].upstream_app_port }}" +ssl_certificate_src: "{{ environments[env].ssl_cert_src }}" +ssl_key_src: "{{ environments[env].ssl_key_src }}" diff --git a/solr/security.json b/solr/security.json new file mode 120000 index 00000000..52228fd0 --- /dev/null +++ b/solr/security.json @@ -0,0 +1 @@ +../hyrax-webapp/solr/security.json \ No newline at end of file