script/btaa_gen_monograph_manifest

#!/usr/bin/env ruby
# frozen_string_literal: true

# Script takes a Fulcrum monograph metadata CSV file and
# a list of monograph directories and, for each directory,
# performs the following:
# 1. Uses the first 13 characters of the directory name
#    (the ISBN) to look up the monograph metadata in the CSV.
# 2. Creates the manifest.csv file, written into that
#    directory, used for creating the monograph on Fulcrum.

require 'optparse'
require 'ostruct'
require 'os'

# Locate the code base root, i.e. the parent of the
# directory that holds this script.
script_dir = File.expand_path(File.dirname(__FILE__))
root_dir = File.split(script_dir).first

# Load the project logger and send the script output to STDOUT.
require_relative File.join(root_dir, 'lib', 'logger')

script_logger = UMPTG::Logger.create(logger_fp: STDOUT)

# Process the script parameters. The script expects the path of the
# Fulcrum CSV file followed by one or more monograph directories.
options = OpenStruct.new
options.write_mono_file = false
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} <csv_file> <monograph_dir> [<monograph_dir>..]"
  opts.on_tail('-h', '--help', 'Print this help message') do
    # Log the help text itself (a String), as done for the usage error below.
    script_logger.info(opts.help)
    exit 0
  end
end

begin
  option_parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  # Unknown or malformed option: report it with the usage text
  # instead of terminating with a stack trace.
  script_logger.error(e.message)
  script_logger.info(option_parser.help)
  exit 1
end

if ARGV.count < 2
  # Missing arguments is a usage error, so exit non-zero like
  # the other fatal errors reported by this script.
  script_logger.info(option_parser.help)
  exit 1
end

# First argument is the CSV file; the remainder are monograph directories.
csv_file = ARGV[0]
monograph_dir_list = ARGV[1..-1]

require 'csv'

require_relative File.join(root_dir, 'lib', 'fulcrum', 'manifest')

# Column headers written to each generated manifest, taken
# from the collection schema of the manifest library.
MONOGRAPH_HEADERS = UMPTG::Fulcrum::Manifest::Validation::CollectionSchema.headers
monograph_headers = MONOGRAPH_HEADERS

# Disabled alternative that drops the resource-only columns:
#
# monograph_headers = MONOGRAPH_HEADERS \
#       - [ "File Name" ] - [ "Representative Kind" ] - [ "Resource Type" ] \
#       - [ "Alternative Text" ] - [ "Caption" ] - [ "Content Type" ] \
#       - [ "Exclusive to Fulcrum" ] - [ "External Resource URL" ] - [ "NOID" ] \
#       - [ "Section" ] - [ "Redirect to" ] - [ "Transcript" ] \
#       - [ "Translation" ] - [ "Link" ] - [ "Sort Date" ]

# Resolve the CSV path and stop if the file is not present.
csv_file = File.expand_path(csv_file)
if File.exist?(csv_file)
  script_logger.info("*** processing #{File.basename(csv_file)} ***")
  STDOUT.flush
else
  script_logger.error("invalid CSV file path #{csv_file}.")
  exit 1
end

# Disabled ISBN remapping. This should be removed
# once the metadata is updated.
#
# isbn_map = {
#         "9780299316983" => "9780299316990",
#         "9780299150839" => "9780299150891",
#         "9780299311186" => "9780299311193"
#     }

# Read the whole CSV and parse it into a table keyed by
# the header row; the header row itself is not returned.
fulcrum_body = File.read(csv_file)
fulcrum_csv = CSV.parse(fulcrum_body, headers: true, return_headers: false)

# Maps a lower-cased file extension to the "Representative Kind"
# value written to the manifest for a file with that extension.
REPRESENTATIVE_KINDS = {
  '.jpg' => 'cover',
  '.jpeg' => 'cover',
  '.png' => 'cover',
  '.bmp' => 'cover',
  '.tif' => 'cover',
  '.epub' => 'epub',
  '.pdf' => 'pdf_ebook'
}.freeze

# Generate a manifest.csv inside each monograph directory.
monograph_dir_list.each do |monograph_dir|
  monograph_dir = File.expand_path(monograph_dir)
  unless File.directory?(monograph_dir)
    script_logger.error("directory \"#{File.basename(monograph_dir)}\" does not exist.")
    next
  end
  script_logger.info("processing directory \"#{File.basename(monograph_dir)}\"")

  # The search key is the first 13 characters of the directory name.
  isbn = File.basename(monograph_dir)[0..12]
  # Disabled ISBN remapping, see isbn_map:
  #isbn = isbn_map[isbn] if isbn_map.include?(isbn)

  # Search the CSV for the monograph metadata. A row matches when
  # its ISBN_SEARCH field contains the key; rows without that
  # field never match. Exactly one match is required.
  fm_row_list = fulcrum_csv.select { |row| row['ISBN_SEARCH']&.include?(isbn) }
  if fm_row_list.empty?
    script_logger.warn("no CSV row found for #{isbn}. Skipping.")
    next
  end
  if fm_row_list.count > 1
    script_logger.warn("multiple CSV rows found for #{isbn}. Skipping.")
    next
  end
  script_logger.info("Found CSV row for #{isbn}.")

  monograph_row = fm_row_list.first

  # List the regular, non-hidden files of the directory. Reading
  # the entries directly (rather than globbing a joined path) keeps
  # glob metacharacters in the directory path from being interpreted,
  # and sorting makes the manifest row order deterministic.
  file_names = Dir.entries(monograph_dir)
                  .reject { |name| name.start_with?('.') }
                  .select { |name| File.file?(File.join(monograph_dir, name)) }
                  .sort

  # Build one manifest row per recognized representative file.
  rep_row_list = []
  file_names.each do |file_name|
    ext = File.extname(file_name).strip
    next if ext.empty?

    rep_kind = REPRESENTATIVE_KINDS[ext.downcase]
    if rep_kind.nil?
      script_logger.warn("skipping file #{file_name}")
      next
    end

    rep_row_list << {
      'File Name' => file_name,
      'Title' => file_name,
      'Representative Kind' => rep_kind
    }
  end

  # Copy the manifest columns out of the CSV row and mark the copy
  # as the monograph row. Working on a copy leaves the parsed CSV
  # table untouched.
  new_monograph_row = monograph_headers.each_with_object({}) do |header, row|
    row[header] = monograph_row[header]
  end
  new_monograph_row['File Name'] = '://:MONOGRAPH://:'

  # Manifest layout: header row, placeholder row, one row per
  # representative file, then the monograph row.
  manifest_body = CSV.generate(
    headers: monograph_headers,
    write_headers: true
  ) do |csv|
    csv << { "File Name" => "***row left intentionally blank***" }
    rep_row_list.each { |rep_row| csv << rep_row }
    csv << new_monograph_row
  end

  # Save the Fulcrum metadata CSV file.
  fulcrum_file = File.join(monograph_dir, "manifest.csv")
  script_logger.info("Creating metadata file #{fulcrum_file}")
  File.write(fulcrum_file, manifest_body)
end