script/jnl_construct

#!/usr/bin/env ruby
# frozen_string_literal: true

require 'optparse'
require 'ostruct'

# Locate the code base root: the parent of the directory that holds this script.
script_dir = File.dirname(File.expand_path(__FILE__))
root_dir = File.expand_path("..", script_dir)

require_relative File.join(root_dir, "lib", "logger")

# All script messages are written to standard output.
script_logger = UMPTG::Logger.create(logger_fp: STDOUT)

# Process the script parameters.
#
# Defaults: no Fulcrum host, journal directory = current working directory,
# no XHTML generation and no article type. Anything left in ARGV after
# parsing is treated as a list of journal names.
options = OpenStruct.new
options.fulcrum_host = nil
options.journal_dir = Dir.pwd
options.generate_xhtml = false
options.article_type = nil
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [-f production|preview|staging] [-d journal_dir] [-t article_type] [-x] [journal_name...]"
  opts.on('-d', '--directory [DIRECTORY]', 'Journal directory') do |dir|
    # DIRECTORY is optional, so a bare -d yields nil. Keep the default in that
    # case instead of storing nil (which would later break File.expand_path).
    options.journal_dir = dir unless dir.nil?
  end
  opts.on('-f', '--fulcrum_host host', 'Fulcrum environment') do |fulcrum_host|
    options.fulcrum_host = fulcrum_host
  end
  opts.on('-t', '--article_type type', 'JATS Article Type') do |article_type|
    options.article_type = article_type
  end
  opts.on('-x', '--xhtml', 'Generate XHTML') do
    options.generate_xhtml = true
  end
  opts.on_tail('-h', '--help', 'Print this help message') do
    script_logger.info(opts)
    exit 0
  end
end
option_parser.parse!(ARGV)

# Journal directory exist?
journal_dir = File.expand_path(options.journal_dir)
unless File.directory?(journal_dir)
  script_logger.error("journal directory not found \"#{journal_dir}\".")
  exit 1
end

# Journal directory valid? It must contain the articles, dlxs and
# resources subdirectories.
articles_dir = File.join(journal_dir, "articles")
dlxs_dir = File.join(journal_dir, "dlxs")
resources_dir = File.join(journal_dir, "resources")
unless [articles_dir, dlxs_dir, resources_dir].all? { |dir| File.directory?(dir) }
  script_logger.error("journal directory invalid \"#{File.basename(journal_dir)}\".")
  exit 1
end

# Journals to process: the names given on the command line or, if none
# were given, every DLXS XML file found in the dlxs directory.
journal_name_list = ARGV
if journal_name_list.empty?
  journal_name_list = Dir.glob(File.join(dlxs_dir, "*.xml")).map do |dlxs_file|
    File.basename(dlxs_file, ".*")
  end
end

require_relative File.join(root_dir, 'lib', 'xslt')
require_relative File.join(root_dir, "lib", "fulcrum")
require_relative File.join(root_dir, "lib", "services")

# Create the service for retrieving the manifest.
service = UMPTG::Services::Heliotrope.new(fulcrum_host: options.fulcrum_host)

require 'fileutils'
require 'nokogiri'
require 'htmlentities'
require 'uri'

# XSLT stylesheets: DLXS => JATS, and JATS => XHTML (Janeway stylesheet).
journal_xsl_dir = File.join(root_dir, "lib", "journal", "xsl")
dlxs2jats_xsl_file = File.join(journal_xsl_dir, "dlxs2jats.xsl")
jats2html_xsl_file = File.join(journal_xsl_dir, "janeway", "janeway.xsl")
# Alternative JATS => XHTML stylesheets, currently disabled:
#jats2html_xsl_file = File.join(journal_xsl_dir, "jats-html.xsl")
#jats2html_xsl_file = File.join(root_dir, "..", "janeway", "src", "transform", "xsl", "default.xsl")

# Single-character separators for a delimited resource list:
# key (KSP), record (RSP), field (FSP) and name/value (VSP).
KSP = '#'
RSP = ';'
FSP = '^'
VSP = '='

# Format of the embed stylesheet URL; takes the scheme/host and the NOID, e.g.
# https://heliotrope-preview.hydra.lib.umich.edu/downloads/wh246t40n?file=embed_css
LINK_HREF_MARKUP = '%s/downloads/%s?file=embed_css'

# Stylesheet <link> element; takes the stylesheet URL.
LINK_MARKUP = <<~LMARKUP
  <link href="%s" rel="stylesheet" type="text/css"></link>
LMARKUP

# Embed iframe without full screen support; takes NOID, source URL and title.
AUDIO_IFRAME_MARKUP = <<~AMARKUP
  <iframe id="fulcrum-embed-iframe-%s" src="%s" title="%s"></iframe>
AMARKUP

# Embed iframe with full screen support; takes NOID, source URL and title.
VIDEO_IFRAME_MARKUP = <<~VMARKUP
  <iframe id="fulcrum-embed-iframe-%s" src="%s&fs=1" title="%s" allowfullscreen></iframe>
VMARKUP

# Wrapper around the embed iframe; takes NOID, NOID and the iframe markup.
MEDIA_MARKUP = <<~MMARKUP
  <div id="fulcrum-embed-outer-%s">
  <div id="fulcrum-embed-inner-%s">
  %s
  </div>
  </div>
MMARKUP

# One <resource> element of the resource list handed to the XSLT. Takes, in
# order: entity, file name, file type, NOID, link, embed link, CSS link, DOI,
# DOI without prefix, title, caption, embed code and CSS embed code.
RESOURCE_MARKUP = <<~RMARKUP
  <resource entity="%s" file_name="%s" file_type="%s" noid="%s"
    link="%s" embed_link="%s" css_link="%s" doi="%s" doi_noprefix="%s">
  <title>%s</title>
  <caption>%s</caption>
  <embed_code>%s</embed_code>
  <css_stylesheet>%s</css_stylesheet>
  </resource>
RMARKUP

# Determine the type of a resource file.
#
# The file extension (without the leading dot) is used when the path has one.
# Otherwise the first bytes of the file are inspected for a few known
# signatures.
#
# @param path [String] path of the resource file
# @return [String] the extension, or the sniffed type ("gif", "png", "jpg",
#   "mp4" or "pdf"); "" when the type cannot be determined
def detect(path)
  type = File.extname(path).delete_prefix(".")
  return type unless type.empty?

  # No usable extension: fall back to sniffing the file header.
  return "" unless File.file?(path) && File.readable?(path)

  # binread may yield nil for an empty file, hence to_s.
  header = File.binread(path, 50).to_s.downcase
  return 'gif' if header.start_with?('gif')   # "GIF87a" / "GIF89a"
  return 'png' if header[1..3] == 'png'       # "\x89PNG"
  return 'jpg' if header[6..9] == 'jfif'      # JFIF identifier follows the 6 byte SOI/APP0 prefix
  # NOTE(review): matches an "mp4x" brand at byte 28 only; other ftyp
  # layouts are not recognized - confirm this is sufficient.
  return 'mp4' if header[28..30] == 'mp4'
  return 'pdf' if header[1..3] == 'pdf'       # "%PDF"

  ""
end

# Report the requested article type, if any.
script_logger.info(
  options.article_type.nil? ? "no article type assigned" : "assigning article type #{options.article_type}"
)

# For each journal: transform its DLXS file into JATS, passing along a
# description of the journal's resource files, then optionally transform the
# JATS into XHTML.
encoder = HTMLEntities.new
journal_name_list.each do |journal_name|
  dlxs_file = File.join(dlxs_dir, journal_name + ".xml")
  unless File.exist?(dlxs_file)
    script_logger.error("#{journal_name}: no DLXS file.")
    next
  end

  article_dir = File.join(articles_dir, journal_name)
  FileUtils.mkdir_p article_dir

  jats_file = File.join(article_dir, journal_name + "_jats.xml")
  script_logger.info("#{journal_name}: #{File.basename(dlxs_file)} ==> #{File.basename(jats_file)}")

  # Map each resource key (file name without extension) to the first matching
  # resource file. Hi-res variants (keys ending in "-lg") are skipped.
  resource_file_table = {}
  Dir.glob(File.join(resources_dir, "#{journal_name}*")).each do |resource_file|
    key = File.basename(resource_file, '.*')
    next if key.end_with?("-lg")

    resource_file_table[key] = resource_file unless resource_file_table.key?(key)
  end

  # If a monograph exists, retrieve its manifest
  manifest = nil
  id2csv_body_list = service.monograph_export(identifier: "#{journal_name}")
  if id2csv_body_list[journal_name].empty?
    script_logger.warn("no manifest found for id #{journal_name}")
  else
    manifest = UMPTG::Fulcrum::Manifest::Document.new(
                  csv_body: id2csv_body_list[journal_name].first
                )
    if manifest.nil?
      script_logger.error("generating manifest CSV for #{journal_name}")
    else
      script_logger.info("manifest found for #{journal_name}")
    end
  end

  # Copy the resource files to be used and describe each
  # one as a <resource> element for the XSLT.
  resource_list = []
  resource_file_table.each do |key, resource_file|
    resource_name = File.basename(resource_file)
    script_logger.info("Copying resource #{resource_name}")

    res_file = File.join(article_dir, resource_name)
    FileUtils.cp(resource_file, article_dir) unless File.exist?(res_file)

    file_type = detect(resource_file)

    # Values used when the manifest has no usable entry for this resource.
    link = title = caption = embed_link = doi = embed_code = ""
    noid = css_link = doi_noprefix = css_embed_code = ""

    fileset = manifest.fileset(resource_name) unless manifest.nil?
    unless fileset.nil? || fileset['noid'].to_s.strip.empty?
      # NOTE(review): presumably the manifest link cell has the form
      # =HYPERLINK("<url>") and this slice extracts <url> - confirm.
      link = fileset['link'][12..-3]

      link_uri = URI(link)
      link_scheme_host = link_uri.scheme + "://" + link_uri.host

      embed_code = fileset['embed_code']
      unless embed_code.nil? || embed_code.empty?
        embed_doc = Nokogiri::XML::DocumentFragment.parse(embed_code)
        iframe_node = embed_doc.xpath("descendant-or-self::*[local-name()='iframe']").first
        # Embed markup without an iframe leaves the embed link blank.
        embed_link = iframe_node['src'] unless iframe_node.nil?
      end

      # NOTE(review): title and caption are inserted into the resource XML
      # unescaped - confirm the manifest values are XML-safe.
      title = fileset['title']
      caption = fileset['caption']
      doi = fileset['doi']
      doi_noprefix = doi.nil? ? "" : doi.delete_prefix("https://doi.org/")
      noid = fileset['noid']
      css_link = sprintf(LINK_HREF_MARKUP, link_scheme_host, noid)

      resource_type = fileset['resource_type'].to_s
      iframe_markup =
        case resource_type.downcase
        when "video", "image"
          # NOTE(review): images use the video (full screen) iframe markup,
          # as in the original code - confirm this is intended.
          sprintf(VIDEO_IFRAME_MARKUP, noid, embed_link, title)
        when ""
          ""
        else
          sprintf(AUDIO_IFRAME_MARKUP, noid, embed_link, title)
        end

      css_embed_code = sprintf(LINK_MARKUP, css_link) +
            sprintf(MEDIA_MARKUP, noid, noid, iframe_markup)
    end

    resource_list << sprintf(RESOURCE_MARKUP, key,
         resource_name, file_type, noid, link, embed_link, css_link,
         doi, doi_noprefix, title, caption,
         encoder.encode(embed_code), encoder.encode(css_embed_code))
  end

  # The resource list is handed to the XSLT through a temporary XML file
  # written to the current working directory.
  resources_xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>" + resource_list.join + "</resources>"
  rfile = File.expand_path(File.basename(__FILE__) + "_resources.xml")
  File.write(rfile, resources_xml)

  begin
    params = {
                "image_list" => URI(rfile)
             }
    params["article_type"] = options.article_type unless options.article_type.nil?

    rc = UMPTG::XSLT.transform(
            xslpath: dlxs2jats_xsl_file,
            srcpath: dlxs_file,
            destpath: jats_file,
            logger: script_logger,
            parameters: params
            )
  ensure
    # Remove the temporary file even when building the parameters or the
    # transform raises.
    FileUtils.rm_f(rfile)
  end

  if rc && options.generate_xhtml
    html_file = File.join(File.dirname(jats_file), File.basename(jats_file, ".*") + ".html")
    UMPTG::XSLT.transform(
            xslpath: jats2html_xsl_file,
            srcpath: jats_file,
            destpath: html_file,
            logger: script_logger
            )
  end
end