diff --git a/README.md b/README.md index 40f078f..39e8421 100644 --- a/README.md +++ b/README.md @@ -281,6 +281,19 @@ There are Message Properties which can be set in the message.properties file: | project.form.agreement | The text which will be displayed as a legend for the agreement in the editor | Einverständniserklärung | | project.form.validation.agreement | The text which will be displayed as a validation error if the agreement is not accepted | Sie müssen der Einverständniserklärung zustimmen. | +## Sitelinks +The Sitelinks resource provides sitelinks based on Solr and is optimized for **Google Scholar**. +It allows for hierarchical navigation through sitelinks, organized by publication year and month, using path parameters. +By default, the resource is disabled but can be activated and configured as follows. +``` +# Activates the resource +MCR.Jersey.Resource.Packages=%MCR.Jersey.Resource.Packages%,de.gbv.reposis.sitelinks.resources +# Basic query for Solr +Sitelinks.Resource.BasicFilterQuery=worldReadable:true AND ((objectType:mods AND -state:*) OR (objectType:mods AND state:published)) +# Pagination / page size of the sitelinks +Sitelinks.PageSize=100 +``` +The entry page is located at `/rsc/sitelinks/` and may need to be allowed in the `robots.txt` file. ## Development @@ -291,4 +304,4 @@ MCR.Developer.Resource.Override=/path/to/reposis_common/src/main/resources MCR.LayoutService.LastModifiedCheckPeriod=0 MCR.UseXSLTemplateCache=false MCR.SASS.DeveloperMode=true -``` \ No newline at end of file +``` diff --git a/src/main/java/de/gbv/reposis/sitelinks/ObjectMetadataService.java b/src/main/java/de/gbv/reposis/sitelinks/ObjectMetadataService.java new file mode 100644 index 0000000..bd2465e --- /dev/null +++ b/src/main/java/de/gbv/reposis/sitelinks/ObjectMetadataService.java @@ -0,0 +1,160 @@ +/* + * This file is part of *** M y C o R e *** + * See https://www.mycore.de/ for details. 
+ * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see . + */ + +package de.gbv.reposis.sitelinks; + +import java.io.IOException; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; + +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.FacetParams; +import org.mycore.common.MCRException; +import org.mycore.solr.MCRSolrClientFactory; + +/** + * Service for retrieving object-related metadata from Solr. + * Provides methods to fetch years, months, and object IDs for which objects have been issued. + *

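+ * This service assumes that the Solr documents contain the following fields for grouping and sorting:
+ * <ul>
+ * <li>{@code mods.yearIssued} &ndash; faceted to determine the years for which objects exist</li>
+ * <li>{@code mods.dateIssued} &ndash; used to select the objects of a year and as the primary sort key</li>
+ * <li>{@code created} &ndash; used as the secondary sort key</li>
+ * <li>{@code id} &ndash; returned as the object ID</li>
+ * </ul>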
+ */
+public class ObjectMetadataService {
+
+    // Solr field whose value is returned as the object ID.
+    private static final String FIELD_ID = "id";
+
+    // Solr field that is faceted to find the years for which objects exist.
+    private static final String FIELD_YEAR_ISSUED = "mods.yearIssued";
+
+    // Solr field used for the per-year filter and as the primary sort key.
+    private static final String FIELD_DATE_ISSUED = "mods.dateIssued";
+
+    // Solr field used as the secondary sort key.
+    private static final String FIELD_CREATED = "created";
+
+    // Match-all main query; results are narrowed through filter queries only.
+    private static final String DEFAULT_SOLR_QUERY = "*:*";
+
+    private final SolrClient solrClient;
+
+    // Filter query that is added to every Solr request issued by this service.
+    private final String basicFilterQuery;
+
+    /**
+     * Constructs an instance of {@link ObjectMetadataService} using the provided filter query
+     * and the client returned by {@code MCRSolrClientFactory.getMainSolrClient()}.
+     *
+     * @param basicFilterQuery a Solr filter query applied to all queries
+     */
+    public ObjectMetadataService(String basicFilterQuery) {
+        this(MCRSolrClientFactory.getMainSolrClient(), basicFilterQuery);
+    }
+
+    /**
+     * Constructs a new {@code ObjectMetadataService}.
+     *
+     * @param solrClient the Solr client used to execute queries
+     * @param basicFilterQuery a Solr filter query applied to all queries
+     */
+    public ObjectMetadataService(SolrClient solrClient, String basicFilterQuery) {
+        this.solrClient = solrClient;
+        this.basicFilterQuery = basicFilterQuery;
+    }
+
+    // TODO: the result of this method is easily cacheable
+    /**
+     * Retrieves all years for which objects exist with an issued date.
+     * Years that have no object matching the basic filter query are not returned.
+     *
+     * @return a list of years, sorted in descending order
+     * @throws MCRException if a Solr query or I/O error occurs
+     */
+    public List<Integer> getYearsWithObjects() {
+        final SolrQuery query = new SolrQuery(DEFAULT_SOLR_QUERY);
+        // Only the facet counts are needed, not the documents themselves.
+        query.setRows(0);
+        query.addFilterQuery(basicFilterQuery);
+        query.setFacet(true);
+        query.addFacetField(FIELD_YEAR_ISSUED);
+        query.setFacetSort(FacetParams.FACET_SORT_INDEX);
+        // A negative facet limit returns all facet values instead of only the first ones.
+        query.setFacetLimit(-1);
+        // Solr's default facet.mincount is 0: without this, years whose objects are all excluded by the
+        // filter query would still be returned (with count 0) and lead to empty year pages.
+        query.setFacetMinCount(1);
+        try {
+            return solrClient.query(query).getFacetField(FIELD_YEAR_ISSUED).getValues()
+                .stream().map(FacetField.Count::getName).map(Integer::parseInt).sorted(Comparator.reverseOrder())
+                .toList();
+        } catch (SolrServerException | IOException e) {
+            throw new MCRException(e);
+        }
+    }
+
+    /**
+     * Retrieves object IDs for objects issued in a specific year, with support for pagination,
+     * and sorts the results primarily by the issued date and secondarily by the creation timestamp.
+     *

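+     * <p>
+     * The returned {@link ObjectIdsWithCount} contains:
+     * <ul>
+     * <li>the object IDs of the requested page</li>
+     * <li>the total number of objects that match the query (not limited by pagination)</li>
+     * </ul>
+     * <p>
+     * Sorting behavior:
+     * <ol>
+     * <li>Objects are sorted in descending order primarily by {@code dateIssued} ({@link #FIELD_DATE_ISSUED}).</li>
+     * <li>If multiple objects have the same {@code dateIssued} value, they are secondarily sorted
+     * in descending order by the {@code created} timestamp ({@link #FIELD_CREATED}) as a tie-breaker.</li>
+     * <li>If {@code dateIssued} is missing for an object, it is effectively sorted according to {@code created}.
+     * (Note: the year filter is applied to {@code dateIssued}, so objects without that field are not
+     * expected to match at all.)</li>
+     * </ol>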
+     *
+     * @param year the year of the issued objects (e.g., 2021); it is zero-padded to four digits before matching
+     * @param offset the offset from which to start fetching results (for pagination)
+     * @param limit the maximum number of results to fetch (for pagination)
+     * @return an {@link ObjectIdsWithCount} object containing a list of object IDs and the total count
+     * @throws MCRException if a Solr query or I/O error occurs, or if the query execution fails
+     */
+    public ObjectIdsWithCount getObjectIdsByDate(int year, int offset, int limit) {
+        final SolrQuery query = new SolrQuery(DEFAULT_SOLR_QUERY);
+        query.addFilterQuery(basicFilterQuery);
+        // Prefix match on the issued date. The year is padded to four digits so that a short value such as
+        // 202 cannot act as a prefix of other years (2020-2029). Four-digit years produce the same filter
+        // as before. NOTE(review): assumes dateIssued values start with a four-digit year - confirm.
+        query.addFilterQuery(String.format(Locale.ROOT, FIELD_DATE_ISSUED + ":%04d*", year));
+        // Only the ID is needed by the callers.
+        query.setFields(FIELD_ID);
+        query.setStart(offset);
+        query.setRows(limit);
+        query.addSort(FIELD_DATE_ISSUED, SolrQuery.ORDER.desc);
+        query.addSort(FIELD_CREATED, SolrQuery.ORDER.desc);
+        try {
+            final QueryResponse response = solrClient.query(query);
+            // Number of all matching documents, independent of start/rows.
+            final long totalCount = response.getResults().getNumFound();
+            final List<String> objectIds =
+                response.getResults().stream().map((document) -> (String) document.getFieldValue(FIELD_ID)).toList();
+            return new ObjectIdsWithCount(objectIds, totalCount);
+        } catch (SolrServerException | IOException e) {
+            throw new MCRException(e);
+        }
+    }
+
+    /**
+     * A simple record to store a list of object IDs along with the total count of matching objects.
+     *
+     * @param objectIds the list of object IDs of the requested page (objects issued in the specified year)
+     * @param totalCount the total number of objects that match the query criteria (not limited by pagination)
+     */
+    public record ObjectIdsWithCount(List<String> objectIds, long totalCount) {
+    }
+}
diff --git a/src/main/java/de/gbv/reposis/sitelinks/resources/SitelinksResource.java b/src/main/java/de/gbv/reposis/sitelinks/resources/SitelinksResource.java
new file mode 100644
index 0000000..b4f0216
--- /dev/null
+++ b/src/main/java/de/gbv/reposis/sitelinks/resources/SitelinksResource.java
@@ -0,0 +1,182 @@
+/*
+ * This file is part of *** M y C o R e ***
+ * See https://www.mycore.de/ for details.
+ *
+ * MyCoRe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MyCoRe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with MyCoRe. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package de.gbv.reposis.sitelinks.resources;
+
+import java.io.InputStream;
+
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.ws.rs.GET;
+import jakarta.ws.rs.Path;
+import jakarta.ws.rs.PathParam;
+import jakarta.ws.rs.Produces;
+import jakarta.ws.rs.WebApplicationException;
+import jakarta.ws.rs.core.Context;
+import jakarta.ws.rs.core.MediaType;
+import jakarta.ws.rs.core.Response;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jdom2.Document;
+import org.jdom2.Element;
+import org.mycore.common.config.MCRConfiguration2;
+import org.mycore.common.config.MCRConfigurationException;
+import org.mycore.frontend.jersey.MCRJerseyUtil;
+
+import de.gbv.reposis.sitelinks.ObjectMetadataService;
+
+/**
+ * REST resource for "Sitelinks" and their associated data.
+ * This class provides endpoints to display the years that contain objects and,
+ * per year, paginated lists of publications.
+ */
+@Path("sitelinks")
+public class SitelinksResource {
+
+    private static final Logger LOGGER = LogManager.getLogger();
+
+    // Names of the path parameters used in the @Path templates of this resource.
+    private static final String PATH_PARAM_YEAR = "year";
+    private static final String PATH_PARAM_PAGE = "page";
+
+    // Read once when the class is initialized; a missing property makes initialization fail.
+    private static final String BASIC_FILTER_QUERY =
+        MCRConfiguration2.getStringOrThrow("Sitelinks.Resource.BasicFilterQuery");
+
+    // Read once when the class is initialized; a missing property raises MCRConfigurationException.
+    private static final int PAGE_SIZE =
+        MCRConfiguration2.getString("Sitelinks.PageSize").map(Integer::valueOf).orElseThrow(
+            () -> new MCRConfigurationException("Please specify property: 'Sitelinks.PageSize'"));
+
+    private final ObjectMetadataService objectMetadataService;
+    private final int pageSize;
+
+    // Request injected by the JAX-RS runtime; passed on to the transformation of the response document.
+    @Context
+    private HttpServletRequest req;
+
+    /**
+     * Constructor for {@code SitelinksResource}.
+     * Initializes the resource with an instance of {@link ObjectMetadataService}
+     * and the page size from the configuration.
+     */
+    public SitelinksResource() {
+        this(new ObjectMetadataService(BASIC_FILTER_QUERY), PAGE_SIZE);
+    }
+
+    /**
+     * Constructor for {@code SitelinksResource} that allows passing
+     * the {@link ObjectMetadataService} and page size.
+     *
+     * @param objectMetadataService The service used to look up years and object IDs
+     * @param pageSize The maximum number of items per page
+     */
+    public SitelinksResource(ObjectMetadataService objectMetadataService, int pageSize) {
+        this.objectMetadataService = objectMetadataService;
+        this.pageSize = pageSize;
+    }
+
+    /**
+     * Returns a list of years that contain objects (descending order).
+     *
+     * @return An HTML response containing a list of years
+     */
+    @GET
+    @Produces(MediaType.TEXT_HTML)
+    public Response listYears() {
+        return generateResponse(this::buildYearsElement);
+    }
+
+    /**
+     * Returns a list of publications for a specific year (descending order).
+     * The default page (page 1) will be shown.
+     *
+     * @param year The year for which to list the publications
+     * @return An HTML response containing the first page of publications for the specified year
+     */
+    @GET
+    @Path("/{" + PATH_PARAM_YEAR + "}")
+    @Produces(MediaType.TEXT_HTML)
+    public Response listPublicationsForMonthPage(@PathParam(PATH_PARAM_YEAR) int year) {
+        // Delegates to the paged variant with the first page.
+        return listPublicationsForMonthPage(year, 1);
+    }
+
+    /**
+     * Returns a list of publications for a specific year for the given page (descending order).
+     *
+     * @param year The year for which to list the publications
+     * @param page The page number of the publications
+     * @return An HTML response containing a list of publications for the specified page
+     * @throws WebApplicationException if the page number is less than 1, or so large that the
+     *     resulting result offset cannot be represented as an {@code int}
+     */
+    @GET
+    @Path("/{" + PATH_PARAM_YEAR + "}/page/{" + PATH_PARAM_PAGE + "}")
+    @Produces(MediaType.TEXT_HTML)
+    public Response listPublicationsForMonthPage(@PathParam(PATH_PARAM_YEAR) int year,
+        @PathParam(PATH_PARAM_PAGE) int page) {
+        if (page < 1) {
+            throw new WebApplicationException("Page number must be >= 1", Response.Status.BAD_REQUEST);
+        }
+        return generateResponse(() -> buildPageElement(year, page));
+    }
+
+    /**
+     * Wraps the element produced by the builder in a {@code sitelinks} root element,
+     * transforms the document via {@code MCRJerseyUtil.transform} and returns the result as response entity.
+     *
+     * @param elementBuilder supplies the content element; it is invoked outside the try block, so
+     *     exceptions thrown by it propagate unchanged
+     * @return the response carrying the transformed document
+     * @throws WebApplicationException if the transformation or reading its result fails
+     */
+    private Response generateResponse(ElementBuilder elementBuilder) {
+        final Element root = new Element("sitelinks");
+        final Element contentElement = elementBuilder.build();
+        root.addContent(contentElement);
+        try (InputStream transformedStream = MCRJerseyUtil.transform(new Document(root), req).getInputStream()) {
+            // The stream is closed when this method returns, i.e. before the JAX-RS runtime writes the
+            // entity. Read it completely here and hand over the bytes instead of the (closed) stream.
+            return Response.ok(transformedStream.readAllBytes()).build();
+        } catch (Exception e) {
+            LOGGER.error("Error while transforming document", e);
+            throw new WebApplicationException("Internal server error during XML transformation", e);
+        }
+    }
+
+    /**
+     * Builds the {@code years} element with one {@code year} child per year returned by the metadata service.
+     */
+    private Element buildYearsElement() {
+        final Element yearsElement = new Element("years");
+        for (int year : objectMetadataService.getYearsWithObjects()) {
+            yearsElement.addContent(createElement("year", String.valueOf(year)));
+        }
+        return yearsElement;
+    }
+
+    /**
+     * Builds the {@code page} element for the given year and page, containing the object IDs of that page.
+     *
+     * @throws WebApplicationException (400) if the offset for the page does not fit into an {@code int}
+     */
+    private Element buildPageElement(int year, int page) {
+        // Compute in long: (page - 1) * pageSize overflows int for large, client-supplied page numbers
+        // and would otherwise reach Solr as a negative start offset.
+        final long requestedOffset = (long) (page - 1) * pageSize;
+        if (requestedOffset > Integer.MAX_VALUE) {
+            throw new WebApplicationException("Page number is too large", Response.Status.BAD_REQUEST);
+        }
+        final int offset = (int) requestedOffset;
+        final ObjectMetadataService.ObjectIdsWithCount objectIdsWithCount =
+            objectMetadataService.getObjectIdsByDate(year, offset, pageSize);
+
+        final Element pageElement = new Element("page");
+        pageElement.setAttribute("number", String.valueOf(page));
+        pageElement.setAttribute("totalCount", String.valueOf(objectIdsWithCount.totalCount()));
+        pageElement.setAttribute("year",
String.valueOf(year)); + + final Element objectIdsElement = new Element("objectIds"); + for (String objectId : objectIdsWithCount.objectIds()) { + objectIdsElement.addContent(createElement("objectId", objectId)); + } + pageElement.addContent(objectIdsElement); + return pageElement; + } + + private Element createElement(String name, String text) { + final Element element = new Element(name); + element.setText(text); + return element; + } + + private interface ElementBuilder { + Element build(); + } + +} diff --git a/src/main/resources/config/reposis_common/mycore.properties b/src/main/resources/config/reposis_common/mycore.properties index 47be87a..8d73959 100644 --- a/src/main/resources/config/reposis_common/mycore.properties +++ b/src/main/resources/config/reposis_common/mycore.properties @@ -59,3 +59,10 @@ MCR.User.Shibboleth.NewUserHandler=de.gbv.reposis.user.shibboleth.MCRDefaultConf MIR.Agreement.File=agreement.pdf MIR.Agreement.MailTemplate=agreement_mail_template.xhtml MIR.Agreement.Genres.Skip=journal,series,collection,newspaper,series,bachelor_thesis,master_thesis,matura + +############################################################################## +# Sitelinks # +############################################################################## +Sitelinks.Resource.BasicFilterQuery=worldReadable:true AND ((objectType:mods AND -state:*) OR (objectType:mods AND state:published)) +Sitelinks.PageSize=100 +#MCR.Jersey.Resource.Packages=%MCR.Jersey.Resource.Packages%,de.gbv.reposis.sitelinks.resources diff --git a/src/main/resources/xsl/sitelinks.xsl b/src/main/resources/xsl/sitelinks.xsl new file mode 100644 index 0000000..1f3df4f --- /dev/null +++ b/src/main/resources/xsl/sitelinks.xsl @@ -0,0 +1,144 @@ + + + + + + + + + + + <!DOCTYPE html> + + + + + + + Sitelinks Index for Crawlers + + + + <xsl:value-of select="$headline" /> + + + + +

+ +

+

+ This page is intended for crawlers and bots. + Content is grouped by year and ordered by Date/Year Issued (newest first). +

+ + +
+ + + + + + + + + + + + + + + <xsl:value-of select="$headline" /> + + + + + + + + + + + +

+ +

+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +