const RERUM_API_BASE = "https://store.rerum.io/v1";

/**
 * Client-side protections for the RERUM search API: a small persistent
 * result cache plus a per-tab rate limiter, and the UI wiring for the
 * Annotation Text Search page.
 */

const CACHE_TTL_MS = 20 * 60 * 1000; // 20 minutes
const CACHE_STORAGE_KEY = "rerumSearchCache_v1";
// Upper bound on cached pages so localStorage cannot grow without limit.
const CACHE_MAX_ENTRIES = 50;

const PAGE_LIMIT = 50; // Default page size.
const MAX_PAGE_LIMIT = 100; // Never exceed 100 per RERUM guidance.

const MIN_SEARCH_INTERVAL_MS = 1000;
const RATE_WINDOW_MS = 60 * 1000;
const MAX_SEARCHES_PER_WINDOW = 5;

// In-memory view of the cache for fast access; persisted to localStorage.
let queryCache = {};

// Simple per-tab rate limit state.
const rateLimitState = {
  lastSearchTime: 0,
  recentSearches: [] // timestamps (ms) of searches within the last minute
};

/**
 * Returns the localStorage object, or null when it is unavailable
 * (non-browser environment, or access blocked by the browser).
 */
function getCacheStorage() {
  try {
    if (typeof window === "undefined" || !window.localStorage) {
      return null;
    }
    return window.localStorage;
  } catch (e) {
    // Merely touching localStorage can throw when storage is disabled.
    return null;
  }
}

/**
 * True when `entry` has the expected `{ timestamp, results }` shape and is
 * still within the cache TTL relative to `now`.
 */
function isFreshCacheEntry(entry, now) {
  return (
    entry !== null &&
    typeof entry === "object" &&
    Number.isFinite(entry.timestamp) &&
    Array.isArray(entry.results) &&
    entry.timestamp <= now &&
    now - entry.timestamp <= CACHE_TTL_MS
  );
}

/**
 * Drops malformed/expired entries and, if the cache is still above
 * CACHE_MAX_ENTRIES, the oldest entries. Returns true if anything was removed.
 */
function pruneCache(now) {
  let changed = false;

  for (const key of Object.keys(queryCache)) {
    if (!isFreshCacheEntry(queryCache[key], now)) {
      delete queryCache[key];
      changed = true;
    }
  }

  const keys = Object.keys(queryCache);
  if (keys.length > CACHE_MAX_ENTRIES) {
    keys.sort((a, b) => queryCache[a].timestamp - queryCache[b].timestamp);
    for (const key of keys.slice(0, keys.length - CACHE_MAX_ENTRIES)) {
      delete queryCache[key];
      changed = true;
    }
  }

  return changed;
}

/**
 * Loads the persisted cache into memory, discarding anything that is not a
 * plain object of fresh, well-formed entries.
 */
function loadCache() {
  const storage = getCacheStorage();
  if (!storage) return;

  try {
    const raw = storage.getItem(CACHE_STORAGE_KEY);
    if (!raw) return;
    const parsed = JSON.parse(raw);
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
      queryCache = parsed;
      if (pruneCache(Date.now())) {
        persistCache();
      }
    }
  } catch (e) {
    console.warn("Unable to load search cache from localStorage.", e);
    queryCache = {};
  }
}

/**
 * Writes the in-memory cache to localStorage. Failures (for example a full
 * quota) are logged and otherwise ignored: the cache is best-effort.
 */
function persistCache() {
  const storage = getCacheStorage();
  if (!storage) return;

  try {
    storage.setItem(CACHE_STORAGE_KEY, JSON.stringify(queryCache));
  } catch (e) {
    console.warn("Unable to persist search cache to localStorage.", e);
  }
}

/**
 * Builds the cache key for one page of one query. Search text is trimmed and
 * lower-cased so trivially different spellings share an entry.
 */
function makeCacheKey(searchText, usePhrase, limit, skip) {
  const text = searchText === null || searchText === undefined ? "" : String(searchText);
  return JSON.stringify({
    searchText: text.trim().toLowerCase(),
    phrase: !!usePhrase,
    limit,
    skip
  });
}

/**
 * Returns the cached results array for `key`, or null on a miss. Expired or
 * malformed entries are removed and reported as a miss.
 */
function getCachedResults(key) {
  if (!Object.prototype.hasOwnProperty.call(queryCache, key)) {
    return null;
  }
  const entry = queryCache[key];
  if (!isFreshCacheEntry(entry, Date.now())) {
    delete queryCache[key];
    persistCache();
    return null;
  }
  return entry.results;
}

/**
 * Stores `results` under `key` with the current timestamp, prunes stale or
 * excess entries, and persists the cache.
 */
function setCachedResults(key, results) {
  const now = Date.now();
  queryCache[key] = {
    timestamp: now,
    results
  };
  pruneCache(now);
  persistCache();
}

/**
 * Enforce client-side rate limiting.
 *
 * Returns an object:
 *  - { ok: true } when a search can proceed now (the slot is recorded).
 *  - { ok: false, reason, retryAfterMs } when the caller should block.
 */
function checkRateLimit() {
  const now = Date.now();

  // Keep only timestamps strictly inside the rolling window; a search that is
  // exactly one window old no longer counts against the limit.
  rateLimitState.recentSearches = rateLimitState.recentSearches.filter(
    (t) => now - t < RATE_WINDOW_MS
  );

  // Enforce max 1 per second.
  const sinceLast = now - rateLimitState.lastSearchTime;
  if (sinceLast < MIN_SEARCH_INTERVAL_MS) {
    return {
      ok: false,
      reason: "You can run at most 1 search per second.",
      retryAfterMs: MIN_SEARCH_INTERVAL_MS - sinceLast
    };
  }

  // Enforce max 5 per rolling minute.
  if (rateLimitState.recentSearches.length >= MAX_SEARCHES_PER_WINDOW) {
    const oldest = rateLimitState.recentSearches[0];
    return {
      ok: false,
      reason: "You can run at most 5 searches per minute.",
      retryAfterMs: Math.max(0, RATE_WINDOW_MS - (now - oldest))
    };
  }

  // Record successful slot reservation.
  rateLimitState.lastSearchTime = now;
  rateLimitState.recentSearches.push(now);
  return { ok: true };
}

/**
 * Formats a wait time in milliseconds as a short human-readable phrase.
 * Non-finite input is treated as zero.
 */
function formatRetryTime(ms) {
  const seconds = Number.isFinite(ms) ? Math.ceil(ms / 1000) : 0;
  if (seconds <= 1) return "about 1 second";
  if (seconds < 60) return `about ${seconds} seconds`;
  const minutes = Math.ceil(seconds / 60);
  return minutes === 1 ? "about 1 minute" : `about ${minutes} minutes`;
}

/**
 * Coerces `value` to an integer within [min, max]; returns `fallback` when it
 * is not a finite number.
 */
function clampInteger(value, min, max, fallback) {
  const n = Math.trunc(Number(value));
  if (!Number.isFinite(n)) return fallback;
  return Math.min(max, Math.max(min, n));
}

/**
 * Normalizes a search request so the cache key and the request URL always
 * agree: limit is clamped to 1..MAX_PAGE_LIMIT and skip to >= 0.
 */
function normalizeSearchRequest({ searchText, usePhrase, limit = PAGE_LIMIT, skip = 0 }) {
  return {
    searchText,
    usePhrase: !!usePhrase,
    limit: clampInteger(limit, 1, MAX_PAGE_LIMIT, PAGE_LIMIT),
    skip: clampInteger(skip, 0, Number.MAX_SAFE_INTEGER, 0)
  };
}

/**
 * Returns the cached results for a search request, or null if that page is
 * not cached. Never touches the network or the rate limiter.
 */
function lookupCachedSearch(request) {
  const { searchText, usePhrase, limit, skip } = normalizeSearchRequest(request);
  return getCachedResults(makeCacheKey(searchText, usePhrase, limit, skip));
}

/**
 * Runs one page of a RERUM text search, serving it from the cache when
 * possible.
 *
 * @param {{searchText: string, usePhrase: boolean, limit?: number, skip?: number}} request
 * @returns {Promise<{results: Array, fromCache: boolean}>}
 * @throws {Error} when the HTTP status is not OK or the payload is not an array.
 */
async function performRerumSearch({ searchText, usePhrase, limit = PAGE_LIMIT, skip = 0 }) {
  const request = normalizeSearchRequest({ searchText, usePhrase, limit, skip });
  const cacheKey = makeCacheKey(request.searchText, request.usePhrase, request.limit, request.skip);

  const cached = getCachedResults(cacheKey);
  if (cached !== null) {
    return {
      results: cached,
      fromCache: true
    };
  }

  const endpoint = new URL(
    `${RERUM_API_BASE}/api/search${request.usePhrase ? "/phrase" : ""}`
  );
  endpoint.searchParams.set("limit", String(request.limit));
  endpoint.searchParams.set("skip", String(request.skip));

  const resp = await fetch(endpoint.toString(), {
    method: "POST",
    headers: {
      "Content-Type": "application/json; charset=utf-8"
    },
    body: JSON.stringify({
      searchText: searchText
    })
  });

  if (!resp.ok) {
    throw new Error(`RERUM search failed with status ${resp.status}`);
  }

  const data = await resp.json();
  if (!Array.isArray(data)) {
    throw new Error("Unexpected search response format from RERUM.");
  }

  setCachedResults(cacheKey, data);

  return {
    results: data,
    fromCache: false
  };
}

/**
 * Pulls display text out of an annotation body: a Web Annotation body object
 * (`value`), an array of such bodies (first one with a string `value`), or an
 * older embedded-content resource (`chars`). Returns "" when none applies.
 */
function extractBodyText(body) {
  if (!body || typeof body !== "object") return "";
  if (Array.isArray(body)) {
    const hit = body.find(
      (b) => b !== null && typeof b === "object" && typeof b.value === "string"
    );
    return hit ? hit.value : "";
  }
  if (typeof body.value === "string") return body.value;
  if (typeof body.chars === "string") return body.chars;
  return "";
}

/**
 * Reduces a raw search result to the fields the UI shows.
 *
 * @param {*} result one element of the search response (may be malformed).
 * @returns {{id: string, type: string, bodyText: string, target: *}}
 */
function extractDisplayData(result) {
  if (!result || typeof result !== "object") {
    return { id: "", type: "", bodyText: "", target: null };
  }

  const rawId = result["@id"] || result.id;
  const id = typeof rawId === "string" ? rawId : "";

  // `type` may be a single string or a list; older records use `@type`.
  const rawType = result.type || result["@type"];
  let type = "";
  if (typeof rawType === "string") {
    type = rawType;
  } else if (Array.isArray(rawType)) {
    type = rawType.filter((t) => typeof t === "string").join(", ");
  }

  let bodyText = extractBodyText(result.body);
  if (!bodyText && typeof result.bodyValue === "string") {
    bodyText = result.bodyValue;
  }
  if (!bodyText) {
    bodyText = extractBodyText(result.resource);
  }

  // Attempt to surface an associated IIIF or Web Annotation target.
  const target = result.target || result.on || null;

  return {
    id,
    type,
    bodyText,
    target
  };
}

/**
 * Creates a DOM node for an untrusted string: a link when it is an http(s)
 * URL, otherwise a plain text node. The value is never parsed as HTML.
 */
function createLinkOrText(value) {
  let parsed = null;
  try {
    parsed = new URL(value);
  } catch (e) {
    parsed = null; // Not an absolute URL; render as text.
  }

  if (parsed && (parsed.protocol === "http:" || parsed.protocol === "https:")) {
    const link = document.createElement("a");
    link.href = parsed.href;
    link.target = "_blank";
    link.rel = "noopener noreferrer";
    link.textContent = value;
    return link;
  }
  return document.createTextNode(value);
}

/**
 * Renders `results` into #results-container, optionally narrowed by a
 * case-insensitive client-side filter, and updates #results-summary.
 * All API-provided values are inserted as text, never as markup.
 */
function renderResults(results, filterText) {
  const container = document.getElementById("results-container");
  const summary = document.getElementById("results-summary");
  if (!container || !summary) return;

  container.textContent = "";

  const allResults = Array.isArray(results) ? results : [];
  const trimmedFilter = (filterText || "").trim().toLowerCase();

  const processed = allResults.map(extractDisplayData);
  const visible = trimmedFilter
    ? processed.filter((item) => {
        const haystack = [
          item.bodyText,
          item.id,
          item.type,
          typeof item.target === "string" ? item.target : JSON.stringify(item.target || "")
        ]
          .join(" ")
          .toLowerCase();
        return haystack.includes(trimmedFilter);
      })
    : processed;

  summary.textContent =
    allResults.length === 0
      ? "No results."
      : `${visible.length} of ${allResults.length} results shown${
          trimmedFilter ? " after client-side filtering" : ""
        }.`;

  if (visible.length === 0) {
    return;
  }

  const list = document.createElement("div");
  list.className = "search-results";

  visible.forEach((item) => {
    const card = document.createElement("article");
    card.className = "card search-result";

    const header = document.createElement("header");
    const title = document.createElement("h4");
    title.textContent = item.bodyText || "(No body text found)";
    header.appendChild(title);

    const meta = document.createElement("p");
    meta.className = "text-small";
    meta.appendChild(
      document.createTextNode(item.type ? `Type: ${item.type}` : "Type: (unknown)")
    );
    meta.appendChild(document.createElement("br"));
    meta.appendChild(document.createTextNode("@id: "));
    meta.appendChild(item.id ? createLinkOrText(item.id) : document.createTextNode("(no @id)"));

    const targetEl = document.createElement("p");
    targetEl.className = "text-small";
    if (!item.target) {
      targetEl.textContent = "Target: (not provided)";
    } else if (typeof item.target === "string") {
      targetEl.appendChild(document.createTextNode("Target: "));
      targetEl.appendChild(createLinkOrText(item.target));
    } else if (typeof item.target === "object") {
      const targetId = item.target.id || item.target["@id"];
      if (typeof targetId === "string") {
        targetEl.appendChild(document.createTextNode("Target: "));
        targetEl.appendChild(createLinkOrText(targetId));
      } else {
        targetEl.textContent = "Target: (complex IIIF / Web Annotation target)";
      }
    } else {
      targetEl.textContent = "Target: (unrecognized format)";
    }

    card.appendChild(header);
    card.appendChild(meta);
    card.appendChild(targetEl);

    list.appendChild(card);
  });

  container.appendChild(list);
}

/**
 * Looks up the search page's controls and attaches all event handlers.
 * Logs an error and does nothing if any expected element is missing.
 */
function wireUpSearchUI() {
  const form = document.getElementById("search-form");
  const searchInput = document.getElementById("search-text");
  const phraseCheckbox = document.getElementById("phrase-search");
  const searchButton = document.getElementById("search-button");
  const clearButton = document.getElementById("clear-results");
  const status = document.getElementById("search-status");
  const clientFilter = document.getElementById("client-filter");
  const loadMoreButton = document.getElementById("load-more");
  const paginationRow = document.getElementById("pagination-row");

  if (
    !form ||
    !searchInput ||
    !phraseCheckbox ||
    !searchButton ||
    !clearButton ||
    !status ||
    !clientFilter ||
    !loadMoreButton ||
    !paginationRow
  ) {
    console.error("Search UI elements are missing from the DOM.");
    return;
  }

  // Prevent any automatic search on page load; searches only run
  // in response to this explicit form submission.
  let lastResults = [];
  let totalLoaded = 0;
  let lastQuery = null;
  let rateLimitTimeoutId = null;
  let isSearching = false;
  // Bumped by "Clear" so a response that arrives afterwards is discarded.
  let queryGeneration = 0;

  function setStatus(message, type = "info") {
    status.textContent = message;
    status.className = `text-small status-${type}`;
  }

  function setSearching(busy) {
    // Do not re-enable the search button while a rate-limit cooldown is pending.
    searchButton.disabled = busy || rateLimitTimeoutId !== null;
    loadMoreButton.disabled = busy;
    form.querySelectorAll("input,button").forEach((el) => {
      if (el === clientFilter || el === clearButton) return;
      if (busy) {
        el.setAttribute("aria-busy", "true");
      } else {
        el.removeAttribute("aria-busy");
      }
    });
  }

  function disableSearchTemporarily(reason, retryAfterMs) {
    searchButton.disabled = true;
    const when = formatRetryTime(retryAfterMs);
    setStatus(`${reason} Please wait ${when} before trying again.`, "warning");

    if (rateLimitTimeoutId) {
      clearTimeout(rateLimitTimeoutId);
    }
    rateLimitTimeoutId = window.setTimeout(() => {
      rateLimitTimeoutId = null;
      // If a search started in the meantime, leave its busy state and status alone.
      searchButton.disabled = isSearching;
      if (!isSearching) {
        setStatus("You can run a new search now.", "info");
      }
    }, retryAfterMs);
  }

  /**
   * Loads one page for a query. Only requests that actually need the network
   * consume a rate-limit slot; cached pages are always allowed.
   *
   * Resolves to { results, fromCache }, or null when the request was blocked
   * by the rate limiter or superseded by "Clear". Rejects on search failure.
   */
  async function loadPage({ searchText, usePhrase, skip, busyMessage }) {
    const request = { searchText, usePhrase, limit: PAGE_LIMIT, skip };

    if (lookupCachedSearch(request) === null) {
      const limitCheck = checkRateLimit();
      if (!limitCheck.ok) {
        disableSearchTemporarily(limitCheck.reason, limitCheck.retryAfterMs || 1000);
        return null;
      }
    }

    const generation = queryGeneration;
    isSearching = true;
    setSearching(true);
    setStatus(busyMessage, "info");

    try {
      const page = await performRerumSearch(request);
      return generation === queryGeneration ? page : null;
    } finally {
      isSearching = false;
      setSearching(false);
    }
  }

  form.addEventListener("submit", async (event) => {
    event.preventDefault();

    if (isSearching) {
      // A search is already in progress; ignore rapid repeat submits.
      return;
    }

    const searchText = searchInput.value.trim();
    if (!searchText) {
      setStatus("Please enter text to search.", "error");
      return;
    }

    const usePhrase = phraseCheckbox.checked;

    try {
      const page = await loadPage({
        searchText,
        usePhrase,
        skip: 0,
        busyMessage: "Running search…"
      });
      if (!page) return;

      const { results, fromCache } = page;
      lastQuery = { searchText, usePhrase };
      lastResults = results;
      totalLoaded = results.length;
      clientFilter.value = "";
      renderResults(lastResults, "");

      // Show or hide pagination controls based on whether we might have more.
      if (results.length >= PAGE_LIMIT) {
        paginationRow.style.display = "";
        loadMoreButton.disabled = false;
      } else {
        paginationRow.style.display = "none";
      }

      const plural = results.length === 1 ? "" : "s";
      if (fromCache) {
        setStatus(
          `Loaded ${results.length} result${plural} from cache (no new API call).`,
          "success"
        );
      } else {
        setStatus(`Fetched ${results.length} result${plural} from RERUM.`, "success");
      }
    } catch (err) {
      console.error("Search failed:", err);
      setStatus(
        "Search failed. Please try again in a moment. If this persists, RERUM may be unavailable.",
        "error"
      );
    }
  });

  loadMoreButton.addEventListener("click", async () => {
    if (isSearching || !lastQuery) {
      return;
    }

    try {
      const page = await loadPage({
        searchText: lastQuery.searchText,
        usePhrase: lastQuery.usePhrase,
        skip: totalLoaded,
        busyMessage: "Loading more results…"
      });
      if (!page) return;

      const { results, fromCache } = page;

      // Append new page and update counters.
      if (results.length > 0) {
        lastResults = lastResults.concat(results);
        totalLoaded = lastResults.length;
        renderResults(lastResults, clientFilter.value);
      }

      if (results.length < PAGE_LIMIT) {
        // No more pages expected.
        paginationRow.style.display = "none";
        setStatus(
          `All results loaded (${lastResults.length} total).${
            fromCache ? " Additional pages came from cache where available." : ""
          }`,
          "success"
        );
      } else {
        setStatus(
          `${lastResults.length} results loaded so far.${
            fromCache ? " This page was served from cache." : ""
          }`,
          "success"
        );
      }
    } catch (err) {
      console.error("Load more failed:", err);
      setStatus("Loading more results failed. Please try again in a moment.", "error");
    }
  });

  clientFilter.addEventListener("input", () => {
    renderResults(lastResults, clientFilter.value);
  });

  clearButton.addEventListener("click", () => {
    queryGeneration += 1; // Invalidate any response still in flight.
    lastResults = [];
    lastQuery = null;
    totalLoaded = 0;
    renderResults(lastResults, "");
    clientFilter.value = "";
    paginationRow.style.display = "none";
    setStatus("Results cleared. Enter text and press Search to run a new query.", "info");
  });

  // Initial UI state.
  renderResults([], "");
  paginationRow.style.display = "none";
  setStatus(
    "Enter text and press Search to query RERUM annotations. Results may be cached for up to 20 minutes.",
    "info"
  );
}

/** Loads the persisted cache and wires up the page. */
function initSearchTool() {
  loadCache();
  wireUpSearchUI();
}

// Only bootstrap in a browser, and also when the script is loaded after the
// document has already finished parsing (DOMContentLoaded would never fire).
if (typeof document !== "undefined") {
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", initSearchTool);
  } else {
    initSearchTool();
  }
}
+ }, + { + label: "Annotation Text Search", + icon: "./images/rerum_logo.png", + view: "./search.html", + description: "Search RERUM Web Annotations and IIIF text content with client-side caching and rate limits to protect the API." } ]; diff --git a/web/search.html b/web/search.html new file mode 100644 index 00000000..73a23302 --- /dev/null +++ b/web/search.html @@ -0,0 +1,125 @@ + + + + + + Annotation Text Search | RERUM Playground + + + + + + + + + + + + + + +
+
+
+ + Rerum Playground +
+
+
+ +
+
+

Annotation Text Search

+

+ Search Web Annotations and IIIF resources stored in RERUM by text + appearing in their bodies. Results are cached client-side to avoid + duplicate searches and are rate limited to protect the RERUM API. +

+
+ +
+

Search Controls

+
+
+
+ + +
+
+ +
+
+ +
+
+ +
+
+ + + + This filter never triggers new API calls; it only narrows + the results already loaded on the page. + +
+
+ +
+
+ + +
+
+ +

+
+
+ +
+

Results

+

+
+
+
+ +
+
+
+
+ + + + +