// Patch summary (review notes). Everything ahead of the `diff --git` header is
// commentary and not part of any hunk.
//
// File touched: api/src/processing/services/pinterest.js
//
// Hunk 1 (module-level regex constants): narrows `imageRegex`. The old pattern
// accepted any path under https://i.pinimg.com/ that ends in .jpg or .gif. The
// new pattern requires a `<digits>x` or `orig` size segment followed by exactly
// 41 characters drawn from [0-9a-f/] before the extension.
//
// Hunk 2 (inside the default-exported async function, after the object literal
// that ends with `audioFilename`):
//   * Removed: taking the first `imageRegex` match via `.find(...)` and calling
//     `imageLink.endsWith(...)` before the `if (imageLink)` guard, so an empty
//     match list raised instead of reaching the guard.
//   * Added: when the strict pattern matches nothing, retry with the old broad
//     pattern and return its first hit (`isPhoto: true`, filename
//     `pinterest_${id}.<gif|jpg>`), or `{ error: "fetch.empty" }` when that is
//     empty too.
//   * Added: otherwise take the first strict match, parse its
//     `xx/xx/xx/<32 hex chars>` path and its extension, keep only the matches
//     that contain that path and end in the same extension, sort them in
//     descending order of a score (`/orig/` = Infinity, `/<N>x/` = N, else 0)
//     and return the top entry. If the path cannot be parsed, the first match
//     is returned unchanged.
//
// NOTE(review): the fallback pattern keeps the greedy `.*`; when one line of
//   HTML holds several matching src attributes, the capture runs from the first
//   URL to the last `.jpg"` / `.gif"` on that line. Consider `[^"]*` instead.
// NOTE(review): the comparator evaluates `Infinity - Infinity` (NaN) when two
//   `/orig/` URLs are compared -- confirm the resulting order is acceptable.
// NOTE(review): `getQualityScore` is re-created on every comparator call; it
//   could be hoisted out of the `sort` callback.
// NOTE(review): line breaks and indentation of this patch appear to have been
//   lost in storage; restore them from the original before trying to apply it.
diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index c21400e1a..94c31de0c 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -2,7 +2,7 @@ import { genericUserAgent } from "../../config.js"; import { resolveRedirectingURL } from "../url.js"; const videoRegex = /"url":"(https:\/\/v1\.pinimg\.com\/videos\/.*?)"/g; -const imageRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g; +const imageRegex = /src="(https:\/\/i\.pinimg\.com\/(?:\d+x|orig)\/[0-9a-f/]{41}\.(jpg|gif))"/g; const notFoundRegex = /"__typename"\s*:\s*"PinNotFound"/; export default async function(o) { @@ -36,14 +36,73 @@ export default async function(o) { audioFilename: `pinterest_${id}_audio` } - const imageLink = [...html.matchAll(imageRegex)] - .map(([, link]) => link) - .find(a => a.endsWith('.jpg') || a.endsWith('.gif')); + const allImageMatches = [...html.matchAll(imageRegex)]; + + if (allImageMatches.length === 0) { + // Fallback to broader regex if precise one finds nothing + const fallbackRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g; + const fallbackMatches = [...html.matchAll(fallbackRegex)]; + + if (fallbackMatches.length > 0) { + // Use first fallback image + const fallbackUrl = fallbackMatches[0][1]; + const imageType = fallbackUrl.endsWith(".gif") ? "gif" : "jpg"; + return { + urls: fallbackUrl, + isPhoto: true, + filename: `pinterest_${id}.${imageType}` + }; + } + + return { error: "fetch.empty" }; + } - const imageType = imageLink.endsWith(".gif") ? "gif" : "jpg" + // Step 1: Get the first image (always main content) + const firstImageUrl = allImageMatches[0][1]; + + // Step 2: Extract the image hash/identifier + const hashMatch = firstImageUrl.match(/\/(?:\d+x|orig)\/([0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{32})\.(jpg|gif)/); + + if (!hashMatch) { + // Fallback to first image if we can't parse the hash + const imageType = firstImageUrl.endsWith(".gif") ? 
"gif" : "jpg"; + return { + urls: firstImageUrl, + isPhoto: true, + filename: `pinterest_${id}.${imageType}` + }; + } + + const imageHash = hashMatch[1]; // e.g., "7c/0a/1c/7c0a1c5f1c999a4a67f3c5b847da093c" + const extension = hashMatch[2]; + + // Step 3: Find all variations of this specific image + const sameImageUrls = allImageMatches + .map(([, url]) => url) + .filter(url => url.includes(imageHash)) + .filter(url => url.endsWith(`.${extension}`)); + + // Step 4: Sort by quality and take the best + const bestQualityUrl = sameImageUrls.sort((a, b) => { + const getQualityScore = (url) => { + // Check for originals (highest quality) + if (url.includes('/orig/')) return Infinity; + + // Extract resolution number (e.g., "736" from "/736x/") + const resolutionMatch = url.match(/\/(\d+)x\//); + if (resolutionMatch) { + return parseInt(resolutionMatch[1], 10); + } + + return 0; + }; + return getQualityScore(b) - getQualityScore(a); + })[0]; + + const imageType = extension; - if (imageLink) return { - urls: imageLink, + if (bestQualityUrl) return { + urls: bestQualityUrl, isPhoto: true, filename: `pinterest_${id}.${imageType}` }