Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 66 additions & 7 deletions api/src/processing/services/pinterest.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { genericUserAgent } from "../../config.js";
import { resolveRedirectingURL } from "../url.js";

// Captures (group 1) a https://v1.pinimg.com/videos/... URL that appears as a
// JSON `"url":"..."` value; the lazy `.*?` stops at the first closing quote.
const videoRegex = /"url":"(https:\/\/v1\.pinimg\.com\/videos\/.*?)"/g;
// Broad image matcher: captures (group 1) any `src="https://i.pinimg.com/..."`
// value ending in .jpg or .gif (extension in group 2). The `.*` is greedy, so a
// match extends to the last `.jpg"`/`.gif"` on the same line.
// NOTE(review): this view shows two `imageRegex` declarations — confirm which
// one is live in the actual file.
const imageRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g;
// Narrow image matcher: requires a size segment (`<digits>x` or `orig`) followed
// by exactly 41 characters drawn from lowercase hex digits and `/`, then .jpg or
// .gif. Group 1 is the full URL, group 2 the extension.
const imageRegex = /src="(https:\/\/i\.pinimg\.com\/(?:\d+x|orig)\/[0-9a-f/]{41}\.(jpg|gif))"/g;
// Matches a `"__typename": "PinNotFound"` marker, allowing whitespace around the colon.
const notFoundRegex = /"__typename"\s*:\s*"PinNotFound"/;

export default async function(o) {
Expand Down Expand Up @@ -36,14 +36,73 @@ export default async function(o) {
audioFilename: `pinterest_${id}_audio`
}

const imageLink = [...html.matchAll(imageRegex)]
.map(([, link]) => link)
.find(a => a.endsWith('.jpg') || a.endsWith('.gif'));
const allImageMatches = [...html.matchAll(imageRegex)];

if (allImageMatches.length === 0) {
// Fallback to broader regex if precise one finds nothing
const fallbackRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g;
const fallbackMatches = [...html.matchAll(fallbackRegex)];

if (fallbackMatches.length > 0) {
// Use first fallback image
const fallbackUrl = fallbackMatches[0][1];
const imageType = fallbackUrl.endsWith(".gif") ? "gif" : "jpg";
return {
urls: fallbackUrl,
isPhoto: true,
filename: `pinterest_${id}.${imageType}`
};
}

return { error: "fetch.empty" };
}

const imageType = imageLink.endsWith(".gif") ? "gif" : "jpg"
// Step 1: Get the first image (always main content)
const firstImageUrl = allImageMatches[0][1];

// Step 2: Extract the image hash/identifier
const hashMatch = firstImageUrl.match(/\/(?:\d+x|orig)\/([0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{32})\.(jpg|gif)/);

if (!hashMatch) {
// Fallback to first image if we can't parse the hash
const imageType = firstImageUrl.endsWith(".gif") ? "gif" : "jpg";
return {
urls: firstImageUrl,
isPhoto: true,
filename: `pinterest_${id}.${imageType}`
};
}

const imageHash = hashMatch[1]; // e.g., "7c/0a/1c/7c0a1c5f1c999a4a67f3c5b847da093c"
const extension = hashMatch[2];

// Step 3: Find all variations of this specific image
const sameImageUrls = allImageMatches
.map(([, url]) => url)
.filter(url => url.includes(imageHash))
.filter(url => url.endsWith(`.${extension}`));

// Step 4: Sort by quality and take the best
const bestQualityUrl = sameImageUrls.sort((a, b) => {
const getQualityScore = (url) => {
// Check for originals (highest quality)
if (url.includes('/orig/')) return Infinity;

// Extract resolution number (e.g., "736" from "/736x/")
const resolutionMatch = url.match(/\/(\d+)x\//);
if (resolutionMatch) {
return parseInt(resolutionMatch[1], 10);
}

return 0;
};
return getQualityScore(b) - getQualityScore(a);
})[0];

const imageType = extension;

if (imageLink) return {
urls: imageLink,
if (bestQualityUrl) return {
urls: bestQualityUrl,
isPhoto: true,
filename: `pinterest_${id}.${imageType}`
}
Expand Down