From 81857f4e04fb411f96585fe186903e88018fa21e Mon Sep 17 00:00:00 2001 From: Fabian Haas Date: Wed, 1 May 2024 04:22:42 +0200 Subject: [PATCH] download profile pictures Cannot use the original link due to cross-origin policies, convert them to data urls instead --- src/index.ts | 53 +++++++++++++++++++++++++++++++++------ src/instagram/follower.ts | 13 +++++++--- src/instagram/user.ts | 20 +++++++++++++-- 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/src/index.ts b/src/index.ts index 42b77ad..291606e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -70,6 +70,10 @@ async function readExistingSessionId(): Promise { } } +async function blobToDataUrl(blob: Blob) { + const buffer = Buffer.from(await blob.arrayBuffer()); + return new URL("data:" + blob.type + ';base64,' + buffer.toString('base64')); +} async function rootUser({session}) { while (true) { @@ -80,7 +84,13 @@ async function rootUser({session}) { }) const rootUser = await fetchUser(rootUsername.trim(), session); - console.dir({...rootUser, profile: {...rootUser.profile, imageURL: rootUser.profile.imageURL.href}}) + console.dir({ + ...rootUser, + profile: { + ...rootUser.profile, + image: await rootUser.profile.image.then(blobToDataUrl).then(url => url.href) + } + }) if (await prompt.confirm({message: "Continue with this user?", default: true})) { return rootUser @@ -103,11 +113,34 @@ async function wholeNumberPrompt({message, defaultValue}: { message: string, def let graph: UserGraph = {} -process.on('SIGINT', function() { - console.log(JSON.stringify(graph)) - printGraph(graph) - process.exit(0) -}); +async function dumpGraph(graph: UserGraph) { + const downloads = Object.values(graph).map(async user => { + return { + ...user, + profile: { + ...user.profile, + image: await user.profile.image + .then(blobToDataUrl) + .catch((reason) => { + console.error({ + message: `Failed to download profile picture. (User: ${user.profile.username})`, + reason + }) + + return null; + }) + } + } + }) + + const dump: UserGraph = (await Promise.all(downloads)).reduce((graph, user) => { + graph[user.id] = user + return graph + }, {}) + + return JSON.stringify(dump) +} + try { const existingSession = await prompt.confirm({message: "Use an existing session id?", default: false}); @@ -160,6 +193,12 @@ try { } }).getReader() + process.on('SIGINT', async () => { + console.log(await dumpGraph(graph)) + printGraph(graph) + process.exit(0) + }); + while (true) { const {done, value} = await reader.read() if (done) break; @@ -198,5 +237,5 @@ try { } } -console.log(JSON.stringify(graph)) +console.log(await dumpGraph(graph)) printGraph(graph) diff --git a/src/instagram/follower.ts b/src/instagram/follower.ts index eb8edc6..2b06234 100644 --- a/src/instagram/follower.ts +++ b/src/instagram/follower.ts @@ -1,6 +1,6 @@ import SessionData, {sessionToCookie} from "./session-data"; import {RandomDelayLimit, Limits} from "./limits"; -import {User, UserGraph} from "./user"; +import {downloadProfilePicture, User, UserGraph} from "./user"; import {ReadableStream} from "node:stream/web"; import {hasJsonBody} from "./request"; @@ -227,6 +227,7 @@ async function createFollowerGraph({controller, limits, graph, session, includeF session, targetUser: graph[task], nextPage, + limits, direction: FollowerDirection.FOLLOWER }) @@ -268,6 +269,7 @@ async function createFollowerGraph({controller, limits, graph, session, includeF session, targetUser: graph[task], nextPage, + limits, direction: FollowerDirection.FOLLOWING }) @@ -324,8 +326,8 @@ enum FollowerDirection { FOLLOWER, FOLLOWING } -async function fetchFollowers({session, targetUser, nextPage, direction}: { - session: SessionData, targetUser: User, nextPage?: string, direction: FollowerDirection +async function fetchFollowers({session, targetUser, nextPage, direction, limits}: { + session: SessionData, targetUser: User, nextPage?: string, direction: FollowerDirection, limits: Limits }): Promise<{ page: User[], nextPage: string }> { const query = nextPage ? `?max_id=${nextPage}` : ''; const directionPath = direction === FollowerDirection.FOLLOWING ? 'following' : 'followers' @@ -374,7 +376,10 @@ async function fetchFollowers({session, targetUser, nextPage, direction}: { profile: { username: user.username, name: user.full_name, - imageURL: new URL(user.profile_pic_url) + image: randomDelay({ + lower: 0, + upper: limits.rate.delay.pages.upper + }).delay.then(() => downloadProfilePicture(user.profile_pic_url)) }, public: !user.is_private, private: user.is_private && targetUser.id != session.user.id diff --git a/src/instagram/user.ts b/src/instagram/user.ts index 4932bee..4614918 100644 --- a/src/instagram/user.ts +++ b/src/instagram/user.ts @@ -5,7 +5,7 @@ export interface User { profile: { name: string, username: string, - imageURL: URL | null, + image: Promise | null, } followerIds?: number[], private?: boolean, @@ -44,7 +44,7 @@ export async function fetchUser(username: string, session?: SessionData): Promis profile: { name: user.full_name, username: user.username, - imageURL: user.profile_pic_url ? new URL(user.profile_pic_url) : null, + image: downloadProfilePicture(user.profile_pic_url) }, personal: !user.is_business_account && !user.is_professional_account, public: !user.is_private @@ -54,3 +54,19 @@ export async function fetchUser(username: string, session?: SessionData): Promis return mapped; } + +export async function downloadProfilePicture(source: string | undefined): Promise | null { + if (!source) return null + + const response = await fetch(source, { + headers: { + "Sec-Fetch-Site": "same-origin", + } + }) + + if (!response.ok) { + throw Error(await response.text()) + } + + return await response.blob() +}