diff --git a/.gitignore b/.gitignore index d0e7014..198c663 100644 --- a/.gitignore +++ b/.gitignore @@ -238,3 +238,4 @@ $RECYCLE.BIN/ *.lnk /build +*output.txt diff --git a/src/index.ts b/src/index.ts index 3551cf5..86e9a81 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,14 +1,19 @@ import * as prompt from '@inquirer/prompts'; +import {ExitPromptError} from '@inquirer/prompts'; import { encryptPassword, fetchVerification, - login, SessionData, + login, TwoFactorInformation, TwoFactorRequired, VerificationData, verify2FA -} from "./instagram"; -import {ExitPromptError} from "@inquirer/prompts"; +} from "./instagram/login"; +import {FollowerFetcherEvent, FollowerFetcherEventTypes, getFollowerGraph, printGraph} from "./instagram/follower"; +import SessionData from "./instagram/session-data"; +import {fetchUser, User, UserGraph} from "./instagram/user"; +import {writeFileSync} from "node:fs"; +import {ReadableStream} from "node:stream/web"; async function authenticate(): Promise { @@ -67,6 +72,149 @@ async function readExistingSessionId(): Promise { } } +async function blobToDataUrl(blob: Blob) { + const buffer = Buffer.from(await blob.arrayBuffer()); + return new URL("data:" + blob.type + ';base64,' + buffer.toString('base64')); +} + +async function rootUser({session}) { + while (true) { + try { + const rootUsername = await prompt.input({ + message: "Starting point account username: ", + default: session.user.username + }) + + const rootUser = await fetchUser(rootUsername.trim(), session); + console.dir({ + ...rootUser, + profile: { + ...rootUser.profile, + image: await rootUser.profile.image.then(blobToDataUrl).then(url => url.href) + } + }) + + if (await prompt.confirm({message: "Continue with this user?", default: true})) { + return rootUser + } + } catch (e) { + if ((e instanceof ExitPromptError)) throw e; + + console.error(`Error: ${e.message ?? e}\n\nCould not load user. Try again.`) + } + } +} + +async function wholeNumberPrompt({message, defaultValue}: { message: string, defaultValue: number }) { + return prompt.input({ + message, + default: defaultValue.toString(10), + validate: input => /^\d*$/.test(input) + }).then(input => parseInt(input, 10)) +} + +async function settleGraph(graph: UserGraph) { + delete graph["canceled"] + + const downloads = Object.values(graph).map(async user => { + return { + ...user, + profile: { + ...user.profile, + image: await user.profile.image + .then(blobToDataUrl) + .catch((reason) => { + console.error({ + message: `Failed to download profile picture. (User: ${user.profile.username})`, + reason + }) + + return null; + }) + } + } + }) + + const settled: UserGraph = (await Promise.all(downloads)).reduce((graph, user) => { + graph[user.id] = user + return graph + }, {}) + + return settled +} + +const writeGraphToFile = async (root: User, graph: UserGraph) => { + const filename = `${root.id}:${root.profile.username}:${new Date().toISOString()}.json` + const data = await settleGraph(graph) + + try { + writeFileSync(filename, JSON.stringify(data, null, 2)) + console.log(`Wrote graph into ${filename}.`) + } catch (error) { + console.error({message: `Cannot write graph into ${filename}. Using stdout instead.`, error}) + await new Promise(resolve => setTimeout(() => { + console.log(JSON.stringify(data)); + resolve(undefined); + }, 500)) + } + + return filename +} + +async function streamGraph(stream: ReadableStream) { + let graph: UserGraph = {} + let cancellation: Promise + + const reader = stream.getReader() + + process.on('SIGINT', () => { + console.info("Process will terminate as soon as it is cleanly possible.") + reader.releaseLock() + stream.cancel(); + }); + + try { + while (stream.locked) { + const {done, value} = await reader.read() + if (done) break; + + graph = value.graph + + const identifier = `(User: ${value.user.profile.username})` + + if (value.type === FollowerFetcherEventTypes.DEPTH_LIMIT_FOLLOWER) { + console.log(`Reached the maximum amount of followers to include. Currently included are ${value.amount}. ${identifier}`) + } else if (value.type === FollowerFetcherEventTypes.DEPTH_LIMIT_FOLLOWING) { + console.log(`Reached the maximum amount of followed users to include. Currently included are ${value.amount}. ${identifier}`) + } else if (value.type === FollowerFetcherEventTypes.RATE_LIMIT_BATCH) { + printGraph(value.graph) + console.log(`Reached follower batch limit. Resuming after ${value.delay} milliseconds. ${identifier}`) + } else if (value.type === FollowerFetcherEventTypes.RATE_LIMIT_DAILY) { + printGraph(value.graph) + console.log(`Reached follower daily limit. Resuming after ${value.delay} milliseconds. ${identifier}`) + } else if (value.type === FollowerFetcherEventTypes.UPDATE) { + const total = Object.entries(value.graph).length + const followers = value.added.followers.length; + const users = value.added.users.length + + console.log( + `Added ${followers > 0 ? followers : 'no'} follower${followers > 1 ? 's' : ''} to ${value.user.profile.username}. ` + + `Discovered ${users > 0 ? users : 'no'} new user${users > 1 ? 's' : ''}. ` + + `Total user count: ${total}, completely queried users ${value.added.progress.done}.` + ) + } + } + } catch (e) { + if (stream.locked) { + reader.releaseLock() + cancellation = stream.cancel() + console.error(e) + } + } + + return {graph, cancellation} +} + try { const existingSession = await prompt.confirm({message: "Use an existing session id?", default: false}); @@ -76,6 +224,56 @@ try { if (await prompt.confirm({message: "Show session data?", default: false})) { console.dir({session}) } + + const root = await rootUser({session}) + + const generations = await wholeNumberPrompt({ + message: "Generations to include: ", defaultValue: 1 + }) + + const followers = await wholeNumberPrompt({ + message: "Maximal follower count to include for each user: ", defaultValue: 250 + }) + + const includeFollowing = await prompt.confirm({message: "Include following?", default: true}) + + const stream = getFollowerGraph({ + includeFollowing, + root, + session, + limits: { + depth: { + generations, + followers, + }, + rate: { + batchSize: 100, + batchCount: 2, + delay: { + pages: { + upper: 5000, + lower: 3000 + }, + batches: { + upper: 35 * 60 * 1000, + lower: 25 * 60 * 1000 + }, + daily: { + upper: 30 * 60 * 60 * 1000, + lower: 25 * 60 * 60 * 1000 + } + } + } + } + }) + + const {graph, cancellation} = await streamGraph(stream) + await Promise.all([writeGraphToFile(root, graph).then(() => { + console.info( + "The may process still needs to wait on the rate limiting timeouts to exit cleanly. " + + "Killing it should not cause any data lose." + ) + }), cancellation]) } catch (e) { if (!(e instanceof ExitPromptError)) { console.error(e) diff --git a/src/instagram/follower.ts b/src/instagram/follower.ts new file mode 100644 index 0000000..dbda6a1 --- /dev/null +++ b/src/instagram/follower.ts @@ -0,0 +1,402 @@ +import SessionData, {sessionToCookie} from "./session-data"; +import {RandomDelayLimit, Limits} from "./limits"; +import {downloadProfilePicture, User, UserGraph} from "./user"; +import {ReadableStream} from "node:stream/web"; +import {hasJsonBody} from "./request"; + +export enum FollowerFetcherEventTypes { + UPDATE, RATE_LIMIT_BATCH, RATE_LIMIT_DAILY, DEPTH_LIMIT_FOLLOWER, DEPTH_LIMIT_FOLLOWING +} + +export interface FollowerFetcherAddition { + followers: number[], + users: User[], + progress: { + done: number + } +} + +export interface FollowerFetcherEvent { + type: FollowerFetcherEventTypes, + user: User, + graph: UserGraph + added?: FollowerFetcherAddition, + delay?: number, + amount?: number +} + +function randomDelay(limit: RandomDelayLimit) { + if (limit.lower > limit.upper) { + const temp = limit.lower; + limit.lower = limit.upper; + limit.upper = temp + } + + const time = Math.floor(Math.random() * (limit.upper - limit.lower) + limit.lower); + return {time, delay: new Promise(resolve => setTimeout(resolve, time))} +} + + +async function rateLimiter({graph, user, phase, batchCount, limits, controller}: { + graph: UserGraph, + user: User, + phase: number, + batchCount: number + limits: Limits, + controller: ReadableStreamDefaultController +}) { + const phaseProgression = Math.floor( + Object.entries(graph).length / (limits.rate.batchSize - batchCount * 25) + ) + + if (phase < phaseProgression) { + printGraph(graph) + + if (phaseProgression > limits.rate.batchCount) { + const delay = randomDelay(limits.rate.delay.daily) + controller.enqueue({ + type: FollowerFetcherEventTypes.RATE_LIMIT_DAILY, + user: user, + delay: delay.time, + graph + }) + + await delay.delay + return 0 + } else { + const delay = randomDelay(limits.rate.delay.daily) + controller.enqueue({ + type: FollowerFetcherEventTypes.RATE_LIMIT_BATCH, + user: user, + delay: delay.time, + graph + }) + + await delay.delay + return phase + } + } + + // delay between retrieving the next follower page + await randomDelay(limits.rate.delay.pages).delay + + return phase +} + +export function printGraph(graph: UserGraph) { + console.table(Object.values(graph).map(user => { + return { + id: user.id, + username: user.profile.username, + private: user.private, + followerCount: user.followerIds?.length, + followers: user.followerIds?.map(id => graph[id].profile.username), + } + })) +} + +function addFollowerToGraph({graph, followers, done, target, controller}: { + graph: UserGraph, + followers: User[], + done: Set, + target: number, + controller: ReadableStreamDefaultController +},) { + const followerIds = new Set(graph[target].followerIds) + const additionalFollowers = followers + .map(follower => follower.id) + .filter(id => !followerIds.has(id)) + + graph[target].followerIds = [...followerIds, ...additionalFollowers] + const additionalUsers = followers.filter(follower => graph[follower.id] === undefined) + additionalUsers.forEach(user => graph[user.id] = user) + + additionalUsers.filter(follower => follower.private) + .map(follower => follower.id) + .forEach(id => done.add(id)) + + controller.enqueue({ + type: FollowerFetcherEventTypes.UPDATE, + user: graph[target], + added: { + followers: additionalFollowers, + users: additionalUsers, + progress: { + done: done.size + } + }, + graph + }) +} + +function addFollowingToGraph({graph, following, done, task, controller}: { + graph: UserGraph, + following: User[], + done: Set, + task: number, + controller: ReadableStreamDefaultController +},) { + following.filter(following => graph[following.id] !== undefined).forEach(user => addFollowerToGraph({ + graph, + followers: [graph[task]], + done, + controller, + target: user.id + })) + + following.filter(following => graph[following.id] === undefined).forEach(user => { + graph[user.id] = { + ...user, + followerIds: [task] + }; + + controller.enqueue({ + graph: {...graph}, + type: FollowerFetcherEventTypes.UPDATE, + user, + added: {users: [user], progress: {done: done.size}, followers: [task]} + }) + }) +} + +export function getFollowerGraph({root, session, limits, includeFollowing}: { + root: User, + session: SessionData, + includeFollowing: boolean, + limits: Limits +}): ReadableStream { + const graph: UserGraph = {[root.id]: root} + + let controller: ReadableStreamDefaultController + + return new ReadableStream({ + start: async (c: ReadableStreamDefaultController) => { + controller = c + + if (root.private) { + controller.enqueue({ + type: FollowerFetcherEventTypes.UPDATE, + user: root, + added: { + followers: [], + users: [root], + progress: { + done: 1 + } + }, + graph + }) + + controller.close() + return + } + + try { + await createFollowerGraph({limits, graph, session, controller, includeFollowing}); + } catch (e) { + controller.error(e) + return + } + + controller.close(); + }, + cancel: async () => { + graph.canceled = true + } + }) +} + +function excess(current: number, limit: number, addition: any[]) { + return addition.slice(addition.length - (current - limit)) +} + +async function createFollowerGraph({controller, limits, graph, session, includeFollowing}: { + controller: ReadableStreamDefaultController, + graph: UserGraph, + limits: Limits, + session: SessionData, + includeFollowing: boolean, +}) { + const done: Set = new Set() + let phase = 0 + + for (let i = 0; i <= limits.depth.generations && !graph.canceled; ++i) { + const open = Object.values(graph) + .filter(user => !done.has(user.id)) + .map(user => user.id) + + if (open.length < 1 || graph.canceled) break; // no open task, skip remaining generations + + while (open.length > 0 && !graph.canceled) { + const batchSize = Math.floor(limits.rate.batchSize / 100) + const batch = open.splice(0, batchSize < 1 ? 1 : batchSize).map(async task => { + graph[task].followerIds = graph[task].followerIds ?? [] + + const followers = async () => { + let nextPage = undefined + + while (nextPage !== null && !graph.canceled) { + const followers = await fetchFollowers({ + session, + targetUser: graph[task], + nextPage, + limits, + direction: FollowerDirection.FOLLOWER + }) + + addFollowerToGraph({graph, followers: followers.page, done, target: task, controller}) + + nextPage = followers.nextPage + + phase = await rateLimiter({ + graph, + user: graph[task], + phase, + limits: limits, + batchCount: batch.length, + controller, + }) + + const userFollowerCount = graph[task].followerIds.length; + if (limits.depth.followers > 0 && userFollowerCount >= limits.depth.followers) { + excess(userFollowerCount, limits.depth.followers, followers.page) + .forEach(user => done.add(user.id)) + + controller.enqueue({ + type: FollowerFetcherEventTypes.DEPTH_LIMIT_FOLLOWER, + user: graph[task], + graph, + amount: userFollowerCount + }) + break; + } + } + } + + const following = async () => { + let nextPage = undefined + let followingCount = 0 + + while (nextPage !== null && !graph.canceled) { + const following = await fetchFollowers({ + session, + targetUser: graph[task], + nextPage, + limits, + direction: FollowerDirection.FOLLOWING + }) + + addFollowingToGraph({ + graph, + following: following.page, + done, + controller, + task: graph[task].id + }) + + phase = await rateLimiter({ + graph, + user: graph[task], + phase, + batchCount: batch.length, + limits, + controller + }) + + followingCount += following.page.length + + if (limits.depth.followers > 0 && followingCount >= limits.depth.followers) { + excess(followingCount, limits.depth.followers, following.page) + .forEach(user => done.add(user.id)) + + controller.enqueue({ + type: FollowerFetcherEventTypes.DEPTH_LIMIT_FOLLOWING, + user: graph[task], + graph: {...graph}, + amount: followingCount + }) + break; + } + + nextPage = following.nextPage; + } + } + + await Promise.all([followers(), (includeFollowing ? following() : Promise.resolve())]) + + done.add(task); + }); + + await Promise.all(batch) + } + } + + return graph +} + +enum FollowerDirection { + FOLLOWER, FOLLOWING +} + +async function fetchFollowers({session, targetUser, nextPage, direction, limits}: { + session: SessionData, targetUser: User, nextPage?: string, direction: FollowerDirection, limits: Limits +}): Promise<{ page: User[], nextPage: string }> { + const query = nextPage ? `?max_id=${nextPage}` : ''; + const directionPath = direction === FollowerDirection.FOLLOWING ? 'following' : 'followers' + + const response = await fetch(`https://www.instagram.com/api/v1/friendships/${targetUser.id}/${directionPath}/${query}`, { + headers: { + "Sec-Fetch-Site": "same-origin", + "X-IG-App-ID": "936619743392459", + "Cookie": sessionToCookie(session), + } + }) + + if (!response.ok) { + if (hasJsonBody(response)) { + const data = (await response.json()) as { + message?: string, + require_login?: boolean + } + + if (data.require_login) throw Error("Authentication failure while querying followers. Check your session id again.") + + throw Error( + data.message ?? + `Received status code ${response.status} (${response.statusText}) while querying followers. ` + + `The response contained the following: ${data}`) + } else { + throw Error(await response.text() ?? 'Failed to load followers.') + } + } + + const page = (await response.json()) as { + users: { + id: string, + full_name: string, + username: string, + profile_pic_url: string, + is_private: boolean + }[], + next_max_id?: string | null + } + + return { + page: page.users.map((user) => { + return { + id: parseInt(user.id, 10), + profile: { + username: user.username, + name: user.full_name, + image: randomDelay({ + lower: 0, + upper: limits.rate.delay.pages.upper + }).delay.then(() => downloadProfilePicture(user.profile_pic_url)) + }, + public: !user.is_private, + private: user.is_private && targetUser.id != session.user.id + } + }), + nextPage: page.next_max_id ?? null + } +} diff --git a/src/instagram/limits.ts b/src/instagram/limits.ts new file mode 100644 index 0000000..ae9dc04 --- /dev/null +++ b/src/instagram/limits.ts @@ -0,0 +1,20 @@ +export interface RandomDelayLimit { + upper: number, + lower: number +} + +export interface Limits { + depth: { + generations: number, + followers: number, + } + rate: { + batchSize: number, + batchCount: number, + delay: { + daily: RandomDelayLimit, + batches: RandomDelayLimit, + pages: RandomDelayLimit + } + } +} diff --git a/src/instagram.ts b/src/instagram/login.ts similarity index 95% rename from src/instagram.ts rename to src/instagram/login.ts index f9fea96..84933f1 100644 --- a/src/instagram.ts +++ b/src/instagram/login.ts @@ -1,5 +1,7 @@ import hexToArrayBuffer from "hex-to-array-buffer"; import sealBox from "tweetnacl-sealedbox-js"; +import SessionData from "./session-data"; +import {hasJsonBody} from "./request"; const crypto = globalThis.crypto const encoder = new TextEncoder() @@ -13,14 +15,6 @@ export class TwoFactorRequired extends Error { } } -export interface SessionData extends Record { - user: { - id: number, - username?: string - }, - id: string -} - export interface TwoFactorInformation { identifier: string, user: { @@ -130,9 +124,6 @@ function getSessionId(response: Response): string { .substring(identifier.length) } -function hasJsonBody(response: Response): boolean { - return response.headers.get("Content-Type").startsWith("application/json;") -} export async function login({user, password, verification}: { user: string, diff --git a/src/instagram/request.ts b/src/instagram/request.ts new file mode 100644 index 0000000..0735c31 --- /dev/null +++ b/src/instagram/request.ts @@ -0,0 +1,3 @@ +export function hasJsonBody(response: Response): boolean { + return response.headers.get("Content-Type").startsWith("application/json;") +} diff --git a/src/instagram/session-data.ts b/src/instagram/session-data.ts new file mode 100644 index 0000000..381b0d6 --- /dev/null +++ b/src/instagram/session-data.ts @@ -0,0 +1,11 @@ +export default interface SessionData extends Record { + user: { + id: number, + username?: string + }, + id: string +} + +export function sessionToCookie(session?: SessionData | undefined) { + return session ? `sessionid=${session.id}; ds_user_id=${session.user.id}` : '' +} diff --git a/src/instagram/user.ts b/src/instagram/user.ts new file mode 100644 index 0000000..55f81ec --- /dev/null +++ b/src/instagram/user.ts @@ -0,0 +1,74 @@ +import SessionData, {sessionToCookie} from "./session-data"; + +export interface User { + id: number, + profile: { + name: string, + username: string, + image: Promise | null, + } + followerIds?: number[], + private?: boolean, + public: boolean, + personal?: boolean +} + +export interface UserGraph extends Record { + canceled?: boolean +} + +export async function fetchUser(username: string, session?: SessionData): Promise { + const response = await fetch(`https://www.instagram.com/api/v1/users/web_profile_info/?username=${username}`, { + headers: { + "Sec-Fetch-Site": "same-origin", + "X-IG-App-ID": "936619743392459", + "Cookie": sessionToCookie(session) + } + }) + + const user = (await response.json() as { + data: { + user: { + id: string, + full_name: string, + username: string, + profile_pic_url: string, + is_private: boolean, + followed_by_viewer: boolean, + is_business_account: boolean, + is_professional_account: boolean + } + } + }).data.user + + const mapped = { + id: parseInt(user.id, 10), + profile: { + name: user.full_name, + username: user.username, + image: downloadProfilePicture(user.profile_pic_url) + }, + personal: !user.is_business_account && !user.is_professional_account, + public: !user.is_private + }; + + if (session) mapped["private"] = mapped.id !== session.user.id && !user.followed_by_viewer && user.is_private; + + return mapped; +} + +export async function downloadProfilePicture(source: string | undefined): Promise | null { + if (!source) return null + + const response = await fetch(source, { + headers: { + "Sec-Fetch-Site": "same-origin", + } + }) + + if (!response.ok) { + throw Error(await response.text()) + } + + return await response.blob() +} diff --git a/test/instagram.test.ts b/test/instagram/login.test.ts similarity index 98% rename from test/instagram.test.ts rename to test/instagram/login.test.ts index 99a5069..b171d80 100644 --- a/test/instagram.test.ts +++ b/test/instagram/login.test.ts @@ -4,9 +4,10 @@ import { encryptPassword, fetchVerification, InstagramEncryptionKey, - login, SessionData, TwoFactorInformation, TwoFactorRequired, + login, TwoFactorInformation, TwoFactorRequired, VerificationData, verify2FA -} from "../src/instagram"; +} from "../../src/instagram/login"; +import SessionData from "../../src/instagram/session-data"; interface PasswordEncryption { password: string,