From 3040f289f9d086f3566a015bb2c8f405229f1e3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?SlKz=E1=B5=8D=E1=B5=90?= <105301169+slkzgm@users.noreply.github.com>
Date: Mon, 20 Jan 2025 15:45:03 +0100
Subject: [PATCH] features: Add methods to retrieve quoted tweets and retweeters (#54)

* feature: Implementing getRetweetersOfTweet() method

* feature: Implementing getAllQuotedTweets() method
---
 src/scraper.ts |  56 ++++++++++++++
 src/search.ts  | 120 ++++++++++++++++++++++++++++++
 src/tweets.ts  | 193 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 369 insertions(+)

diff --git a/src/scraper.ts b/src/scraper.ts
index 131beb7..9546f3f 100644
--- a/src/scraper.ts
+++ b/src/scraper.ts
@@ -14,10 +14,12 @@ import {
   Profile,
 } from './profile';
 import {
+  fetchQuotedTweetsPage,
   fetchSearchProfiles,
   fetchSearchTweets,
   SearchMode,
   searchProfiles,
+  searchQuotedTweets,
   searchTweets,
 } from './search';
 import {
@@ -54,6 +56,8 @@ import {
   createCreateNoteTweetRequest,
   createCreateLongTweetRequest,
   getArticle,
+  getAllRetweeters,
+  Retweeter,
 } from './tweets';
 import {
   parseTimelineTweetsV2,
@@ -1050,4 +1054,56 @@ export class Scraper {
   public async grokChat(options: GrokChatOptions): Promise<GrokChatResponse> {
     return await grokChat(options, this.auth);
   }
+
+  /**
+   * Retrieves all users who retweeted the given tweet.
+   * @param tweetId The ID of the tweet.
+   * @returns An array of users (retweeters).
+   */
+  public async getRetweetersOfTweet(tweetId: string): Promise<Retweeter[]> {
+    return await getAllRetweeters(tweetId, this.auth);
+  }
+
+  /**
+   * Fetches all tweets quoting a given tweet ID by chaining requests
+   * until no more pages are available.
+   * @param quotedTweetId The tweet ID to find quotes of.
+   * @param maxTweetsPerPage Max tweets per page (default 20).
+   * @returns An array of all Tweet objects referencing the given tweet.
+   */
+  public async getAllQuotedTweets(
+    quotedTweetId: string,
+    maxTweetsPerPage = 20,
+  ): Promise<Tweet[]> {
+    const allQuotes: Tweet[] = [];
+    let cursor: string | undefined;
+    let prevCursor: string | undefined;
+
+    while (true) {
+      const page = await fetchQuotedTweetsPage(
+        quotedTweetId,
+        maxTweetsPerPage,
+        this.auth,
+        cursor,
+      );
+
+      // If there's no new tweets, stop
+      if (!page.tweets || page.tweets.length === 0) {
+        break;
+      }
+
+      allQuotes.push(...page.tweets);
+
+      // If next is missing or same => stop
+      if (!page.next || page.next === cursor || page.next === prevCursor) {
+        break;
+      }
+
+      // Move cursors
+      prevCursor = cursor;
+      cursor = page.next;
+    }
+
+    return allQuotes;
+  }
 }
diff --git a/src/search.ts b/src/search.ts
index ce04536..3682764 100644
--- a/src/search.ts
+++ b/src/search.ts
@@ -153,3 +153,123 @@ async function getSearchTimeline(
 
   return res.value;
 }
+
+/**
+ * Fetches one page of tweets that quote a given tweet ID.
+ * This function does not handle pagination.
+ *
+ * @param quotedTweetId The tweet ID you want quotes of.
+ * @param maxTweets Maximum number of tweets to return in one page (capped at 50).
+ * @param auth The TwitterAuth object.
+ * @param cursor Optional pagination cursor for fetching further pages.
+ * @returns A promise that resolves to a QueryTweetsResponse containing tweets and the next cursor.
+ * @throws The request error when the API call does not succeed.
+ */
+export async function fetchQuotedTweetsPage(
+  quotedTweetId: string,
+  maxTweets: number,
+  auth: TwitterAuth,
+  cursor?: string,
+): Promise<QueryTweetsResponse> {
+  if (maxTweets > 50) {
+    maxTweets = 50;
+  }
+
+  // Build the rawQuery and variables
+  const variables: Record<string, any> = {
+    rawQuery: `quoted_tweet_id:${quotedTweetId}`,
+    count: maxTweets,
+    querySource: 'tdqt',
+    product: 'Top',
+  };
+
+  if (cursor && cursor !== '') {
+    variables.cursor = cursor;
+  }
+
+  const features = addApiFeatures({
+    profile_label_improvements_pcf_label_in_post_enabled: true,
+    rweb_tipjar_consumption_enabled: true,
+    responsive_web_graphql_exclude_directive_enabled: true,
+    verified_phone_label_enabled: false,
+    creator_subscriptions_tweet_preview_api_enabled: true,
+    responsive_web_graphql_timeline_navigation_enabled: true,
+    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
+    premium_content_api_read_enabled: false,
+    communities_web_enable_tweet_community_results_fetch: true,
+    c9s_tweet_anatomy_moderator_badge_enabled: true,
+    responsive_web_grok_analyze_button_fetch_trends_enabled: false,
+    responsive_web_grok_analyze_post_followups_enabled: true,
+    responsive_web_jetfuel_frame: false,
+    responsive_web_grok_share_attachment_enabled: true,
+    articles_preview_enabled: true,
+    responsive_web_edit_tweet_api_enabled: true,
+    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
+    view_counts_everywhere_api_enabled: true,
+    longform_notetweets_consumption_enabled: true,
+    responsive_web_twitter_article_tweet_consumption_enabled: true,
+    tweet_awards_web_tipping_enabled: false,
+    creator_subscriptions_quote_tweet_preview_enabled: false,
+    freedom_of_speech_not_reach_fetch_enabled: true,
+    standardized_nudges_misinfo: true,
+    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled:
+      true,
+    rweb_video_timestamps_enabled: true,
+    longform_notetweets_rich_text_read_enabled: true,
+    longform_notetweets_inline_media_enabled: true,
+    responsive_web_grok_image_annotation_enabled: false,
+    responsive_web_enhance_cards_enabled: false,
+  });
+
+  const fieldToggles: Record<string, any> = {
+    withArticleRichContentState: false,
+  };
+
+  const params = new URLSearchParams();
+  params.set('features', stringify(features) ?? '');
+  params.set('fieldToggles', stringify(fieldToggles) ?? '');
+  params.set('variables', stringify(variables) ?? '');
+
+  const url = `https://x.com/i/api/graphql/1BP5aKg8NvTNvRCyyCyq8g/SearchTimeline?${params.toString()}`;
+
+  // Perform the request
+  const res = await requestApi(url, auth);
+  if (!res.success) {
+    throw res.err;
+  }
+
+  // Force cast for TypeScript
+  const timeline = res.value as any;
+  // Use parseSearchTimelineTweets to convert timeline data
+  return parseSearchTimelineTweets(timeline);
+}
+
+/**
+ * Creates an async generator, yielding pages of quotes for a given tweet ID.
+ * It prevents infinite loop by checking if the next cursor hasn't changed.
+ */
+export async function* searchQuotedTweets(
+  quotedTweetId: string,
+  maxTweets: number,
+  auth: TwitterAuth,
+): AsyncGenerator<QueryTweetsResponse> {
+  let cursor: string | undefined;
+
+  while (true) {
+    const response = await fetchQuotedTweetsPage(
+      quotedTweetId,
+      maxTweets,
+      auth,
+      cursor,
+    );
+    yield response;
+
+    // Prevent infinite loop if the API keeps returning the same cursor
+    if (!response.next || response.next === cursor) {
+      break;
+    }
+
+    // Update cursor for the next iteration
+    cursor = response.next;
+  }
+}
diff --git a/src/tweets.ts b/src/tweets.ts
index 36f8ba3..64e1f72 100644
--- a/src/tweets.ts
+++ b/src/tweets.ts
@@ -195,6 +195,14 @@ export interface Tweet {
   poll?: PollV2 | null;
 }
 
+/** Minimal user record for an account that retweeted a tweet. */
+export interface Retweeter {
+  rest_id: string;
+  screen_name: string;
+  name: string;
+  description?: string;
+}
+
 export type TweetQuery =
   | Partial<Tweet>
   | ((tweet: Tweet) => boolean | Promise<boolean>);
@@ -1511,3 +1519,188 @@ export async function getArticle(
   const articles = parseArticle(res.value);
   return articles.find((article) => article.id === id) ?? null;
 }
+
+/**
+ * Fetches a single page of retweeters for a given tweet and extracts the
+ * bottom and top pagination cursors from the same response.
+ *
+ * @param tweetId The ID of the tweet whose retweeters are requested.
+ * @param auth The TwitterAuth object providing the cookie jar and tokens.
+ * @param cursor Optional pagination cursor from a previous page.
+ * @param count Number of retweeters to request per page (default 40).
+ * @returns The retweeters on this page plus the cursors found, if any.
+ * @throws Error carrying the response body when the HTTP status is not OK.
+ */
+export async function fetchRetweetersPage(
+  tweetId: string,
+  auth: TwitterAuth,
+  cursor?: string,
+  count = 40,
+): Promise<{
+  retweeters: Retweeter[];
+  bottomCursor?: string;
+  topCursor?: string;
+}> {
+  const baseUrl =
+    'https://twitter.com/i/api/graphql/VSnHXwLGADxxtetlPnO7xg/Retweeters';
+
+  // Build query parameters
+  const variables = {
+    tweetId,
+    count,
+    cursor,
+    includePromotedContent: true,
+  };
+  const features = {
+    profile_label_improvements_pcf_label_in_post_enabled: true,
+    rweb_tipjar_consumption_enabled: true,
+    responsive_web_graphql_exclude_directive_enabled: true,
+    verified_phone_label_enabled: false,
+    creator_subscriptions_tweet_preview_api_enabled: true,
+    responsive_web_graphql_timeline_navigation_enabled: true,
+    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
+    premium_content_api_read_enabled: false,
+    communities_web_enable_tweet_community_results_fetch: true,
+    c9s_tweet_anatomy_moderator_badge_enabled: true,
+    responsive_web_grok_analyze_button_fetch_trends_enabled: false,
+    responsive_web_grok_analyze_post_followups_enabled: true,
+    responsive_web_jetfuel_frame: false,
+    responsive_web_grok_share_attachment_enabled: true,
+    articles_preview_enabled: true,
+    responsive_web_edit_tweet_api_enabled: true,
+    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
+    view_counts_everywhere_api_enabled: true,
+    longform_notetweets_consumption_enabled: true,
+    responsive_web_twitter_article_tweet_consumption_enabled: true,
+    tweet_awards_web_tipping_enabled: false,
+    creator_subscriptions_quote_tweet_preview_enabled: false,
+    freedom_of_speech_not_reach_fetch_enabled: true,
+    standardized_nudges_misinfo: true,
+    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
+    rweb_video_timestamps_enabled: true,
+    longform_notetweets_rich_text_read_enabled: true,
+    longform_notetweets_inline_media_enabled: true,
+    responsive_web_grok_image_annotation_enabled: false,
+    responsive_web_enhance_cards_enabled: false,
+  };
+
+  // Prepare URL with query params
+  const url = new URL(baseUrl);
+  url.searchParams.set('variables', JSON.stringify(variables));
+  url.searchParams.set('features', JSON.stringify(features));
+
+  // Retrieve necessary cookies and tokens
+  const cookies = await auth.cookieJar().getCookies(url.toString());
+  const xCsrfToken = cookies.find((cookie) => cookie.key === 'ct0');
+
+  const headers = new Headers({
+    authorization: `Bearer ${(auth as any).bearerToken}`,
+    cookie: await auth.cookieJar().getCookieString(url.toString()),
+    'content-type': 'application/json',
+    'x-guest-token': (auth as any).guestToken,
+    'x-twitter-auth-type': 'OAuth2Client',
+    'x-twitter-active-user': 'yes',
+    'x-csrf-token': xCsrfToken?.value || '',
+  });
+
+  const response = await fetch(url.toString(), {
+    method: 'GET',
+    headers,
+  });
+
+  // Update cookies if needed
+  await updateCookieJar(auth.cookieJar(), response.headers);
+
+  if (!response.ok) {
+    throw new Error(await response.text());
+  }
+
+  const json = await response.json();
+  const instructions =
+    json?.data?.retweeters_timeline?.timeline?.instructions || [];
+
+  const retweeters: Retweeter[] = [];
+  let bottomCursor: string | undefined;
+  let topCursor: string | undefined;
+
+  // Parse the retweeters from instructions
+  for (const instruction of instructions) {
+    if (instruction.type === 'TimelineAddEntries') {
+      for (const entry of instruction.entries) {
+        // If this entry is a user entry
+        if (entry.content?.itemContent?.user_results?.result) {
+          const user = entry.content.itemContent.user_results.result;
+          const description = user.legacy?.description ?? '';
+
+          retweeters.push({
+            rest_id: user.rest_id,
+            screen_name: user.legacy?.screen_name ?? '',
+            name: user.legacy?.name ?? '',
+            description,
+          });
+        }
+
+        // Capture the bottom cursor
+        if (
+          entry.content?.entryType === 'TimelineTimelineCursor' &&
+          entry.content?.cursorType === 'Bottom'
+        ) {
+          bottomCursor = entry.content.value;
+        }
+
+        // Capture the top cursor
+        if (
+          entry.content?.entryType === 'TimelineTimelineCursor' &&
+          entry.content?.cursorType === 'Top'
+        ) {
+          topCursor = entry.content.value;
+        }
+      }
+    }
+  }
+
+  return { retweeters, bottomCursor, topCursor };
+}
+
+/**
+ * Retrieves *all* retweeters by chaining requests until a page comes back
+ * empty or no new cursor is found.
+ * @param tweetId The ID of the tweet.
+ * @param auth The TwitterAuth object for authentication.
+ * @returns A list of all users that retweeted the tweet.
+ */
+export async function getAllRetweeters(
+  tweetId: string,
+  auth: TwitterAuth
+): Promise<Retweeter[]> {
+  const allRetweeters: Retweeter[] = [];
+  let cursor: string | undefined;
+
+  while (true) {
+    // Destructure bottomCursor / topCursor
+    const { retweeters, bottomCursor, topCursor } = await fetchRetweetersPage(
+      tweetId,
+      auth,
+      cursor,
+      40
+    );
+
+    // An empty page means the timeline is exhausted; the API may still hand
+    // out a fresh cursor here, so comparing cursors alone would never stop.
+    if (retweeters.length === 0) {
+      break;
+    }
+    allRetweeters.push(...retweeters);
+
+    const newCursor = bottomCursor || topCursor;
+
+    // Stop if there is no new cursor or if it's the same as the old one
+    if (!newCursor || newCursor === cursor) {
+      break;
+    }
+
+    cursor = newCursor;
+  }
+
+  return allRetweeters;
+}
\ No newline at end of file