diff --git a/CLAUDE.md b/CLAUDE.md index 7270bf08..4caabaa3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -70,6 +70,7 @@ extension/ → Browser extension (Chrome/Edge) ## Key Technical Context +- **ALWAYS use XActions' stealth infrastructure** for any browser automation. NEVER call Puppeteer (or Playwright) directly/raw. Use `createBrowser()`, `createPage()`, `loginWithCookie()`, and `randomDelay()` from `src/scrapers/index.js` — these include puppeteer-extra-plugin-stealth, anti-detection launch args, realistic viewports/user agents, and human-like log-normal delays. See also `src/agents/antiDetection.js` for advanced behavioral simulation (Bezier mouse, human typing, circadian patterns). - Browser scripts run in **DevTools console on x.com**, not Node.js - DOM selectors change frequently — see [selectors.md](docs/agents/selectors.md) - Scripts in `src/automation/` require pasting `src/automation/core.js` first @@ -77,7 +78,8 @@ extension/ → Browser extension (Chrome/Edge) - CLI entry point: `bin/unfollowx`, installed via `npm install -g xactions` - MCP server: `src/mcp/server.js` — used by Claude Desktop and AI agents - Prefer `data-testid` selectors — most stable across X/Twitter UI updates -- X enforces aggressive rate limits; all automation must include 1-3s delays between actions +- X enforces aggressive rate limits; all automation uses human-like log-normal delays (2-7s base + occasional distraction spikes) +- **NEVER run multiple MCP/scraper/CLI requests in parallel** — the MCP server shares a single browser instance. Concurrent requests cause overlapping navigations that break scraping and defeat the human-like timing simulation. Always run one request at a time and wait for it to complete. 
## Patterns & Style diff --git a/src/cli/index.js b/src/cli/index.js index 7318788c..15054f26 100755 --- a/src/cli/index.js +++ b/src/cli/index.js @@ -144,9 +144,29 @@ program const config = await loadConfig(); config.authToken = cookie; - await saveConfig(config); - console.log(chalk.green('\n✓ Authentication saved!\n')); + // Resolve and save the authenticated username + try { + const browser = await scrapers.createBrowser(); + const page = await scrapers.createPage(browser); + await scrapers.loginWithCookie(page, cookie); + await page.goto('https://x.com/home', { waitUntil: 'networkidle2' }); + const username = await page.evaluate(() => { + const link = document.querySelector('a[data-testid="AppTabBar_Profile_Link"]'); + return link?.getAttribute('href')?.replace('/', '') || null; + }); + await browser.close(); + if (username) { + config.username = username; + console.log(chalk.green(`\n✓ Authenticated as @${username}!\n`)); + } else { + console.log(chalk.green('\n✓ Authentication saved!\n')); + } + } catch { + console.log(chalk.green('\n✓ Authentication saved!\n')); + } + + await saveConfig(config); }); program @@ -155,6 +175,7 @@ program .action(async () => { const config = await loadConfig(); delete config.authToken; + delete config.username; await saveConfig(config); console.log(chalk.green('\n✓ Logged out successfully\n')); }); @@ -523,6 +544,219 @@ program } }); +// ============================================================================ +// DM Commands +// ============================================================================ + +const dmsCmd = program.command('dms').description('Direct message commands'); + +async function promptAndSavePasscode(passcode) { + const config = await loadConfig(); + if (config.dmPasscode === passcode) return; + const { save } = await inquirer.prompt([{ + type: 'confirm', + name: 'save', + message: 'Save passcode for future use? 
(stored alongside auth_token in ~/.xactions/config.json)',
+    default: true,
+  }]);
+  if (save) {
+    config.dmPasscode = passcode;
+    await saveConfig(config);
+    console.log(chalk.green('Passcode saved.'));
+  }
+}
+
+// `dms list` — scrape the DM inbox and print/export the conversation list.
+// Value-taking options must declare a <placeholder>; without one the flag is
+// registered as a boolean switch and never receives the user's value.
+dmsCmd
+  .command('list')
+  .description('List DM conversations')
+  .option('-l, --limit <n>', 'Maximum conversations', '20')
+  .option('-o, --output <file>', 'Output file (json, csv, or xlsx)')
+  .option('--passcode <code>', 'DM encryption passcode (4 digits)')
+  .action(async (options) => {
+    const limit = parseInt(options.limit, 10);
+    const spinner = ora('Fetching DM conversations').start();
+    // Declared outside the try so the finally block can release it on failure.
+    let browser = null;
+
+    try {
+      browser = await scrapers.createBrowser();
+      const page = await scrapers.createPage(browser);
+
+      const config = await loadConfig();
+      if (config.authToken) {
+        await scrapers.loginWithCookie(page, config.authToken);
+      }
+
+      // CLI flag wins over the passcode saved in the config file.
+      let passcode = options.passcode || config.dmPasscode;
+
+      let conversations;
+      try {
+        conversations = await scrapers.scrapeDmConversations(page, { limit, passcode });
+      } catch (err) {
+        // Only prompt when the scraper asked for a passcode and none was supplied;
+        // a wrong passcode or any other failure is reported as-is.
+        if (err?.message?.includes('passcode required') && !passcode) {
+          spinner.stop();
+          const answers = await inquirer.prompt([{
+            type: 'password',
+            name: 'passcode',
+            message: 'Enter your 4-digit DM encryption passcode:',
+            mask: '*',
+          }]);
+          passcode = answers.passcode;
+          spinner.start('Retrying with passcode...');
+          conversations = await scrapers.scrapeDmConversations(page, { limit, passcode });
+        } else {
+          throw err;
+        }
+      }
+
+      // Release the browser before the interactive save prompt and file output.
+      await browser.close();
+      browser = null;
+      spinner.succeed(`Found ${conversations.length} conversations`);
+      if (passcode && passcode !== config.dmPasscode) await promptAndSavePasscode(passcode);
+      await smartOutput(conversations, options, 'dm-conversations');
+    } catch (error) {
+      spinner.fail('Failed to fetch conversations');
+      console.error(chalk.red(error.message));
+    } finally {
+      // Still set only when something threw before the normal close above;
+      // previously that path left the browser running.
+      if (browser) await browser.close().catch(() => {});
+    }
+  });
+
+// `dms read <username>` — print or export the message history with one user.
+// The action signature is (username, options), so the command must declare
+// the required positional argument.
+dmsCmd
+  .command('read <username>')
+  .description('Read DMs with a specific user')
+  .option('-l, --limit <n>', 'Maximum messages', '50')
+  .option('-o, --output <file>', 'Output file (json, csv, or xlsx)')
+  .option('--passcode <code>', 'DM encryption passcode (4 digits)')
+  .action(async (username, options) => {
+    const limit = parseInt(options.limit, 10);
+    const spinner = ora(`Reading DMs with @${username}`).start();
+    // Declared outside the try so the finally block can release it on failure.
+    let browser = null;
+
+    try {
+      browser = await scrapers.createBrowser();
+      const page = await scrapers.createPage(browser);
+
+      const config = await loadConfig();
+      if (config.authToken) {
+        await scrapers.loginWithCookie(page, config.authToken);
+      }
+
+      let passcode = options.passcode || config.dmPasscode;
+
+      let messages;
+      try {
+        messages = await scrapers.scrapeDmMessages(page, username, { limit, passcode });
+      } catch (err) {
+        if (err?.message?.includes('passcode required') && !passcode) {
+          spinner.stop();
+          const answers = await inquirer.prompt([{
+            type: 'password',
+            name: 'passcode',
+            message: 'Enter your 4-digit DM encryption passcode:',
+            mask: '*',
+          }]);
+          passcode = answers.passcode;
+          spinner.start('Retrying with passcode...');
+          messages = await scrapers.scrapeDmMessages(page, username, { limit, passcode });
+        } else {
+          throw err;
+        }
+      }
+
+      await browser.close();
+      browser = null;
+      spinner.succeed(`Read ${messages.length} messages with @${username}`);
+      if (passcode && passcode !== config.dmPasscode) await promptAndSavePasscode(passcode);
+
+      if (!options.output) {
+        // No output file: render a readable transcript on stdout.
+        for (const msg of messages) {
+          const who = msg.sender === 'me' ? chalk.blue('YOU') : chalk.green(username.toUpperCase());
+          const media = msg.hasMedia ? chalk.yellow(' [+media]') : '';
+          const time = msg.time ? chalk.gray(msg.time) : '';
+          console.log(`[${who}] ${time} — ${msg.text}${media}`);
+        }
+      } else {
+        await smartOutput(messages, options, `dms-${username}`);
+      }
+    } catch (error) {
+      spinner.fail(`Failed to read DMs with @${username}`);
+      console.error(chalk.red(error.message));
+    } finally {
+      // Still set only when something threw before the normal close above.
+      if (browser) await browser.close().catch(() => {});
+    }
+  });
+
+// `dms export` — walk the inbox and collect messages from every conversation.
+dmsCmd
+  .command('export')
+  .description('Export all DM conversations and messages')
+  .option('-l, --limit <n>', 'Maximum total messages', '100')
+  .option('-o, --output <file>', 'Output file (json, csv, or xlsx)')
+  .option('--passcode <code>', 'DM encryption passcode (4 digits)')
+  .action(async (options) => {
+    const limit = parseInt(options.limit, 10);
+    const spinner = ora('Exporting DMs').start();
+
+    try {
+      // NOTE(review): unlike `list` and `read`, this command closes the browser
+      // only on its success and no-conversations paths; a failure after
+      // createBrowser() leaves it open. Give it the same try/finally treatment.
+      const browser = await scrapers.createBrowser();
+      const page = await scrapers.createPage(browser);
+
+      const config = await loadConfig();
+      if (config.authToken) {
+        await scrapers.loginWithCookie(page, config.authToken);
+      }
+
+      let passcode = options.passcode || config.dmPasscode;
+
+      // The total message budget is spread over roughly limit/10 conversations.
+      let convos;
+      try {
+        convos = await scrapers.scrapeDmConversations(page, { limit: Math.ceil(limit / 10), passcode });
+      } catch (err) {
+        if (err?.message?.includes('passcode required') && !passcode) {
+          spinner.stop();
+          const answers = await inquirer.prompt([{
+            type: 'password',
+            name: 'passcode',
+            message: 'Enter your 4-digit DM encryption passcode:',
+            mask: '*',
+          }]);
+          passcode = answers.passcode;
+          spinner.start('Retrying with passcode...');
+          convos = await scrapers.scrapeDmConversations(page, { limit: Math.ceil(limit / 10), passcode });
+        } else {
+          throw err;
+        }
+      }
+
+      if (convos.length === 0) {
+        await browser.close();
+        spinner.succeed('No conversations found');
+        return;
+      }
+
+      const allMessages = [];
+      for (let i = 0; i < convos.length; i++) {
+        spinner.text = `Exporting DMs (${i + 1}/${convos.length}: ${convos[i].name})`;
+        const messages = await scrapers.scrapeDmMessages(page, convos[i].name, {
+          passcode,
+          limit: Math.ceil(limit / convos.length),
+          skipNavigation: true,
+        });
+ allMessages.push({ conversation: convos[i].name, messages }); + } + + await browser.close(); + + const total = allMessages.reduce((sum, c) => sum + c.messages.length, 0); + spinner.succeed(`Exported ${total} messages from ${allMessages.length} conversations`); + if (passcode && passcode !== config.dmPasscode) await promptAndSavePasscode(passcode); + + const exportData = { conversations: allMessages, total }; + if (options.output) { + const flat = allMessages.flatMap(c => + c.messages.map(m => ({ conversation: c.conversation, ...m })) + ); + await smartOutput(flat, options, 'dm-export'); + } else { + console.log(JSON.stringify(exportData, null, 2)); + } + } catch (error) { + spinner.fail('Failed to export DMs'); + console.error(chalk.red(error.message)); + } + }); + // ============================================================================ // Plugin Commands // ============================================================================ @@ -1742,7 +1976,7 @@ const scrapeCmd = program }; // Set the right target field based on action - if (['profile', 'followers', 'following', 'tweets', 'posts'].includes(action)) { + if (['profile', 'followers', 'following', 'tweets', 'posts', 'likes', 'media'].includes(action)) { if (!username) throw new Error(`Action "${action}" requires a username. Usage: xactions scrape ${action} --platform ${platform}`); scrapeOptions.username = username; } else if (['search'].includes(action)) { diff --git a/src/client/api/graphqlQueries.js b/src/client/api/graphqlQueries.js index d13599b7..e4b01e3b 100644 --- a/src/client/api/graphqlQueries.js +++ b/src/client/api/graphqlQueries.js @@ -214,3 +214,10 @@ export function buildGraphQLUrl(endpoint, variables = {}, features) { return `${base}?${params.toString()}`; } + +/** + * Default field toggles for GraphQL requests. 
+ */ +export const DEFAULT_FIELD_TOGGLES = { + withArticlePlainText: false, +}; diff --git a/src/client/api/parsers.js b/src/client/api/parsers.js new file mode 100644 index 00000000..b32e1fb9 --- /dev/null +++ b/src/client/api/parsers.js @@ -0,0 +1,295 @@ +/** + * XActions Client — Response Parsers + * Robust parsing helpers for Twitter's deeply nested GraphQL responses. + * All functions are defensive — they return null/empty on unexpected data, never throw. + * + * @author nich (@nichxbt) - https://github.com/nirholas + * @license MIT + */ + +import { Tweet } from '../models/Tweet.js'; +import { Profile } from '../models/Profile.js'; + +// ============================================================================ +// Navigation +// ============================================================================ + +/** + * Safely navigate a nested object using a dot-separated path. + * + * @param {Object} obj - Root object + * @param {string} path - Dot-separated path (e.g., 'data.user.result') + * @returns {*} The value at the path, or undefined if any segment is missing + */ +export function navigateResponse(obj, path) { + if (!obj || !path) return undefined; + const segments = path.split('.'); + let current = obj; + for (const segment of segments) { + if (current == null || typeof current !== 'object') return undefined; + current = current[segment]; + } + return current; +} + +// ============================================================================ +// Timeline Parsing +// ============================================================================ + +/** + * Extract timeline entries and cursor from a Twitter GraphQL response. 
+ * + * @param {Object} response - Full GraphQL response + * @param {string} path - Dot-path to the timeline object (e.g., 'data.user.result.timeline_v2.timeline') + * @returns {{ entries: Array, cursor: string|null }} + */ +export function parseTimelineEntries(response, path) { + const timeline = navigateResponse(response, path); + if (!timeline) return { entries: [], cursor: null }; + + const instructions = timeline.instructions || []; + + // Find the add entries instruction + let entries = []; + for (const instruction of instructions) { + if ( + instruction.type === 'TimelineAddEntries' || + instruction.type === 'TimelineAddToModule' + ) { + entries = instruction.entries || []; + break; + } + } + + // If no TimelineAddEntries, check for direct entries + if (entries.length === 0) { + for (const instruction of instructions) { + if (instruction.entries) { + entries = instruction.entries; + break; + } + } + } + + const cursor = extractCursor(entries, 'bottom'); + return { entries, cursor }; +} + +// ============================================================================ +// Entry Parsers +// ============================================================================ + +/** + * Parse a single tweet timeline entry into a Tweet. 
+ * + * @param {Object} entry - A timeline entry + * @returns {Tweet|null} Parsed tweet or null for tombstones, promoted, and empty entries + */ +export function parseTweetEntry(entry) { + if (!entry) return null; + + const content = entry.content || {}; + + // Skip promoted content + if (content.promotedMetadata || entry.entryId?.startsWith('promoted')) { + return null; + } + + // Standard timeline item + const itemContent = content.itemContent || {}; + let tweetResult = itemContent.tweet_results?.result; + + if (!tweetResult) return null; + + // Unwrap TweetWithVisibilityResults + if (tweetResult.__typename === 'TweetWithVisibilityResults') { + tweetResult = tweetResult.tweet; + } + + // Tombstone (deleted tweet) + if (!tweetResult || tweetResult.__typename === 'TweetTombstone') { + return null; + } + + return Tweet.fromGraphQL(tweetResult); +} + +/** + * Parse a user timeline entry into a Profile. + * + * @param {Object} entry - A timeline entry containing user data + * @returns {Profile|null} Parsed profile or null + */ +export function parseUserEntry(entry) { + if (!entry) return null; + + const content = entry.content || {}; + + // Skip promoted content + if (content.promotedMetadata) return null; + + const itemContent = content.itemContent || {}; + const userResult = itemContent.user_results?.result; + + if (!userResult || userResult.__typename === 'UserUnavailable') { + return null; + } + + return Profile.fromGraphQL(userResult); +} + +/** + * Parse a conversation module entry (multi-tweet reply chain) into an array of Tweets. 
+ *
+ * @param {Object} entry - A timeline module entry
+ * @returns {Tweet[]} Array of tweets in the conversation (may be empty)
+ */
+export function parseModuleEntry(entry) {
+  if (!entry) return [];
+
+  const content = entry.content || {};
+  // A non-array `items` is treated as "no items" rather than iterated blindly.
+  const items = Array.isArray(content.items) ? content.items : [];
+  const tweets = [];
+
+  for (const item of items) {
+    // `item` itself may be null in malformed responses; this module must not throw.
+    const itemContent = item?.item?.itemContent || {};
+    let tweetResult = itemContent.tweet_results?.result;
+    if (!tweetResult) continue;
+
+    // Unwrap visibility wrapper
+    if (tweetResult.__typename === 'TweetWithVisibilityResults') {
+      tweetResult = tweetResult.tweet;
+    }
+    if (!tweetResult || tweetResult.__typename === 'TweetTombstone') continue;
+
+    const tweet = Tweet.fromGraphQL(tweetResult);
+    if (tweet) tweets.push(tweet);
+  }
+
+  return tweets;
+}
+
+// ============================================================================
+// Cursor Extraction
+// ============================================================================
+
+/**
+ * Find and return a pagination cursor from a set of timeline entries.
+ *
+ * Looks first for an entry whose `entryId` starts with `cursor-<direction>`,
+ * then falls back to any entry whose content is a `TimelineTimelineCursor` of
+ * the matching `cursorType`. Null entries and non-string ids are skipped.
+ *
+ * @param {Array} entries - Timeline entries array
+ * @param {'bottom'|'top'} [direction='bottom'] - 'bottom' for next page, 'top' for refresh
+ * @returns {string|null} Cursor value or null if not found
+ */
+export function extractCursor(entries, direction = 'bottom') {
+  if (!Array.isArray(entries)) return null;
+
+  const prefix = `cursor-${direction}`;
+
+  for (const entry of entries) {
+    // A null entry or a non-string entryId used to raise a TypeError here.
+    const entryId = entry?.entryId;
+    if (typeof entryId === 'string' && entryId.startsWith(prefix)) {
+      return entry.content?.value || entry.content?.itemContent?.value || null;
+    }
+  }
+
+  // Fallback: look for cursor type in content
+  const wantedType = direction === 'bottom' ? 'Bottom' : 'Top';
+  for (const entry of entries) {
+    const content = entry?.content || {};
+    if (content.entryType === 'TimelineTimelineCursor' && content.cursorType === wantedType) {
+      return content.value || null;
+    }
+  }
+
+  return null;
+}
+
+// ============================================================================
+// Media Parsing
+// ============================================================================
+
+/**
+ * Parse a Twitter media entity into a normalized media object.
+ *
+ * For videos and GIFs, `url` points at the highest-bitrate mp4 variant when one
+ * exists, while `preview` always keeps the still-image URL.
+ *
+ * @param {Object} media - Raw Twitter media entity
+ * @returns {{ type: string, url: string, preview: string, width: number, height: number, duration: number, altText: string }|null}
+ *   Normalized media, or null when `media` is missing. `duration` is in whole seconds (0 for photos).
+ */
+export function parseMediaEntity(media) {
+  if (!media) return null;
+
+  const type = media.type === 'animated_gif' ? 'gif' : media.type || 'photo';
+  const preview = media.media_url_https || media.media_url || '';
+  let url = preview;
+  let duration = 0;
+
+  // For video/gif, pick the highest-quality mp4 variant
+  if (type === 'video' || type === 'gif') {
+    const variants = media.video_info?.variants || [];
+    const best = variants
+      .filter((v) => v.content_type === 'video/mp4')
+      .sort((a, b) => (b.bitrate || 0) - (a.bitrate || 0))[0];
+    if (best) url = best.url;
+    if (media.video_info?.duration_millis) {
+      duration = Math.round(media.video_info.duration_millis / 1000);
+    }
+  }
+
+  return {
+    type,
+    url,
+    preview,
+    width: media.original_info?.width || media.sizes?.large?.w || 0,
+    height: media.original_info?.height || media.sizes?.large?.h || 0,
+    duration,
+    altText: media.ext_alt_text || '',
+  };
+}
+
+// ============================================================================
+// Poll Parsing
+// ============================================================================
+
+/**
+ * Parse a Twitter card into poll data.
+ * + * @param {Object} card - Raw Twitter card data (from tweet.card) + * @returns {{ id: string, options: Array<{label: string, votes: number}>, endDatetime: string, votingStatus: string, totalVotes: number }|null} + */ +export function parsePoll(card) { + if (!card) return null; + + const binding = card.legacy?.binding_values || card.binding_values; + if (!binding) return null; + + // Normalize binding values into a flat map + const vals = {}; + if (Array.isArray(binding)) { + for (const b of binding) { + vals[b.key] = b.value?.string_value || b.value?.scribe_value?.value || ''; + } + } else { + for (const [k, v] of Object.entries(binding)) { + vals[k] = v?.string_value || v?.scribe_value?.value || ''; + } + } + + if (!vals.choice1_label) return null; + + const options = []; + let totalVotes = 0; + for (let i = 1; i <= 4; i++) { + const label = vals[`choice${i}_label`]; + if (!label) break; + const votes = parseInt(vals[`choice${i}_count`], 10) || 0; + totalVotes += votes; + options.push({ label, votes }); + } + + return { + id: vals.card_url || '', + options, + endDatetime: vals.end_datetime_utc || '', + votingStatus: vals.counts_are_final === 'true' ? 'closed' : 'open', + totalVotes, + }; +} diff --git a/src/client/api/search.js b/src/client/api/search.js new file mode 100644 index 00000000..653aa451 --- /dev/null +++ b/src/client/api/search.js @@ -0,0 +1,200 @@ +/** + * XActions Client — Search API + * Twitter search via the internal SearchTimeline GraphQL endpoint. + * + * Supports advanced search operators: + * from:username, to:username, since:YYYY-MM-DD, until:YYYY-MM-DD, + * min_replies:N, min_faves:N, min_retweets:N, + * filter:links, filter:media, filter:images, filter:videos, + * -filter:replies, lang:en + * + * These are passed through in rawQuery — Twitter handles the parsing. 
+ *
+ * @author nich (@nichxbt) - https://github.com/nirholas
+ * @license MIT
+ */
+
+import { Tweet } from '../models/Tweet.js';
+import { Profile } from '../models/Profile.js';
+import {
+  GRAPHQL_ENDPOINTS,
+  DEFAULT_FEATURES,
+  DEFAULT_FIELD_TOGGLES,
+  buildGraphQLUrl,
+} from './graphqlQueries.js';
+import { parseTimelineEntries, parseTweetEntry, parseUserEntry } from './parsers.js';
+
+// Pause helpers used between page requests.
+const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+const randomDelay = (min = 1000, max = 2000) => sleep(min + Math.random() * (max - min));
+
+/**
+ * Search tweets using Twitter's internal SearchTimeline endpoint.
+ *
+ * Pages through results 20 at a time, pausing 1-2 s between requests, and stops
+ * when `count` tweets have been yielded, the response has no bottom cursor, the
+ * cursor stops advancing, or a page produces no tweets at all.
+ *
+ * @param {Object} http - HttpClient instance
+ * @param {string} query - Search query (supports advanced operators)
+ * @param {number} [count=40] - Maximum number of tweets to yield
+ * @param {string} [mode='Latest'] - Search mode: 'Top', 'Latest', 'Photos', 'Videos'
+ * @yields {Tweet}
+ */
+export async function* searchTweets(http, query, count = 40, mode = 'Latest') {
+  const endpoint = GRAPHQL_ENDPOINTS.SearchTimeline;
+  let cursor = null;
+  let yielded = 0;
+
+  while (yielded < count) {
+    const variables = {
+      rawQuery: query,
+      count: 20,
+      querySource: 'typed_query',
+      product: mode,
+    };
+    if (cursor) variables.cursor = cursor;
+
+    const url = buildGraphQLUrl(endpoint, variables);
+    const data = await http.get(url);
+
+    const { entries, cursor: nextCursor } = parseTimelineEntries(
+      data,
+      'data.search_by_raw_query.search_timeline.timeline',
+    );
+
+    if (!entries.length) break;
+
+    const yieldedBeforePage = yielded;
+    for (const entry of entries) {
+      if (entry.entryId?.startsWith('cursor-')) continue;
+      const tweet = parseTweetEntry(entry);
+      if (tweet) {
+        yield tweet;
+        yielded++;
+        if (yielded >= count) break;
+      }
+    }
+
+    // Done: skip the delay that would otherwise run before the loop condition ends it.
+    if (yielded >= count) break;
+    // `entries` also counts cursor entries, so a page can be non-empty yet hold
+    // no tweets. Without this guard such a page (with a bottom cursor still
+    // present) kept the generator requesting forever while yielding nothing.
+    if (yielded === yieldedBeforePage) break;
+    if (!nextCursor || nextCursor === cursor) break;
+
+    cursor = nextCursor;
+    await randomDelay(1000, 2000);
+  }
+}
+
+/**
+ * Search user profiles.
+ *
+ * Uses the SearchTimeline endpoint with the 'People' product. Pages through
+ * results 20 at a time, pausing 1-2 s between requests, and stops when `count`
+ * profiles have been yielded, the response has no bottom cursor, the cursor
+ * stops advancing, or a page produces no profiles at all.
+ *
+ * @param {Object} http - HttpClient instance
+ * @param {string} query - Search query
+ * @param {number} [count=40] - Maximum number of profiles to yield
+ * @yields {Profile}
+ */
+export async function* searchProfiles(http, query, count = 40) {
+  const endpoint = GRAPHQL_ENDPOINTS.SearchTimeline;
+  let cursor = null;
+  let yielded = 0;
+
+  while (yielded < count) {
+    const variables = {
+      rawQuery: query,
+      count: 20,
+      querySource: 'typed_query',
+      product: 'People',
+    };
+    if (cursor) variables.cursor = cursor;
+
+    const url = buildGraphQLUrl(endpoint, variables);
+    const data = await http.get(url);
+
+    const { entries, cursor: nextCursor } = parseTimelineEntries(
+      data,
+      'data.search_by_raw_query.search_timeline.timeline',
+    );
+
+    if (!entries.length) break;
+
+    const yieldedBeforePage = yielded;
+    for (const entry of entries) {
+      if (entry.entryId?.startsWith('cursor-')) continue;
+      const profile = parseUserEntry(entry);
+      if (profile) {
+        yield profile;
+        yielded++;
+        if (yielded >= count) break;
+      }
+    }
+
+    // Done: skip the delay that would otherwise run before the loop condition ends it.
+    if (yielded >= count) break;
+    // `entries` also counts cursor entries, so a page can be non-empty yet hold
+    // no profiles. Without this guard such a page (with a bottom cursor still
+    // present) kept the generator requesting forever while yielding nothing.
+    if (yielded === yieldedBeforePage) break;
+    if (!nextCursor || nextCursor === cursor) break;
+
+    cursor = nextCursor;
+    await randomDelay(1000, 2000);
+  }
+}
+
+/**
+ * Fetch a single page of search results (non-generator, for manual pagination).
+ * + * @param {Object} http - HttpClient instance + * @param {string} query - Search query + * @param {number} [count=20] - Number of results per page + * @param {string} [mode='Latest'] - Search mode + * @param {string} [cursor=null] - Pagination cursor + * @returns {Promise<{ tweets: Tweet[], cursor: string|null }>} + */ +export async function fetchSearchTweets(http, query, count = 20, mode = 'Latest', cursor = null) { + const endpoint = GRAPHQL_ENDPOINTS.SearchTimeline; + const variables = { + rawQuery: query, + count, + querySource: 'typed_query', + product: mode, + }; + if (cursor) variables.cursor = cursor; + + const url = buildGraphQLUrl(endpoint, variables); + const data = await http.get(url); + + const { entries, cursor: nextCursor } = parseTimelineEntries( + data, + 'data.search_by_raw_query.search_timeline.timeline', + ); + + const tweets = []; + for (const entry of entries) { + if (entry.entryId?.startsWith('cursor-')) continue; + const tweet = parseTweetEntry(entry); + if (tweet) tweets.push(tweet); + } + + return { tweets, cursor: nextCursor }; +} + +/** + * Fetch a single page of profile search results (non-generator). 
+ * + * @param {Object} http - HttpClient instance + * @param {string} query - Search query + * @param {number} [count=20] - Number of results per page + * @param {string} [cursor=null] - Pagination cursor + * @returns {Promise<{ profiles: Profile[], cursor: string|null }>} + */ +export async function fetchSearchProfiles(http, query, count = 20, cursor = null) { + const endpoint = GRAPHQL_ENDPOINTS.SearchTimeline; + const variables = { + rawQuery: query, + count, + querySource: 'typed_query', + product: 'People', + }; + if (cursor) variables.cursor = cursor; + + const url = buildGraphQLUrl(endpoint, variables); + const data = await http.get(url); + + const { entries, cursor: nextCursor } = parseTimelineEntries( + data, + 'data.search_by_raw_query.search_timeline.timeline', + ); + + const profiles = []; + for (const entry of entries) { + if (entry.entryId?.startsWith('cursor-')) continue; + const profile = parseUserEntry(entry); + if (profile) profiles.push(profile); + } + + return { profiles, cursor: nextCursor }; +} diff --git a/src/client/api/trends.js b/src/client/api/trends.js new file mode 100644 index 00000000..d5fbc74c --- /dev/null +++ b/src/client/api/trends.js @@ -0,0 +1,103 @@ +/** + * XActions Client — Trends API + * Fetch trending topics from Twitter's explore/guide endpoint. + * + * @author nich (@nichxbt) - https://github.com/nirholas + * @license MIT + */ + +/** + * Get current trending topics on Twitter. 
+ * + * @param {Object} http - HttpClient instance + * @param {string} [category='trending'] - Category: 'trending', 'for_you', 'news', 'sports', 'entertainment' + * @returns {Promise>} + */ +export async function getTrends(http, category = 'trending') { + const params = new URLSearchParams({ + include_page_configuration: 'false', + initial_tab_id: category, + }); + const url = `https://x.com/i/api/2/guide.json?${params.toString()}`; + const data = await http.get(url); + + const trends = []; + + try { + const instructions = + data?.timeline?.instructions || []; + + for (const instruction of instructions) { + const entries = instruction.addEntries?.entries || instruction.entries || []; + for (const entry of entries) { + const items = entry.content?.timelineModule?.items || []; + for (const item of items) { + const trend = item.item?.content?.trend; + if (trend) { + trends.push({ + name: trend.name || '', + tweetCount: trend.trendMetadata?.metaDescription || '', + url: trend.url?.url || `https://x.com/search?q=${encodeURIComponent(trend.name || '')}`, + context: trend.trendMetadata?.domainContext || '', + }); + } + } + + // Also check for direct trend content in timeline entry + const directTrend = entry.content?.trend; + if (directTrend) { + trends.push({ + name: directTrend.name || '', + tweetCount: directTrend.trendMetadata?.metaDescription || '', + url: directTrend.url?.url || `https://x.com/search?q=${encodeURIComponent(directTrend.name || '')}`, + context: directTrend.trendMetadata?.domainContext || '', + }); + } + } + } + } catch { + // Return whatever we have so far + } + + return trends; +} + +/** + * Get available explore tabs. 
+ * + * @param {Object} http - HttpClient instance + * @returns {Promise>} + */ +export async function getExploreTabs(http) { + const url = 'https://x.com/i/api/2/guide.json?include_page_configuration=true'; + const data = await http.get(url); + + const tabs = []; + try { + const pageConfig = data?.timeline?.instructions?.find( + (i) => i.type === 'TimelineShowCover' || i.coverConfiguration, + ); + const tabItems = + data?.page_configuration?.tabs || []; + for (const tab of tabItems) { + tabs.push({ + id: tab.tab_id || tab.id || '', + label: tab.label || tab.name || '', + }); + } + } catch { + // Return default tabs + } + + if (tabs.length === 0) { + return [ + { id: 'trending', label: 'Trending' }, + { id: 'for_you', label: 'For You' }, + { id: 'news', label: 'News' }, + { id: 'sports', label: 'Sports' }, + { id: 'entertainment', label: 'Entertainment' }, + ]; + } + + return tabs; +} diff --git a/src/client/auth/TwoFactorAuth.js b/src/client/auth/TwoFactorAuth.js new file mode 100644 index 00000000..4e35e35d --- /dev/null +++ b/src/client/auth/TwoFactorAuth.js @@ -0,0 +1,33 @@ +/** + * XActions Client — Two-Factor Authentication + * Handles TOTP and SMS-based 2FA for Twitter login flows. + * + * @author nich (@nichxbt) - https://github.com/nirholas + * @license MIT + */ + +export class TwoFactorAuth { + constructor({ tokenManager } = {}) { + this.tokenManager = tokenManager; + this._fetch = globalThis.fetch; + } + + setFetch(fetchFn) { + this._fetch = fetchFn; + } + + /** + * Submit a 2FA code to complete login. 
+ * @param {Object} options + * @param {string} options.flowToken - Flow token from the login challenge + * @param {string} options.code - TOTP or SMS code + */ + async submitCode({ flowToken, code }) { + if (!flowToken || !code) { + throw new Error('TwoFactorAuth.submitCode requires flowToken and code'); + } + // Placeholder — the actual Twitter 2FA flow requires the LoginJsInstrumentationSubtask + // and DenyLoginSubtask handling which varies by account settings. + throw new Error('TwoFactorAuth.submitCode is not yet implemented'); + } +} diff --git a/src/mcp/local-tools.js b/src/mcp/local-tools.js index 57ee8536..024a9297 100644 --- a/src/mcp/local-tools.js +++ b/src/mcp/local-tools.js @@ -23,9 +23,13 @@ import { scrapeMedia, scrapeListMembers, scrapeBookmarks, + scrapeLikedTweets, scrapeNotifications, scrapeTrending, scrapeSpaces, + scrapeDmConversations, + scrapeDmMessages, + randomDelay, } from '../scrapers/index.js'; import fs from 'fs/promises'; @@ -40,8 +44,6 @@ let browser = null; let page = null; const sleep = (ms) => new Promise((r) => setTimeout(r, ms)); -const randomDelay = (min = 1000, max = 3000) => - sleep(min + Math.random() * (max - min)); /** * Ensure a browser/page pair is available, creating if needed. 
@@ -56,6 +58,11 @@ async function ensureBrowser() { } browser = await createBrowser(); page = await createPage(browser); + + const cookie = process.env.XACTIONS_SESSION_COOKIE; + if (cookie) { + await loginWithCookie(page, cookie); + } } return { browser, page }; } @@ -73,6 +80,15 @@ export async function closeBrowser() { } } +async function loadDmConfig() { + try { + const p = path.join(os.homedir(), '.xactions', 'config.json'); + return JSON.parse(await fs.readFile(p, 'utf-8')); + } catch { + return {}; + } +} + // ============================================================================ // Helpers // ============================================================================ @@ -156,6 +172,21 @@ export async function x_get_profile({ username }) { return scrapeProfile(pg, username); } +export async function x_get_profiles({ usernames }) { + const { page: pg } = await ensureBrowser(); + const results = []; + for (let i = 0; i < usernames.length; i++) { + if (i > 0) await randomDelay(); + try { + const profile = await scrapeProfile(pg, usernames[i]); + results.push({ username: usernames[i], ...profile }); + } catch (err) { + results.push({ username: usernames[i], error: err.message }); + } + } + return results; +} + export async function x_get_followers({ username, limit = 100 }) { const { page: pg } = await ensureBrowser(); return scrapeFollowers(pg, username, { limit }); @@ -629,6 +660,12 @@ export async function x_get_bookmarks({ limit = 100 }) { return scrapeBookmarks(pg, { limit }); } +export async function x_get_likes({ username, limit = 50 }) { + const { page: pg } = await ensureBrowser(); + const likedTweets = await scrapeLikedTweets(pg, username || null, { limit }); + return { likedTweets, count: likedTweets.length, username: username || 'me' }; +} + export async function x_clear_bookmarks() { const { page: pg } = await ensureBrowser(); await pg.goto('https://x.com/i/bookmarks', { waitUntil: 'networkidle2' }); @@ -779,63 +816,25 @@ export async 
/**
 * List DM conversations from the inbox.
 *
 * Uses the shared browser page and forwards the `dmPasscode` value from the
 * local config to the scraper.
 *
 * @param {Object} params
 * @param {number} [params.limit=20] - Maximum number of conversations to return
 * @returns {Promise<Array<Object>>} Whatever scrapeDmConversations resolves to
 */
export async function x_get_conversations({ limit = 20 }) {
  const { page: pg } = await ensureBrowser();
  const { dmPasscode } = await loadDmConfig();
  const scrapeOptions = { limit, passcode: dmPasscode };
  return scrapeDmConversations(pg, scrapeOptions);
}
/**
 * Read the DM thread with one user.
 *
 * @param {Object} params
 * @param {string} params.username - Conversation partner to look up
 * @param {number} [params.limit=50] - Maximum number of messages to return
 * @returns {Promise<{messages: Array<Object>, total: number, conversation: string}>}
 */
export async function x_read_dms({ username, limit = 50 }) {
  const { page: pg } = await ensureBrowser();
  const { dmPasscode: passcode } = await loadDmConfig();
  const messages = await scrapeDmMessages(pg, username, { limit, passcode });
  const total = messages.length;
  return { messages, total, conversation: username };
}
Grok AI // ============================================================================ @@ -1339,6 +1349,7 @@ export const toolMap = { x_login, // Scraping (delegated to scrapers/index.js — single source of truth) x_get_profile, + x_get_profiles, x_get_followers, x_get_following, x_get_non_followers, @@ -1368,6 +1379,7 @@ export const toolMap = { x_reply, x_bookmark, x_get_bookmarks, + x_get_likes, x_clear_bookmarks, x_auto_like, // Discovery @@ -1381,6 +1393,7 @@ export const toolMap = { x_send_dm, x_get_conversations, x_export_dms, + x_read_dms, // Grok AI x_grok_query, x_grok_summarize, diff --git a/src/mcp/server.js b/src/mcp/server.js index 1c94138c..46f0a572 100755 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -91,6 +91,21 @@ const TOOLS = [ required: ['username'], }, }, + { + name: 'x_get_profiles', + description: 'Get profile information for multiple users in a single request with human-like delays between each. Returns array of profile objects.', + inputSchema: { + type: 'object', + properties: { + usernames: { + type: 'array', + items: { type: 'string' }, + description: 'List of usernames (without @)', + }, + }, + required: ['usernames'], + }, + }, { name: 'x_get_followers', description: 'Scrape followers for an account. Supports Twitter, Bluesky, Mastodon, and Threads.', @@ -551,6 +566,18 @@ const TOOLS = [ }, }, }, + { + name: 'x_read_dms', + description: 'Read DM messages with a specific user. Supports encrypted DMs.', + inputSchema: { + type: 'object', + properties: { + username: { type: 'string', description: 'Username to read DMs with (without @)' }, + limit: { type: 'number', description: 'Maximum messages to return (default: 50)' }, + }, + required: ['username'], + }, + }, // ====== Grok AI ====== { name: 'x_grok_query', @@ -1857,14 +1884,13 @@ const TOOLS = [ }, { name: 'x_get_likes', - description: 'Scrape tweets that a user has liked. Shows what content a user engages with.', + description: 'Export liked tweets. 
Defaults to the authenticated user.', inputSchema: { type: 'object', properties: { - username: { type: 'string', description: 'Username (without @)' }, + username: { type: 'string', description: 'Username (without @). Omit for your own likes.' }, limit: { type: 'number', description: 'Maximum liked tweets (default: 50)' }, }, - required: ['username'], }, }, @@ -2224,7 +2250,7 @@ async function executeTool(name, args) { const xeepyTools = [ 'x_get_replies', 'x_get_hashtag', 'x_get_likers', 'x_get_retweeters', 'x_get_media', 'x_get_recommendations', 'x_get_mentions', 'x_get_quote_tweets', - 'x_get_likes', 'x_auto_follow', 'x_follow_engagers', 'x_unfollow_all', + 'x_auto_follow', 'x_follow_engagers', 'x_unfollow_all', 'x_smart_unfollow', 'x_quote_tweet', 'x_auto_comment', 'x_auto_retweet', 'x_detect_bots', 'x_find_influencers', 'x_smart_target', 'x_crypto_analyze', 'x_grok_analyze_image', 'x_audience_insights', 'x_engagement_report', @@ -2424,22 +2450,6 @@ async function executeXeepyTool(name, args) { return { quotes, count: quotes.length }; } - case 'x_get_likes': { - const page = await localTools.getPage(); - await page.goto(`https://x.com/${args.username}/likes`, { waitUntil: 'networkidle2', timeout: 30000 }); - await new Promise(r => setTimeout(r, 3000)); - const likedTweets = await page.evaluate((limit) => { - const articles = document.querySelectorAll('article[data-testid="tweet"]'); - return Array.from(articles).slice(0, limit).map(el => { - const textEl = el.querySelector('[data-testid="tweetText"]'); - const userEl = el.querySelector('[data-testid="User-Name"]'); - const timeEl = el.querySelector('time'); - return { text: textEl?.textContent || '', author: userEl?.textContent || '', timestamp: timeEl?.getAttribute('datetime') || '' }; - }); - }, args.limit || 50); - return { likedTweets, count: likedTweets.length, username: args.username }; - } - // ── Follow Automation ── case 'x_auto_follow': { // Find users via search, then follow them with delays diff 
--git a/src/scrapers/index.js b/src/scrapers/index.js index e83c9469..78de4d30 100644 --- a/src/scrapers/index.js +++ b/src/scrapers/index.js @@ -83,10 +83,15 @@ export const { scrapeMedia, scrapeListMembers, scrapeBookmarks, + scrapeLikedTweets, scrapeNotifications, scrapeTrending, scrapeCommunityMembers, scrapeSpaces, + handleDmPasscodeGate, + scrapeDmConversations, + scrapeDmMessages, + randomDelay, exportToJSON, exportToCSV, } = twitter; @@ -175,6 +180,7 @@ export async function scrape(platform, action, options = {}) { media: 'scrapeMedia', listMembers: 'scrapeListMembers', bookmarks: 'scrapeBookmarks', + likedTweets: 'scrapeLikedTweets', notifications: 'scrapeNotifications', communityMembers: 'scrapeCommunityMembers', spaces: 'scrapeSpaces', @@ -221,19 +227,20 @@ export async function scrape(platform, action, options = {}) { // Actions that only take page + options (no target) const noTargetActions = ['scrapeBookmarks', 'scrapeNotifications', 'scrapeTrending']; - let result; - if (noTargetActions.includes(fnName)) { - result = await fn(page, options); - } else { - result = await fn(page, target, options); - } - - // Auto-close browser if we created it - if (page.__xactions_browser && options.autoClose !== false) { - await page.__xactions_browser.close(); + try { + let result; + if (noTargetActions.includes(fnName)) { + result = await fn(page, options); + } else { + result = await fn(page, target, options); + } + return result; + } finally { + // Auto-close browser if we created it + if (page.__xactions_browser && options.autoClose !== false) { + await page.__xactions_browser.close(); + } } - - return result; } if (needsClient) { @@ -312,11 +319,16 @@ export default { scrapeMedia, scrapeListMembers, scrapeBookmarks, + scrapeLikedTweets, scrapeNotifications, scrapeTrending, scrapeCommunityMembers, scrapeSpaces, - + handleDmPasscodeGate, + scrapeDmConversations, + scrapeDmMessages, + randomDelay, + // Export utilities exportToJSON, exportToCSV, diff --git 
/**
 * Read the XActions config file, best-effort.
 *
 * Never throws: a missing, unreadable or malformed file yields an empty config.
 *
 * @param {string} [configPath] - Path to a JSON config file; defaults to CONFIG_PATH
 * @returns {Promise<Object>} The parsed config object, or {} when the file cannot
 *   be read, is not valid JSON, or does not contain a JSON object
 */
async function loadConfig(configPath) {
  try {
    const parsed = JSON.parse(await fs.readFile(configPath || CONFIG_PATH, 'utf-8'));
    // JSON.parse also accepts `null`, numbers, strings and arrays. Callers read
    // properties off the result (e.g. `config.dmPasscode`), which would throw on null.
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Deliberate best-effort: the config file is optional.
    return {};
  }
}
/**
 * Handle the encrypted DM passcode gate.
 *
 * Detects the gate by URL (`/pin/recovery`) or by gate text in the page body,
 * focuses the first passcode input, types the four digits, then re-checks
 * whether the gate is still showing.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page on /messages
 * @param {Object} [options]
 * @param {string|number} [options.passcode] - 4-digit passcode; takes priority
 *   over `dmPasscode` from the config file
 * @returns {Promise<boolean>} true if unlocked (or no gate), false if blocked
 */
export async function handleDmPasscodeGate(page, options = {}) {
  // X may redirect to /i/chat/pin/recovery or show an inline gate on /messages
  const hasGate = page.url().includes('/pin/recovery') || await page.evaluate(() =>
    document.body?.textContent?.includes('Enter Passcode') ||
    document.body?.textContent?.includes('passcode is required to recover your encryption keys')
  );
  if (!hasGate) return true;

  // Only touch the config file when the caller did not supply a passcode.
  const rawPasscode = options.passcode || (await loadConfig()).dmPasscode;
  if (!rawPasscode) return false;

  // A hand-edited config may store the passcode as a JSON number. Coerce it to a
  // string so that iterating over its digits below cannot throw.
  const passcode = String(rawPasscode);
  if (!/^\d{4}$/.test(passcode)) return false;

  // Find the first passcode input — 4 individual inputs, one per digit.
  // Try clicking/focusing the first one; fall back to all inputs if pixel check fails.
  const focused = await page.evaluate(() => {
    const inputs = document.querySelectorAll('input');
    for (const el of inputs) {
      // Try pixel position first (inline gate)
      const rect = el.getBoundingClientRect();
      if (rect.top > 400 && rect.top < 600) { el.click(); el.focus(); return true; }
    }
    // Fallback: focus the first text input on the page (redirect gate at /pin/recovery)
    const first = document.querySelector('input[type="text"]') || document.querySelector('input');
    if (first) { first.click(); first.focus(); return true; }
    return false;
  });
  if (!focused) return false;
  await sleep(300);

  for (const digit of passcode) {
    await page.keyboard.press(`Digit${digit}`);
    await sleep(300);
  }

  // Wait for redirect back to /messages after successful passcode entry
  await sleep(5000);

  // Check if we're still on the passcode page
  if (page.url().includes('/pin/recovery')) return false;

  const stillGated = await page.evaluate(() =>
    document.body?.textContent?.includes('Enter Passcode') ||
    document.body?.textContent?.includes('passcode is required')
  );

  return !stillGated;
}
/**
 * Scrape the DM conversation list (supports encrypted + regular DMs).
 *
 * Navigates to /messages, verifies the session was not bounced to the login
 * flow, clears the passcode gate, then collects the visible conversation rows,
 * scrolling the inbox up to five times to load more.
 *
 * @param {import('puppeteer').Page} page
 * @param {Object} [options]
 * @param {number} [options.limit=20] - Max conversations to return
 * @param {string} [options.passcode] - DM encryption passcode
 * @returns {Promise<Array<{name: string, preview: string, time: string, testId: string, handle: string}>>}
 * @throws {Error} If the session is not authenticated or the passcode gate cannot be cleared
 */
export async function scrapeDmConversations(page, options = {}) {
  const { limit = 20, passcode } = options;

  await page.goto('https://x.com/messages', { waitUntil: 'networkidle2', timeout: 30000 });

  // Check auth
  if (page.url().includes('/login') || page.url().includes('/i/flow/login')) {
    throw new Error('Authentication failed — cookie may be expired.\n\nRun: xactions login');
  }

  await randomDelay(2000, 3000);

  if (!(await handleDmPasscodeGate(page, { passcode }))) {
    throw new Error('DM encryption passcode required. Set dmPasscode in ~/.xactions/config.json or pass --passcode.');
  }

  // After passcode gate, ensure we're on /messages or /i/chat (X may redirect elsewhere)
  if (!page.url().includes('/messages') && !page.url().includes('/i/chat')) {
    await page.goto('https://x.com/messages', { waitUntil: 'networkidle2', timeout: 30000 });
  }

  await randomDelay(2000, 4000);

  const conversations = [];
  const seen = new Set();

  // Encrypted rows carry a unique `dm-conversation-item-*` test id. Legacy rows all
  // share the literal id "conversation", so keying on the test id alone would keep
  // only the first legacy row. Fall back to the row's identifying content instead.
  const dedupeKey = (item) => {
    if (item.testId.startsWith('dm-conversation-item-')) return item.testId;
    const identity = item.handle || item.name
      ? `${item.handle}|${item.name}`
      : `${item.preview}|${item.time}`;
    return `${item.testId}|${identity}`;
  };

  async function scrapeVisible() {
    const items = await page.evaluate(() => {
      const results = [];
      // Try encrypted DM selectors first, then regular
      const convos = document.querySelectorAll(
        '[data-testid^="dm-conversation-item-"], [data-testid="conversation"]'
      );
      for (const el of convos) {
        const testId = el.getAttribute('data-testid') || '';
        // /i/chat UI uses Tailwind classes; /messages UI uses [dir] attributes
        const nameEl = el.querySelector('.font-bold') ||
          el.querySelector('[dir="ltr"] > span') ||
          el.querySelector('[dir="ltr"]');
        const previewEl = el.querySelector('[dir="auto"]');
        const timeEl = el.querySelector('time');
        // Extract the handle from the avatar link when one is present
        const avatarLink = el.querySelector('a[href^="https://x.com/"]');
        const handle = avatarLink?.getAttribute('href')?.replace('https://x.com/', '') || '';
        results.push({
          name: nameEl?.textContent?.trim() || handle || '',
          preview: previewEl?.textContent?.trim() || '',
          time: timeEl?.getAttribute('datetime') || timeEl?.textContent?.trim() || '',
          testId,
          handle,
        });
      }
      return results;
    });

    for (const item of items) {
      if (!item.testId) continue;
      const key = dedupeKey(item);
      if (seen.has(key)) continue;
      seen.add(key);
      conversations.push(item);
    }
  }

  await scrapeVisible();

  // Scroll to load more conversations if needed
  let scrolls = 0;
  while (conversations.length < limit && scrolls < 5) {
    await page.evaluate(() => {
      const list = document.querySelector('[data-testid="dm-inbox-panel"]') ||
        document.querySelector('section');
      if (list) list.scrollTop = list.scrollHeight;
    });
    await randomDelay(1500, 3000);
    const prevCount = conversations.length;
    await scrapeVisible();
    // Nothing new appeared after scrolling: the list is exhausted.
    if (conversations.length === prevCount) break;
    scrolls++;
  }

  return conversations.slice(0, limit);
}
/**
 * Read DM messages with a specific user.
 *
 * Unless `skipNavigation` is set, loads /messages, verifies the session and
 * clears the passcode gate first. Then opens the matching conversation, scrolls
 * the thread up and back down, and extracts the rendered messages.
 *
 * @param {import('puppeteer').Page} page
 * @param {string} username - Target username (without @)
 * @param {Object} [options]
 * @param {number} [options.limit=50] - Max messages to return
 * @param {string} [options.passcode] - DM encryption passcode
 * @param {number} [options.scrollAttempts=3] - How many times to scroll up for older messages
 * @param {boolean} [options.skipNavigation=false] - Assume the inbox is already open and unlocked
 * @returns {Promise<Array<Object>>} Extracted messages; empty when no conversation matches
 * @throws {Error} If the session is not authenticated or the passcode gate cannot be cleared
 */
export async function scrapeDmMessages(page, username, options = {}) {
  const { limit = 50, passcode, scrollAttempts = 3, skipNavigation = false } = options;

  const openInbox = () =>
    page.goto('https://x.com/messages', { waitUntil: 'networkidle2', timeout: 30000 });

  // Jump the message scroller to its top (older messages) or bottom (newest).
  const scrollThread = (toTop) =>
    page.evaluate((top) => {
      const scroller = document.querySelector('[data-testid="dm-message-list-container"]') ||
        document.querySelector('[data-testid="DmScrollerContainer"]');
      if (scroller) scroller.scrollTop = top ? 0 : scroller.scrollHeight;
    }, toTop);

  if (!skipNavigation) {
    await openInbox();

    // A redirect to the login flow means the session cookie is no longer valid.
    const landedOn = page.url();
    if (landedOn.includes('/login') || landedOn.includes('/i/flow/login')) {
      throw new Error('Authentication failed — cookie may be expired.\n\nRun: xactions login');
    }

    await randomDelay(2000, 4000);

    const unlocked = await handleDmPasscodeGate(page, { passcode });
    if (!unlocked) {
      throw new Error('DM encryption passcode required. Set dmPasscode in ~/.xactions/config.json or pass --passcode.');
    }

    // The gate may leave us somewhere other than the inbox; go back if so.
    const onInbox = page.url().includes('/messages') || page.url().includes('/i/chat');
    if (!onInbox) {
      await openInbox();
    }

    await randomDelay(2000, 4000);
  }

  const opened = await findConversation(page, username);
  if (!opened) {
    return [];
  }

  // Give encrypted messages time to decrypt and render.
  await randomDelay(6000, 9000);

  // Pull older messages into view.
  for (let attempt = 0; attempt < scrollAttempts; attempt++) {
    await scrollThread(true);
    await randomDelay(2000, 3500);
  }

  // Return to the bottom so the newest messages are rendered as well.
  await scrollThread(false);
  await randomDelay(2000, 3500);

  const messages = await extractMessages(page);

  // In the export loop the caller relies on the inbox sidebar staying visible.
  // If it is gone, step back out of the thread.
  if (skipNavigation) {
    const sidebar = await page.$('[data-testid="dm-inbox-panel"]');
    const backButton = sidebar ? null : await page.$('[data-testid="app-bar-back"]');
    if (backButton) {
      await backButton.click();
      await randomDelay(1500, 2500);
    }
  }

  return messages.slice(0, limit);
}
/**
 * Find and click a conversation by username (fuzzy match).
 *
 * The target is compared case-insensitively against each row's display name
 * (substring match, with and without whitespace) and against its handle (exact
 * match). Underscore/hyphen variants of the target are tried as well. If nothing
 * matches, the inbox is scrolled up to five times and re-checked.
 *
 * @param {import('puppeteer').Page} page - Page showing the DM inbox
 * @param {string} targetUser - Username or display name; a leading "@" is ignored
 * @returns {Promise<boolean>} true if a matching conversation was clicked
 */
async function findConversation(page, targetUser) {
  const target = String(targetUser ?? '').trim().replace(/^@/, '').toLowerCase();
  // An empty search term is a substring of every name, so it would "match" and
  // open the first conversation in the list. Treat it as not found.
  if (!target) return false;

  const variants = [
    target,
    target.replace(/_/g, ' '),
    target.replace(/[_-]/g, ''),
  ];
  // Drop duplicates, and variants that collapse to nothing (e.g. "_" -> "").
  const terms = [...new Set(variants)]
    .map((term) => ({ term, compact: term.replace(/\s+/g, '') }))
    .filter(({ compact }) => compact !== '');

  async function clickMatch() {
    const convos = await page.$$('[data-testid^="dm-conversation-item-"], [data-testid="conversation"]');
    for (const conv of convos) {
      // Match against display name and handle, not full conversation text
      const { name, handle } = await conv.evaluate((el) => {
        const nameEl = el.querySelector('.font-bold') ||
          el.querySelector('[dir="ltr"] > span') ||
          el.querySelector('[dir="ltr"]');
        const avatarLink = el.querySelector('a[href^="https://x.com/"]');
        const handle = avatarLink?.getAttribute('href')?.replace('https://x.com/', '')?.toLowerCase() || '';
        return { name: nameEl?.textContent?.toLowerCase() || '', handle };
      });
      const nameStripped = name.replace(/\s+/g, '');
      const isMatch = terms.some(({ term, compact }) =>
        name.includes(term) || nameStripped.includes(compact) || handle === term);
      if (isMatch) {
        await conv.click();
        return true;
      }
    }
    return false;
  }

  if (await clickMatch()) return true;

  // Scroll to find older conversations
  for (let i = 0; i < 5; i++) {
    await page.evaluate(() => {
      const list = document.querySelector('[data-testid="dm-inbox-panel"]') ||
        document.querySelector('section');
      if (list) list.scrollTop = list.scrollHeight;
    });
    await randomDelay(1500, 3000);
    if (await clickMatch()) return true;
  }

  return false;
}
/**
 * Extract messages from the currently open conversation.
 *
 * Runs entirely inside the page. Prefers the encrypted-DM markup
 * (`message-{uuid}` test ids); when none is present it falls back to the
 * regular `messageEntry` markup, where the sender cannot be determined.
 *
 * @param {import('puppeteer').Page} page - Page with a conversation open
 * @returns {Promise<Array<{text: string, time: string, sender: string, hasMedia: boolean}>>}
 */
async function extractMessages(page) {
  return page.evaluate(() => {
    // Encrypted DM UI: message-{uuid} (exclude message-text-*, message-list-*)
    const encryptedSelector = '[data-testid^="message-"]:not([data-testid^="message-text-"]):not([data-testid^="message-list"]):not([data-testid*="-container"])';
    const bubbles = document.querySelectorAll(encryptedSelector);

    if (bubbles.length === 0) {
      // Regular DM markup: text and timestamp only.
      const legacy = [];
      for (const entry of document.querySelectorAll('[data-testid="messageEntry"]')) {
        const text = entry.querySelector('[data-testid="tweetText"]')?.textContent || '';
        const time = entry.querySelector('time')?.getAttribute('datetime') || '';
        if (text) legacy.push({ text, time, sender: 'unknown', hasMedia: false });
      }
      return legacy;
    }

    // Horizontal midpoint of the message pane, used by the alignment fallback.
    const pane = document.querySelector('[data-testid="dm-message-list-container"]') ||
      document.querySelector('[data-testid="dm-message-list"]') ||
      document.querySelector('[data-testid="dm-conversation-panel"]');
    const paneRect = pane?.getBoundingClientRect() || { left: 0, width: 800 };
    const paneCenter = paneRect.left + paneRect.width / 2;

    const rgbaPattern = /rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?/;

    // Sender detection by bubble background colour, tuned for X's dark mode:
    // blue bubbles are the current user, dark bubbles the other party. Walks up
    // to five ancestors, skipping fully transparent backgrounds. Thresholds may
    // not hold in light/dim mode or with custom themes.
    const senderFromBubbleColor = (start) => {
      let node = start;
      for (let depth = 0; depth < 5 && node; depth++, node = node.parentElement) {
        const bg = window.getComputedStyle(node).backgroundColor;
        const parsed = bg ? bg.match(rgbaPattern) : null;
        if (!parsed) continue;

        const [, r, g, b] = parsed.map(Number);
        const alpha = parsed[4] !== undefined ? parseFloat(parsed[4]) : 1;
        if (alpha === 0) continue; // transparent — look at the parent instead

        if (b > 180 && b > r * 1.5) return 'me';
        const isDarkGrey = r < 80 && g < 80 && b < 80 && r === g && g === b;
        const isVeryDark = r < 60 && g < 60 && b < 60;
        if (isDarkGrey || isVeryDark) return 'them';
      }
      return 'unknown';
    };

    const collected = [];
    for (const bubble of bubbles) {
      const testId = bubble.getAttribute('data-testid') || '';
      const uuid = testId.replace('message-', '');

      const body = bubble.querySelector(`[data-testid="message-text-${uuid}"]`) ||
        bubble.querySelector('[data-testid^="message-text-"]') ||
        bubble.querySelector('[data-testid="tweetText"]');

      let text = '';
      let time = '';

      if (body) {
        const hiddenTime = body.querySelector('[aria-hidden="true"]');
        if (hiddenTime) time = hiddenTime.textContent?.trim() || '';

        const textSpan = body.querySelector('span[dir="auto"]');
        if (textSpan) {
          text = textSpan.textContent?.trim() || '';
        } else {
          // No dedicated text span: take the whole container and strip the
          // embedded timestamp text out of it.
          text = body.textContent || '';
          if (time) {
            while (text.includes(time)) text = text.replace(time, '');
            text = text.trim();
          }
        }
      }

      if (!time) {
        const timeEl = bubble.querySelector('time');
        time = timeEl?.getAttribute('datetime') || timeEl?.textContent?.trim() || '';
      }

      let sender = senderFromBubbleColor(body || bubble);
      if (sender === 'unknown') {
        // Alignment fallback: right of the pane's centre = me, left = them.
        const rect = bubble.getBoundingClientRect();
        const bubbleCenter = rect.left + rect.width / 2;
        sender = bubbleCenter > paneCenter ? 'me' : 'them';
      }

      const hasMedia = Boolean(bubble.querySelector('img:not([role="presentation"]):not([alt=""])')) ||
        Boolean(bubble.querySelector('video'));

      if (text || hasMedia) {
        collected.push({ text: text || '(media)', time, sender, hasMedia });
      }
    }

    return collected;
  });
}
/**
 * Sleep for a randomised, human-looking interval.
 *
 * Draws a normally distributed value (Box-Muller) centred 40% of the way from
 * `min` to `max`, with a standard deviation of a quarter of the range, and
 * clamps it to [min, max]. With 8% probability an extra 8–20 s "distraction"
 * pause is added on top, so the total can exceed `max`.
 *
 * @param {number} [min=2000] - Lower bound of the base delay, in ms
 * @param {number} [max=7000] - Upper bound of the base delay, in ms
 * @returns {Promise<void>} Resolves once the delay has elapsed
 */
export const randomDelay = (min = 2000, max = 7000) => {
  const range = max - min;

  // Box-Muller transform; the `|| 1e-10` guard keeps Math.log away from 0.
  const radius = Math.sqrt(-2 * Math.log(Math.random() || 1e-10));
  const angle = 2 * Math.PI * Math.random();
  const gaussian = radius * Math.cos(angle);

  const centre = min + range * 0.4;
  const deviation = range * 0.25;
  const clampedBase = Math.max(min, Math.min(centre + gaussian * deviation, max));

  let distraction = 0;
  if (Math.random() < 0.08) {
    distraction = 8000 + Math.random() * 12000;
  }

  return sleep(clampedBase + distraction);
};
@@ -128,6 +154,13 @@ export async function scrapeProfile(page, username) { await randomDelay(); const profile = await page.evaluate(() => { + // Detect non-existent or suspended accounts + const emptyState = document.querySelector('[data-testid="emptyState"]'); + const errorText = emptyState?.textContent || ''; + if (errorText.includes("doesn't exist") || errorText.includes('account is suspended')) { + return { error: errorText.trim() }; + } + const getText = (sel) => document.querySelector(sel)?.textContent?.trim() || null; const getAttr = (sel, attr) => document.querySelector(sel)?.getAttribute(attr) || null; @@ -140,6 +173,11 @@ export async function scrapeProfile(page, username) { const fullText = nameSection?.textContent || ''; const usernameMatch = fullText.match(/@(\w+)/); + // If no username was found on the page, the profile likely doesn't exist + if (!usernameMatch) { + return { error: 'Profile not found' }; + } + const followingLink = document.querySelector('a[href$="/following"]'); const followersLink = document.querySelector('a[href$="/verified_followers"], a[href$="/followers"]'); @@ -161,6 +199,10 @@ export async function scrapeProfile(page, username) { }; }); + if (profile.error) { + throw new Error(profile.error); + } + return profile; } @@ -675,8 +717,9 @@ export async function scrapeBookmarks(page, options = {}) { const { limit = 100, scrollDelay = 2000 } = options; await page.goto('https://x.com/i/bookmarks', { waitUntil: 'networkidle2' }); + checkAuth(page); await randomDelay(2000, 3000); - + const bookmarks = []; const seen = new Set(); let scrolls = 0; @@ -711,6 +754,78 @@ export async function scrapeBookmarks(page, options = {}) { return bookmarks.slice(0, limit); } +// ============================================================================ +// Liked Tweets Scraper (a user's liked tweets page) +// ============================================================================ + +/** + * Scrape a user's liked tweets (x.com/username/likes). 
/**
 * Resolve the authenticated account's username.
 *
 * Checks `username` in ~/.xactions/config.json first to avoid a page load. If it
 * is absent, reads it from the profile link on the home timeline and caches it
 * in the config file when one already exists.
 *
 * @param {import('puppeteer').Page} page
 * @returns {Promise<string>} Username without the leading slash
 * @throws {Error} If the session is not authenticated or no profile link is found
 */
async function resolveAuthenticatedUsername(page) {
  // os.homedir() rather than process.env.HOME, which is undefined on Windows.
  const [{ homedir }, { join }] = await Promise.all([import('node:os'), import('node:path')]);
  const configPath = join(homedir(), '.xactions', 'config.json');

  let config = null;
  try {
    config = JSON.parse(await fs.readFile(configPath, 'utf-8'));
  } catch {
    // A missing or unreadable config is expected; fall through to the page lookup.
  }
  if (config?.username) return config.username;

  await page.goto('https://x.com/home', { waitUntil: 'networkidle2' });
  checkAuth(page);
  const resolved = await page.evaluate(() => {
    const link = document.querySelector('a[data-testid="AppTabBar_Profile_Link"]');
    return link?.getAttribute('href')?.replace('/', '') || null;
  });
  if (!resolved) throw new Error('Could not determine authenticated username');

  // Cache for next time, but only into an existing, well-formed config file.
  if (config !== null && typeof config === 'object' && !Array.isArray(config)) {
    try {
      await fs.writeFile(configPath, JSON.stringify({ ...config, username: resolved }, null, 2));
    } catch {
      // Caching is best-effort; the scrape itself does not depend on it.
    }
  }
  return resolved;
}

/**
 * Scrape a user's liked tweets (x.com/username/likes).
 * Different from scrapeLikes which scrapes who liked a specific tweet.
 *
 * @param {import('puppeteer').Page} page
 * @param {string|null} [username] - Account whose likes to read; when omitted,
 *   the authenticated account is used
 * @param {Object} [options]
 * @param {number} [options.limit=100] - Max liked tweets to return
 * @param {number} [options.scrollDelay=2000] - Pause after each scroll, in ms
 * @returns {Promise<Array<Object>>} Liked tweets in page order, de-duplicated
 * @throws {Error} If the session is not authenticated or the username cannot be resolved
 */
export async function scrapeLikedTweets(page, username, options = {}) {
  const { limit = 100, scrollDelay = 2000 } = options;

  const account = username || await resolveAuthenticatedUsername(page);

  await page.goto(`https://x.com/${account}/likes`, { waitUntil: 'networkidle2' });
  checkAuth(page);
  await randomDelay(2000, 3000);

  const likes = [];
  const seen = new Set();
  let scrolls = 0;
  const maxScrolls = Math.ceil(limit / 5);

  while (likes.length < limit && scrolls < maxScrolls) {
    const tweets = await page.$$('article[data-testid="tweet"]');
    for (const tweet of tweets) {
      const data = await tweet.evaluate((article) => {
        const text = article.querySelector('[data-testid="tweetText"]')?.innerText || '';
        const author = article.querySelector('[data-testid="User-Name"] a')?.getAttribute('href')?.replace('/', '') || '';
        const time = article.querySelector('time')?.getAttribute('datetime') || '';
        const likeCount = article.querySelector('[data-testid="like"] span')?.innerText || '0';
        const retweets = article.querySelector('[data-testid="retweet"] span')?.innerText || '0';
        const link = article.querySelector('a[href*="/status/"]')?.getAttribute('href') || '';
        return { author, text, time, likes: likeCount, retweets, link: link ? `https://x.com${link}` : '', platform: 'twitter' };
      });

      // Prefer the permalink as identity; fall back to a text prefix.
      const key = data.link || data.text.slice(0, 80);
      if (key && !seen.has(key)) {
        seen.add(key);
        likes.push(data);
      }
    }

    await page.evaluate(() => window.scrollBy(0, window.innerHeight * 2));
    await sleep(scrollDelay);
    scrolls++;
  }

  return likes.slice(0, limit);
}