diff --git a/README.md b/README.md index 125441b..ec45e4a 100644 --- a/README.md +++ b/README.md @@ -28,13 +28,26 @@ which can be viewed in any modern Browser. The tool will stop early if it receiv ### Options -| Name | Description | Default value | -|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------| -| Session ID | The session ID used by Instagram to authenticate the user. It can be attained either by logging in via the tool or through the data stored by the official Instagram web app. | -| Root account | The account from which the graph generation should start. | | -| Generations | The distance between the root account and the current user. It is defined by the number of accounts between them. Generation 0, therefor, only includes the root account. | 1 | -| Maximal follower count | Maximum amount of followers to fetch for each account. If more followers are over-fetched, they will be included but not further queried. It also applies to the followed accounts. | 250 | -| Include following | Also fetch the accounts are followed by an account, not only their followers. | Yes | +| Name | Environment variable | Description | Default value | +|------------------------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------| +| Session ID | | The session ID used by Instagram to authenticate the user. It can be attained either by logging in via the tool or through the data stored by the official Instagram web app. | +| Root account | | The account from which the graph generation should start. | | +| Generations | | The distance between the root account and the current user. It is defined by the number of accounts between them. 
Generation 0, therefore, only includes the root account. | 1 | +| Maximal follower count | | Maximum number of followers to fetch for each account. If more followers are over-fetched, they will be included but not further queried. It also applies to the followed accounts. | 250 | +| Include following | | Also fetch the accounts that are followed by an account, not only their followers. | Yes | +| | `RATE_BATCH_SIZE` | How many followers can be fetched before pausing. This is determined by the total account count in the graph. The batch size should not exceed 9000. Less than 5000 is recommended. | 3000 | +| | `RATE_BATCH_COUNT` | How many consecutive batches can be executed before pausing between them. Should not exceed 20. | 15 | +| | `RATE_PARALLEL_TASKS` | How many requests to Instagram can be executed in parallel. | 3 | +| | `RATE_DELAY_IMAGES_MAX` | How long to wait at most before downloading a profile image in seconds. | 5 | +| | `RATE_DELAY_IMAGES_MIN` | How long to wait at least before downloading a profile image in seconds. | 1 | +| | `RATE_DELAY_PAGES_MAX` | How long to wait at most before sending a new request to Instagram in seconds. | 60 | +| | `RATE_DELAY_PAGES_MIN` | How long to wait at least before sending a new request to Instagram in seconds. | 30 | +| | `RATE_DELAY_BATCHES_MAX` | How long to wait at most between batches in minutes. | 60 | +| | `RATE_DELAY_BATCHES_MIN` | How long to wait at least between batches in minutes. | 30 | +| | `RATE_DELAY_DAILY_MAX` | How long to wait at most between consecutive batches once the set batch count is reached in hours. | 30 | +| | `RATE_DELAY_DAILY_MIN` | How long to wait at least between consecutive batches once the set batch count is reached in hours. | 25 | + +The actual wait time is always determined at random between the `MIN` and `MAX` values. 
/**
 * Reads a non-negative whole number from an environment variable.
 *
 * An unset or blank variable silently yields `defaultValue`. A value that is
 * present but not a plain run of decimal digits (for example "12abc", "1.5"
 * or "-3") is reported on stderr and also yields `defaultValue`.
 *
 * @param envVarName Name of the environment variable to read.
 * @param defaultValue Value returned when the variable is missing or invalid.
 * @returns The parsed number, or `defaultValue` when nothing usable was found.
 */
function environmentVariableOrDefault(envVarName: string, defaultValue: number): number {
    const env = process.env[envVarName]
    const trimmed = env?.trim()

    // Not configured at all: this is the normal case, so no error is logged.
    if (trimmed === undefined || trimmed.length === 0) return defaultValue

    // Validate the whole string up front. parseInt on its own never throws and
    // happily truncates trailing garbage ("12abc" -> 12, "1.5" -> 1), which
    // would hide configuration mistakes.
    if (/^\d+$/.test(trimmed)) {
        const value = Number.parseInt(trimmed, 10)

        // Guard against digit strings too large to be represented exactly.
        if (Number.isSafeInteger(value)) return value
    }

    console.error(
        `Failed to read ${envVarName}, expected a non-negative whole number, ` +
        `got "${env}". Falling back to the default value ${defaultValue}`
    )

    return defaultValue
}
/**
 * Picks a random wait time within the given limits and starts a timer for it.
 *
 * The limits are read only: if `min` and `max` arrive in the wrong order they
 * are normalized locally, so the caller's (possibly shared) limit object is
 * never modified.
 *
 * @param limit Bounds of the wait time; passed to `setTimeout`, i.e. milliseconds.
 * @returns `time`, the chosen wait (floored to a whole number, at least the
 *          smaller bound and below the larger one unless both are equal), and
 *          `delay`, a promise that resolves once that time has elapsed.
 */
function randomDelay(limit: RandomDelayLimit) {
    // Normalize into locals instead of swapping fields on the argument.
    const lowest = Math.min(limit.min, limit.max)
    const highest = Math.max(limit.min, limit.max)

    const time = Math.floor(Math.random() * (highest - lowest) + lowest);

    return {time, delay: new Promise(resolve => setTimeout(resolve, time))}
}