|
| 1 | +import asyncio |
1 | 2 | from datetime import datetime, timedelta, UTC
|
2 | 3 | import functools
|
3 | 4 | from logging import Logger
|
4 | 5 | from typing import Any
|
5 | 6 |
|
6 | 7 | from estuary_cdk.flow import CaptureBinding
|
7 | 8 | from estuary_cdk.capture import common, Task
|
8 |
| -from estuary_cdk.http import HTTPMixin, TokenSource |
| 9 | +from estuary_cdk.http import HTTPMixin |
9 | 10 |
|
10 | 11 | from .supported_standard_objects import SUPPORTED_STANDARD_OBJECTS, COMMON_CUSTOM_OBJECT_DETAILS
|
11 | 12 |
|
|
17 | 18 | EndpointConfig,
|
18 | 19 | ResourceConfig,
|
19 | 20 | ResourceState,
|
20 |
| - AccessTokenResponse, |
| 21 | + SalesforceTokenSource, |
21 | 22 | GlobalDescribeObjectsResponse,
|
22 | 23 | SOAP_TYPES_NOT_SUPPORTED_BY_BULK_API,
|
23 | 24 | FieldDetailsDict,
|
|
35 | 36 |
|
36 | 37 |
|
37 | 38 | CUSTOM_OBJECT_SUFFIX = '__c'
|
38 |
| - |
39 |
| - |
40 |
| -async def _fetch_instance_url(log: Logger, http: HTTPMixin, config: EndpointConfig) -> str: |
41 |
| - url = OAUTH2_SPEC.accessTokenUrlTemplate |
42 |
| - body = { |
43 |
| - "grant_type": "refresh_token", |
44 |
| - "client_id": config.credentials.client_id, |
45 |
| - "client_secret": config.credentials.client_secret, |
46 |
| - "refresh_token": config.credentials.refresh_token, |
47 |
| - } |
48 |
| - |
49 |
| - response = AccessTokenResponse.model_validate_json( |
50 |
| - await http.request(log, url, method="POST", form=body, _with_token=False) |
51 |
| - ) |
52 |
| - |
53 |
| - return response.instance_url |
| 39 | +BUILD_RESOURCE_SEMAPHORE_LIMIT = 15 |
54 | 40 |
|
55 | 41 |
|
56 | 42 | async def _fetch_queryable_objects(log: Logger, http: HTTPMixin, instance_url: str) -> list[str]:
|
@@ -272,45 +258,56 @@ async def enabled_resources(
|
272 | 258 | log: Logger, http: HTTPMixin, config: EndpointConfig, bindings: list[common._ResolvableBinding]
|
273 | 259 | ) -> list[common.Resource]:
|
274 | 260 | update_oauth_spec(config.is_sandbox)
|
275 |
| - http.token_source = TokenSource(oauth_spec=OAUTH2_SPEC, credentials=config.credentials) |
276 |
| - |
277 |
| - instance_url = await _fetch_instance_url(log, http, config) |
| 261 | + http.token_source = SalesforceTokenSource(oauth_spec=OAUTH2_SPEC, credentials=config.credentials) |
278 | 262 |
|
279 | 263 | enabled_binding_names: list[str] = []
|
280 | 264 |
|
281 | 265 | for binding in bindings:
|
282 | 266 | path: list[str] = binding.resourceConfig.path()
|
283 | 267 | enabled_binding_names.append(path[0])
|
284 | 268 |
|
285 |
| - bulk_job_manager = BulkJobManager(http, log, instance_url) |
286 |
| - rest_query_manager = RestQueryManager(http, log, instance_url) |
287 |
| - resources: list[common.Resource] = [] |
288 |
| - |
289 |
| - for name in enabled_binding_names: |
290 |
| - r = await _object_to_resource(log, http, config, bulk_job_manager, rest_query_manager, instance_url, name, True) |
291 |
| - if r: |
292 |
| - resources.append(r) |
| 269 | + bulk_job_manager = BulkJobManager(http, log, config.credentials.instance_url) |
| 270 | + rest_query_manager = RestQueryManager(http, log, config.credentials.instance_url) |
| 271 | + |
| 272 | + # If we concurrently send multiple requests that exchange the same refresh token for an access token, |
| 273 | + # some of those requests intermittently fail. |
| 274 | + # https://help.salesforce.com/s/articleView?id=xcloud.remoteaccess_oauth_refresh_token_flow.htm&type=5#:~:text=Avoid%20sending%20simultaneous%20requests%20that%20contain%20the%20same%20refresh%20token.%20If%20your%20client%20sends%20identical%20requests%20at%20the%20same%20time%2C%20some%20of%20the%20requests%20fail%20intermittently%20and%20the%20Status%20column%20in%20the%20Login%20History%20displays%20Failed%3A%20Token%20request%20is%20already%20being%20processed. |
| 275 | + # To avoid this, we make a noop request to set the token_source's access token before using the scatter-gather |
| 276 | + # technique to make multiple requests concurrently. This prevents the first BUILD_RESOURCE_SEMAPHORE_LIMIT |
| 277 | + # requests from all exchanging the same access token and encountering that intermittent error. |
| 278 | + await _fetch_queryable_objects(log, http, config.credentials.instance_url) |
| 279 | + |
| 280 | + semaphore = asyncio.Semaphore(BUILD_RESOURCE_SEMAPHORE_LIMIT) |
| 281 | + async def build_resource(name: str) -> common.Resource | None: |
| 282 | + async with semaphore: |
| 283 | + return await _object_to_resource( |
| 284 | + log, http, config, bulk_job_manager, rest_query_manager, config.credentials.instance_url, name, True |
| 285 | + ) |
| 286 | + |
| 287 | + task_results = await asyncio.gather( |
| 288 | + *( |
| 289 | + build_resource(name) for name in enabled_binding_names |
| 290 | + ) |
| 291 | + ) |
293 | 292 |
|
294 |
| - return resources |
| 293 | + return [resource for resource in task_results if resource is not None] |
295 | 294 |
|
296 | 295 |
|
297 | 296 | # all_resources returns resources for all possible supported bindings.
|
298 | 297 | async def all_resources(
|
299 | 298 | log: Logger, http: HTTPMixin, config: EndpointConfig
|
300 | 299 | ) -> list[common.Resource]:
|
301 | 300 | update_oauth_spec(config.is_sandbox)
|
302 |
| - http.token_source = TokenSource(oauth_spec=OAUTH2_SPEC, credentials=config.credentials) |
303 |
| - |
304 |
| - instance_url = await _fetch_instance_url(log, http, config) |
| 301 | + http.token_source = SalesforceTokenSource(oauth_spec=OAUTH2_SPEC, credentials=config.credentials) |
305 | 302 |
|
306 |
| - queryable_object_names = await _fetch_queryable_objects(log, http, instance_url) |
| 303 | + queryable_object_names = await _fetch_queryable_objects(log, http, config.credentials.instance_url) |
307 | 304 |
|
308 |
| - bulk_job_manager = BulkJobManager(http, log, instance_url) |
309 |
| - rest_query_manager = RestQueryManager(http, log, instance_url) |
| 305 | + bulk_job_manager = BulkJobManager(http, log, config.credentials.instance_url) |
| 306 | + rest_query_manager = RestQueryManager(http, log, config.credentials.instance_url) |
310 | 307 | resources: list[common.Resource] = []
|
311 | 308 |
|
312 | 309 | for name in queryable_object_names:
|
313 |
| - r = await _object_to_resource(log, http, config, bulk_job_manager, rest_query_manager, instance_url, name) |
| 310 | + r = await _object_to_resource(log, http, config, bulk_job_manager, rest_query_manager, config.credentials.instance_url, name) |
314 | 311 | if r:
|
315 | 312 | resources.append(r)
|
316 | 313 |
|
|
0 commit comments