Skip to content

Commit 2b52e41

Browse files
committed
HADOOP-19740. "ec2" region is now resolved ourselves.
If the region == ec2 then we don't hand off to the SDK chain; instead we only use the part of the SDK which performs the metadata retrieval. That class is nominally private/unstable, but cloudstore has used it in the past... If it is removed after an SDK update, we will have to handle it.
1 parent 881f0fa commit 2b52e41

File tree

4 files changed

+210
-89
lines changed

4 files changed

+210
-89
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
5757
import org.apache.hadoop.fs.store.LogExactlyOnce;
5858

59+
import static java.util.Objects.requireNonNull;
5960
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED;
6061
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED;
6162
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME;
@@ -67,6 +68,7 @@
6768
import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner;
6869
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
6970
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4;
71+
import static org.apache.hadoop.fs.s3a.impl.RegionResolution.RegionResolutionMechanism.Sdk;
7072
import static org.apache.hadoop.fs.s3a.impl.RegionResolution.calculateRegion;
7173

7274

@@ -88,6 +90,18 @@ public class DefaultS3ClientFactory extends Configured
8890
protected static final Logger LOG =
8991
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
9092

93+
/**
94+
* A one-off log message stating that the SDK default region chain is in use.
95+
*/
96+
private static final LogExactlyOnce DEFAULT_REGION_CHAIN =
97+
new LogExactlyOnce(LOG);
98+
99+
/**
100+
* Message printed when the SDK Region chain is in use.
101+
*/
102+
private static final String SDK_REGION_CHAIN_IN_USE =
103+
"S3A filesystem client is using the SDK region resolution chain.";
104+
91105
/**
92106
* A one-off log stating whether S3 Access Grants are enabled.
93107
*/
@@ -280,16 +294,16 @@ protected ClientOverrideConfiguration.Builder createClientOverrideConfiguration(
280294
* <li> S3 cross region is enabled by default irrespective of region or endpoint
281295
* is set or not.</li>
282296
* </ol>
283-
*
284297
* @param builder S3 client builder.
285298
* @param parameters parameter object
286-
* @param conf conf configuration object
299+
* @param conf configuration object
287300
* @param <BuilderT> S3 client builder type
288301
* @param <ClientT> S3 client type
302+
* @return how the region was resolved.
289303
* @throws IllegalArgumentException if endpoint is set when FIPS is enabled.
290304
*/
291-
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion(
292-
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) {
305+
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> RegionResolution.Resolution configureEndpointAndRegion(
306+
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) throws IOException {
293307

294308
final RegionResolution.Resolution resolution =
295309
calculateRegion(parameters, conf);
@@ -298,19 +312,30 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
298312
// always setting to true or false guarantees the value is non-null,
299313
// which tests expect.
300314
builder.fipsEnabled(resolution.isUseFips());
301-
final Region region = resolution.getRegion();
302-
if (region != null) {
303-
builder.region(region);
304-
}
305-
// s3 cross region access
306-
if (resolution.isCrossRegionAccessEnabled()) {
307-
builder.crossRegionAccessEnabled(true);
308-
}
309-
if (!resolution.isUseCentralEndpoint()) {
310-
final URI endpointUri = resolution.getEndpointUri();
311-
builder.endpointOverride(endpointUri);
312-
LOG.debug("Setting endpoint to {}", endpointUri);
315+
316+
if (Sdk != resolution.getMechanism()) {
317+
final Region region = resolution.getRegion();
318+
builder.region(requireNonNull(region));
319+
// s3 cross region access
320+
if (resolution.isCrossRegionAccessEnabled()) {
321+
builder.crossRegionAccessEnabled(true);
322+
}
323+
if (!resolution.isUseCentralEndpoint()) {
324+
final URI endpointUri = resolution.getEndpointUri();
325+
if (endpointUri != null) {
326+
builder.endpointOverride(endpointUri);
327+
LOG.debug("Setting endpoint to {}", endpointUri);
328+
}
329+
}
330+
} else {
331+
// handing off all resolution to SDK.
332+
// region configuration was set to the SDK region name or the empty string.
333+
// allow this if people really want it; it is OK to rely on this
334+
// when deployed in EC2.
335+
DEFAULT_REGION_CHAIN.info(SDK_REGION_CHAIN_IN_USE);
336+
LOG.debug(SDK_REGION_CHAIN_IN_USE);
313337
}
338+
return resolution;
314339
}
315340

316341
/**

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java

Lines changed: 97 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919
package org.apache.hadoop.fs.s3a.impl;
2020

21+
import java.io.IOException;
2122
import java.net.URI;
2223
import java.net.URISyntaxException;
23-
import java.util.Locale;
2424
import java.util.Optional;
2525
import java.util.regex.Matcher;
2626
import java.util.regex.Pattern;
@@ -31,9 +31,12 @@
3131
import org.slf4j.LoggerFactory;
3232
import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
3333
import software.amazon.awssdk.regions.Region;
34+
import software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider;
3435

3536
import org.apache.hadoop.classification.VisibleForTesting;
3637
import org.apache.hadoop.conf.Configuration;
38+
import org.apache.hadoop.fs.s3a.Invoker;
39+
import org.apache.hadoop.fs.s3a.Retries;
3740
import org.apache.hadoop.fs.s3a.S3ClientFactory;
3841

3942
import static java.util.Objects.requireNonNull;
@@ -98,6 +101,7 @@ public enum RegionResolutionMechanism {
98101
CalculatedFromEndpoint("Calculated from endpoint"),
99102
FallbackToCentral("Fallback to central endpoint"),
100103
ParseVpceEndpoint("Parse VPCE Endpoint"),
104+
Ec2Metadata("EC2 Metadata"),
101105
Sdk("SDK resolution chain"),
102106
Specified("region specified");
103107

@@ -141,7 +145,7 @@ public static final class Resolution {
141145
* How was the region resolved?
142146
* Null means unresolved.
143147
*/
144-
private RegionResolutionMechanism resolution;
148+
private RegionResolutionMechanism mechanism;
145149

146150
/**
147151
* Should FIPS be enabled?
@@ -168,17 +172,31 @@ public static final class Resolution {
168172
*/
169173
private boolean useCentralEndpoint;
170174

175+
public Resolution() {
176+
}
177+
178+
/**
179+
* Instantiate with a region and resolution mechanism.
180+
* @param region region
181+
* @param mechanism resolution mechanism.
182+
*/
183+
public Resolution(final Region region, final RegionResolutionMechanism mechanism) {
184+
this.region = region;
185+
this.mechanism = mechanism;
186+
}
187+
171188
/**
172189
* Set the region.
173190
* Declares the region as resolved even when the value is null (i.e. resolve to SDK).
174-
* @param region new value
191+
* @param region region
192+
* @param resolutionMechanism resolution mechanism.
175193
* @return the builder
176194
*/
177195
public Resolution withRegion(
178196
@Nullable final Region region,
179197
final RegionResolutionMechanism resolutionMechanism) {
180198
this.region = region;
181-
this.resolution = requireNonNull(resolutionMechanism);
199+
this.mechanism = requireNonNull(resolutionMechanism);
182200
return this;
183201
}
184202

@@ -238,16 +256,16 @@ public boolean isCrossRegionAccessEnabled() {
238256
return crossRegionAccessEnabled;
239257
}
240258

241-
public RegionResolutionMechanism getResolution() {
242-
return resolution;
259+
public RegionResolutionMechanism getMechanism() {
260+
return mechanism;
243261
}
244262

245263
public String getEndpointStr() {
246264
return endpointStr;
247265
}
248266

249267
public boolean isRegionResolved() {
250-
return resolution != null;
268+
return mechanism != null;
251269
}
252270

253271
public boolean isUseCentralEndpoint() {
@@ -268,7 +286,7 @@ public Resolution withUseCentralEndpoint(final boolean value) {
268286
public String toString() {
269287
final StringBuilder sb = new StringBuilder("Resolution{");
270288
sb.append("region=").append(region);
271-
sb.append(", resolution=").append(resolution);
289+
sb.append(", resolution=").append(mechanism);
272290
sb.append(", useFips=").append(useFips);
273291
sb.append(", crossRegionAccessEnabled=").append(crossRegionAccessEnabled);
274292
sb.append(", endpointUri=").append(endpointUri);
@@ -324,34 +342,55 @@ public static Optional<Resolution> getS3RegionFromEndpoint(
324342
if (matcher.find()) {
325343
LOG.debug("Mapping to VPCE");
326344
LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1));
327-
return Optional.of(new Resolution()
328-
.withRegion(Region.of(matcher.group(1)),
329-
RegionResolutionMechanism.ParseVpceEndpoint));
345+
return Optional.of(new Resolution(
346+
Region.of(matcher.group(1)),
347+
RegionResolutionMechanism.ParseVpceEndpoint));
330348
}
331349

332350
LOG.debug("Endpoint {} is not the default; parsing", endpoint);
333351
return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME)
334352
.map(r ->
335-
new Resolution().withRegion(r,
336-
RegionResolutionMechanism.CalculatedFromEndpoint));
353+
new Resolution(r, RegionResolutionMechanism.CalculatedFromEndpoint));
337354
}
338355

339356
// No resolution.
340357
return Optional.empty();
341358
}
342359

360+
/**
361+
* Does the region name refer to an SDK region?
362+
* @param configuredRegion region in the configuration
363+
* @return true if this is considered to refer to an SDK region.
364+
*/
365+
public static boolean isSdkRegion(String configuredRegion) {
366+
return SDK_REGION.equalsIgnoreCase(configuredRegion)
367+
|| EMPTY_REGION.equalsIgnoreCase(configuredRegion);
368+
}
369+
370+
/**
371+
* Does the region name refer to {@code "ec2"}, in which case special handling
372+
* is required.
373+
* @param configuredRegion region in the configuration
374+
* @return true if this is considered to refer to the EC2 region.
375+
*/
376+
public static boolean isEc2Region(String configuredRegion) {
377+
return EC2_REGION.equalsIgnoreCase(configuredRegion);
378+
}
379+
343380
/**
344381
* Calculate the region and the final endpoint.
345382
* @param parameters creation parameters
346383
* @param conf configuration with other options.
347384
* @return the resolved region and endpoint.
385+
* @throws IOException if the client failed to communicate with the EC2 metadata service.
348386
* @throws IllegalArgumentException failure to parse endpoint, or FIPS settings.
349387
*/
388+
@Retries.OnceTranslated
350389
public static Resolution calculateRegion(
351390
final S3ClientFactory.S3ClientCreationParameters parameters,
352-
final Configuration conf) {
391+
final Configuration conf) throws IOException {
353392

354-
final Resolution resolution = new Resolution();
393+
Resolution resolution = new Resolution();
355394

356395
// endpoint; may be null
357396
final String endpointStr = parameters.getEndpoint();
@@ -364,42 +403,26 @@ public static Resolution calculateRegion(
364403
// If the region was configured, set it.
365404
// this includes special handling of the sdk, ec2 and "" regions.
366405
if (configuredRegion != null) {
367-
switch (configuredRegion.toLowerCase(Locale.ROOT)) {
368-
case EC2_REGION:
369-
case SDK_REGION:
370-
case EMPTY_REGION:
406+
checkArgument(!"null".equals(configuredRegion),
407+
"null is region name");
408+
if (isSdkRegion(configuredRegion)) {
371409
resolution.withRegion(null, RegionResolutionMechanism.Sdk);
372-
break;
373-
374-
default:
410+
} else if (isEc2Region(configuredRegion)) {
411+
// special EC2 handling
412+
final Resolution r = getS3RegionFromEc2IAM();
413+
resolution.withRegion(r.getRegion(), r.getMechanism());
414+
} else {
375415
resolution.withRegion(Region.of(configuredRegion),
376416
RegionResolutionMechanism.Specified);
377417
}
378418
}
379419

380-
381-
// cross region setting.
382-
resolution.withCrossRegionAccessEnabled(
383-
conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
384-
AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT));
385-
386420
// central endpoint if no endpoint has been set, or it is explicitly
387421
// requested
388422
boolean endpointEndsWithCentral = endpointStr == null
389423
|| endpointStr.isEmpty()
390424
|| endpointStr.endsWith(CENTRAL_ENDPOINT);
391425

392-
// fips settings.
393-
final boolean fipsEnabled = parameters.isFipsEnabled();
394-
resolution.withUseFips(fipsEnabled);
395-
if (fipsEnabled) {
396-
// validate the FIPS settings
397-
checkArgument(endpoint == null || endpointEndsWithCentral,
398-
"%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
399-
checkArgument(!parameters.isPathStyleAccess(),
400-
FIPS_PATH_ACCESS_INCOMPATIBLE);
401-
}
402-
403426
if (!resolution.isRegionResolved()) {
404427
// parse from the endpoint and set if calculated
405428
LOG.debug("Falling back to parsing region endpoint {}; endpointEndsWithCentral={}",
@@ -409,9 +432,26 @@ public static Resolution calculateRegion(
409432
if (regionFromEndpoint.isPresent()) {
410433
regionFromEndpoint
411434
.map(r ->
412-
resolution.withRegion(r.getRegion(), r.getResolution()));
435+
resolution.withRegion(r.getRegion(), r.getMechanism()));
413436
}
414437
}
438+
439+
// cross region setting.
440+
resolution.withCrossRegionAccessEnabled(
441+
conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
442+
AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT));
443+
444+
// fips settings.
445+
final boolean fipsEnabled = parameters.isFipsEnabled();
446+
resolution.withUseFips(fipsEnabled);
447+
if (fipsEnabled) {
448+
// validate the FIPS settings
449+
checkArgument(endpoint == null || endpointEndsWithCentral,
450+
"%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
451+
checkArgument(!parameters.isPathStyleAccess(),
452+
FIPS_PATH_ACCESS_INCOMPATIBLE);
453+
}
454+
415455
if (!resolution.isRegionResolved()) {
416456
// still failing to resolve the region
417457
// fall back to central
@@ -442,4 +482,21 @@ public static Resolution calculateRegion(
442482
return resolution;
443483
}
444484

485+
/**
486+
* Probes EC2 Metadata for the region.
487+
* This uses a class {@code InstanceProfileRegionProvider} which AWS
488+
* declare as for internal use only.
489+
* Linking/invocation should be caught and downgraded to returning an empty() option.
490+
* @return the region from EC2 IAM.
491+
* @throws IOException if the client failed to communicate with the EC2 metadata service.
492+
*/
493+
@VisibleForTesting
494+
@Retries.OnceTranslated
495+
static Resolution getS3RegionFromEc2IAM() throws IOException {
496+
return Invoker.once("Resolve EC2 Metadata", "/", () -> {
497+
LOG.debug("Resolving region through EC2 Metadata");
498+
final Region region = new InstanceProfileRegionProvider().getRegion();
499+
return new Resolution(region, RegionResolutionMechanism.Ec2Metadata);
500+
});
501+
}
445502
}

0 commit comments

Comments
 (0)