@@ -23,7 +23,6 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.Objects;
 import java.util.stream.Collectors;
 import org.apache.beam.sdk.coders.RowCoder;
 import org.apache.beam.sdk.schemas.Schema;

@@ -632,20 +632,15 @@ static void verifyDatasetPresence(DatasetService datasetService, TableReference
   }
 
   static String getDatasetLocation(
-      DatasetService datasetService, String projectId, String datasetId) {
-    Dataset dataset;
+      DatasetService datasetService, String projectId, String datasetId)
+      throws IOException, InterruptedException {
     try {
-      dataset = datasetService.getDataset(projectId, datasetId);
-    } catch (Exception e) {
-      if (e instanceof InterruptedException) {
-        Thread.currentThread().interrupt();
-      }
-      throw new RuntimeException(
-          String.format(
-              "unable to obtain dataset for dataset %s in project %s", datasetId, projectId),
-          e);
+      Dataset dataset = datasetService.getDataset(projectId, datasetId);
+      return dataset.getLocation();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw e;
     }
-    return dataset.getLocation();
   }
 
   static void verifyTablePresence(DatasetService datasetService, TableReference table) {

@@ -26,6 +26,7 @@
 import com.google.api.services.bigquery.model.TableReference;
 import com.google.api.services.bigquery.model.TableSchema;
 import com.google.api.services.bigquery.model.TimePartitioning;
+import com.google.cloud.hadoop.util.ApiErrorExtractor;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
@@ -302,9 +303,34 @@ private BigQueryHelpers.PendingJob startZeroLoadJob(
         loadJobProjectId == null || loadJobProjectId.get() == null
             ? tableReference.getProjectId()
             : loadJobProjectId.get();
-    String bqLocation =
-        BigQueryHelpers.getDatasetLocation(
-            datasetService, tableReference.getProjectId(), tableReference.getDatasetId());
+    String bqLocation;
+    try {
+      bqLocation =
+          BigQueryHelpers.getDatasetLocation(
+              datasetService, tableReference.getProjectId(), tableReference.getDatasetId());
+    } catch (IOException e) {
+      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
+      if (errorExtractor.itemNotFound(e)) {
+        throw new RuntimeException(
+            String.format(
+                "Dataset %s not found in project %s. Please ensure the dataset exists before running the pipeline.",
+                tableReference.getDatasetId(), tableReference.getProjectId()),
+            e);
+      }
+
+      throw new RuntimeException(
+          String.format(
+              "Unable to get dataset location for dataset %s in project %s",
+              tableReference.getDatasetId(), tableReference.getProjectId()),
+          e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(
+          String.format(
+              "Interrupted while getting dataset location for dataset %s in project %s",
+              tableReference.getDatasetId(), tableReference.getProjectId()),
+          e);
+    }
 
     BigQueryHelpers.PendingJob retryJob =
         new BigQueryHelpers.PendingJob(
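
Note: the try/catch added above around BigQueryHelpers.getDatasetLocation is repeated almost verbatim at the two call sites in the files below. A hypothetical package-private helper could consolidate that handling; the sketch is illustrative only (the class DatasetLocations and method getLocationOrThrow are not part of this PR), and it reuses only calls that appear in the diff (ApiErrorExtractor.itemNotFound, BigQueryHelpers.getDatasetLocation).

package org.apache.beam.sdk.io.gcp.bigquery;

import com.google.cloud.hadoop.util.ApiErrorExtractor;
import java.io.IOException;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;

/** Hypothetical helper that centralizes the dataset-location error handling shown in this PR. */
final class DatasetLocations {
  private DatasetLocations() {}

  /** Resolves the dataset location, translating failures the same way as the call sites above. */
  static String getLocationOrThrow(
      DatasetService datasetService, String projectId, String datasetId) {
    try {
      return BigQueryHelpers.getDatasetLocation(datasetService, projectId, datasetId);
    } catch (IOException e) {
      // Distinguish "dataset does not exist" from other I/O failures.
      if (new ApiErrorExtractor().itemNotFound(e)) {
        throw new RuntimeException(
            String.format(
                "Dataset %s not found in project %s. Please ensure the dataset exists before"
                    + " running the pipeline.",
                datasetId, projectId),
            e);
      }
      throw new RuntimeException(
          String.format(
              "Unable to get dataset location for dataset %s in project %s", datasetId, projectId),
          e);
    } catch (InterruptedException e) {
      // Restore the interrupt flag before converting to an unchecked exception.
      Thread.currentThread().interrupt();
      throw new RuntimeException(
          String.format(
              "Interrupted while getting dataset location for dataset %s in project %s",
              datasetId, projectId),
          e);
    }
  }
}

Keeping the handling at each call site, as the PR does, lets each message stay close to the job being started; the helper would trade that locality for less duplication.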

@@ -21,6 +21,7 @@
 import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
 import com.google.api.services.bigquery.model.JobReference;
 import com.google.api.services.bigquery.model.TableReference;
+import com.google.cloud.hadoop.util.ApiErrorExtractor;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.Collection;
@@ -318,6 +319,7 @@ private BigQueryHelpers.PendingJob startCopy(
       CreateDisposition createDisposition,
       @Nullable String kmsKey,
       @Nullable ValueProvider<String> loadJobProjectId) {
+
     JobConfigurationTableCopy copyConfig =
         new JobConfigurationTableCopy()
             .setSourceTables(tempTables)
@@ -329,14 +331,39 @@
          new EncryptionConfiguration().setKmsKeyName(kmsKey));
     }
 
-    String bqLocation =
-        BigQueryHelpers.getDatasetLocation(
-            datasetService, ref.getProjectId(), ref.getDatasetId());
+    String bqLocation;
+    try {
+      bqLocation =
+          BigQueryHelpers.getDatasetLocation(
+              datasetService, ref.getProjectId(), ref.getDatasetId());
+    } catch (IOException e) {
+      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
+      if (errorExtractor.itemNotFound(e)) {
+        throw new RuntimeException(
+            String.format(
+                "Dataset %s not found in project %s. Please ensure the dataset exists before running the pipeline.",
+                ref.getDatasetId(), ref.getProjectId()),
+            e);
+      }
+      throw new RuntimeException(
+          String.format(
+              "Unable to get dataset location for dataset %s in project %s",
+              ref.getDatasetId(), ref.getProjectId()),
+          e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(
+          String.format(
+              "Interrupted while getting dataset location for dataset %s in project %s",
+              ref.getDatasetId(), ref.getProjectId()),
+          e);
+    }
 
     String projectId =
         loadJobProjectId == null || loadJobProjectId.get() == null
             ? ref.getProjectId()
             : loadJobProjectId.get();
+
     BigQueryHelpers.PendingJob retryJob =
         new BigQueryHelpers.PendingJob(
             jobId -> {

@@ -27,6 +27,7 @@
 import com.google.api.services.bigquery.model.TableSchema;
 import com.google.api.services.bigquery.model.TimePartitioning;
 import com.google.auto.value.AutoValue;
+import com.google.cloud.hadoop.util.ApiErrorExtractor;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@@ -505,8 +506,35 @@ private PendingJob startLoad(
         loadJobProjectId == null || loadJobProjectId.get() == null
             ? ref.getProjectId()
             : loadJobProjectId.get();
-    String bqLocation =
-        BigQueryHelpers.getDatasetLocation(datasetService, ref.getProjectId(), ref.getDatasetId());
+
+    String bqLocation;
+    try {
+      bqLocation =
+          BigQueryHelpers.getDatasetLocation(
+              datasetService, ref.getProjectId(), ref.getDatasetId());
+    } catch (IOException e) {
+      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
+      if (errorExtractor.itemNotFound(e)) {
+        throw new RuntimeException(
+            String.format(
+                "Dataset %s not found in project %s. Please ensure the dataset exists before running the pipeline.",
+                ref.getDatasetId(), ref.getProjectId()),
+            e);
+      }
+      // For other IOExceptions, wrap and throw
+      throw new RuntimeException(
+          String.format(
+              "Unable to get dataset location for dataset %s in project %s",
+              ref.getDatasetId(), ref.getProjectId()),
+          e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(
+          String.format(
+              "Interrupted while getting dataset location for dataset %s in project %s",
+              ref.getDatasetId(), ref.getProjectId()),
+          e);
+    }
 
     PendingJob retryJob =
         new PendingJob(

@@ -18,15 +18,22 @@
 package org.apache.beam.sdk.io.gcp.bigquery;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 import com.google.api.client.util.Data;
 import com.google.api.services.bigquery.model.Clustering;
+import com.google.api.services.bigquery.model.Dataset;
 import com.google.api.services.bigquery.model.ErrorProto;
 import com.google.api.services.bigquery.model.Job;
 import com.google.api.services.bigquery.model.JobReference;
 import com.google.api.services.bigquery.model.JobStatus;
 import com.google.api.services.bigquery.model.TableReference;
 import com.google.api.services.bigquery.model.TableRow;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Optional;
 import java.util.Random;
@@ -271,4 +278,39 @@ public void testClusteringJsonConversion() {
 
     assertEquals(clustering, BigQueryHelpers.clusteringFromJsonFields(jsonClusteringFields));
   }
+
+  @Test
+  public void testGetDatasetLocationWithNonExistentDataset()
+      throws IOException, InterruptedException {
+    BigQueryServices.DatasetService mockDatasetService =
+        mock(BigQueryServices.DatasetService.class);
+
+    IOException notFoundException = new IOException("Dataset not found");
+    when(mockDatasetService.getDataset("project", "nonexistent_dataset"))
+        .thenThrow(notFoundException);
+
+    try {
+      BigQueryHelpers.getDatasetLocation(mockDatasetService, "project", "nonexistent_dataset");
+      fail("Expected IOException to be thrown");
+    } catch (IOException e) {
+      assertEquals("Dataset not found", e.getMessage());
+      // Verify that getDataset was called only once (the IOException is not wrapped and re-thrown)
+      verify(mockDatasetService, times(1)).getDataset("project", "nonexistent_dataset");
+    }
+  }
+
+  @Test
+  public void testGetDatasetLocationWithValidDataset() throws IOException, InterruptedException {
+    BigQueryServices.DatasetService mockDatasetService =
+        mock(BigQueryServices.DatasetService.class);
+    Dataset mockDataset = new Dataset().setLocation("US");
+
+    when(mockDatasetService.getDataset("project", "existing_dataset")).thenReturn(mockDataset);
+
+    String location =
+        BigQueryHelpers.getDatasetLocation(mockDatasetService, "project", "existing_dataset");
+
+    assertEquals("US", location);
+    verify(mockDatasetService, times(1)).getDataset("project", "existing_dataset");
+  }
 }
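
The new tests cover the not-found and success paths; the InterruptedException branch of getDatasetLocation is not exercised. A possible additional test is sketched below (hypothetical, not part of this PR). It assumes the same Mockito setup as the tests above, would live in the same test class, and would additionally need import static org.junit.Assert.assertTrue.

  @Test
  public void testGetDatasetLocationRestoresInterruptFlag()
      throws IOException, InterruptedException {
    BigQueryServices.DatasetService mockDatasetService =
        mock(BigQueryServices.DatasetService.class);
    // DatasetService.getDataset declares InterruptedException, so it can be stubbed directly.
    when(mockDatasetService.getDataset("project", "some_dataset"))
        .thenThrow(new InterruptedException("interrupted"));

    try {
      BigQueryHelpers.getDatasetLocation(mockDatasetService, "project", "some_dataset");
      fail("Expected InterruptedException to be thrown");
    } catch (InterruptedException e) {
      // getDatasetLocation restores the interrupt flag before rethrowing;
      // Thread.interrupted() asserts that and clears it so later tests are unaffected.
      assertTrue(Thread.interrupted());
    }
  }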