Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

Expand Down Expand Up @@ -58,6 +61,7 @@
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.volume.Volume;
import org.apache.accumulo.manager.Manager;
import org.apache.accumulo.manager.tableOps.ManagerRepo;
import org.apache.accumulo.manager.tableOps.Utils;
Expand All @@ -66,6 +70,7 @@
import org.apache.accumulo.server.conf.TableConfiguration;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class WriteExportFiles extends ManagerRepo {
Expand Down Expand Up @@ -186,8 +191,33 @@ public static void exportTable(VolumeManager fs, ServerContext context, String t
dataOut.close();
dataOut = null;

createDistcpFile(fs, exportDir, exportMetaFilePath, uniqueFiles);

// make map containing a volume and corresponding files
final Map<Volume,Set<String>> volumeFileMap = new HashMap<>();
final Collection<Volume> configuredVolumes = fs.getVolumes();
configuredVolumes.forEach(vol -> {
final FileSystem dfs = vol.getFileSystem();
uniqueFiles.values().forEach(file -> {
Path p = null;
try {
p = dfs.resolvePath(new Path(file));
} catch (IOException e) {
throw new RuntimeException(e);
}
if (vol.containsPath(p)) {
volumeFileMap.computeIfAbsent(vol, k -> new HashSet<>()).add(file);
}
});
});

// for each entry in volumeFileMap, get 'name' of volume to name distcp.txt file
// and call createDistcpFile
for (Map.Entry<Volume,Set<String>> entry : volumeFileMap.entrySet()) {
String keyValueString = entry.getKey().toString();
String[] keyValueArray = keyValueString.split("/");
String volumeName = keyValueArray[2];
createDistcpFile(fs, exportDir, exportMetaFilePath, volumeFileMap.get(entry.getKey()),
volumeName);
}
} finally {
if (dataOut != null) {
dataOut.close();
Expand All @@ -196,12 +226,16 @@ public static void exportTable(VolumeManager fs, ServerContext context, String t
}

private static void createDistcpFile(VolumeManager fs, String exportDir, Path exportMetaFilePath,
Map<String,String> uniqueFiles) throws IOException {
BufferedWriter distcpOut = new BufferedWriter(
new OutputStreamWriter(fs.create(new Path(exportDir, "distcp.txt")), UTF_8));
Set<String> uniqueFiles, String volumeName) throws IOException {
if (volumeName.contains(":")) {
volumeName = volumeName.replace(":", "-");
}

BufferedWriter distcpOut = new BufferedWriter(new OutputStreamWriter(
fs.create(new Path(exportDir, "distcp-" + volumeName + ".txt")), UTF_8));

try {
for (String file : uniqueFiles.values()) {
for (String file : uniqueFiles) {
distcpOut.append(file);
distcpOut.newLine();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.accumulo.test;

import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.File;
import java.time.Duration;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.schema.MetadataSchema;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.harness.AccumuloClusterHarness;
import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ExportTableCommandWithMultipleVolumesIT extends AccumuloClusterHarness {
private static final Logger log =
LoggerFactory.getLogger(ExportTableCommandWithMultipleVolumesIT.class);

Path v1, v2;

public static String[] row_numbers = "1,2,3,4,5,6,7,8,9,10".split(",");

String baseDirStr = "";
String baseDir2Str = "";
String originalVolume = "";
String secondVolume = "";

@Override
protected Duration defaultTimeout() {
  // This IT should finish quickly; cap the per-test runtime at sixty seconds.
  return Duration.ofSeconds(60);
}

@Override
public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
File baseDir = cfg.getDir();

// get first volume name
baseDirStr = baseDir.toString();
String[] baseDirArray = baseDirStr.split("/");
originalVolume = baseDirArray[2];

// get second volume name
String[] baseDir2Array = baseDirArray;
baseDir2Array[2] = baseDir2Array[2] + "2";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test fails for me since it is trying to use a directory that does not exist: dgarguilo2 is not a user and is not a directory. I suggest looking into using a @TempDir like we do with other tests so a directory is sure to be created and cleaned up for this test.

Details
2023-07-25T16:42:33,408 [init.Initialize] DEBUG: creating instance directories for base: file:/home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1
2023-07-25T16:42:33,427 [init.Initialize] INFO : Directory file:/home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1/version/11 created - call returned true
2023-07-25T16:42:33,430 [init.Initialize] INFO : Directory file:/home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1/instance_id created - call returned true
2023-07-25T16:42:33,457 [init.Initialize] INFO : Created instanceId file file:/home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1/instance_id/7f6b05aa-4141-4781-ab1b-6a7f7667fae4 in hdfs
2023-07-25T16:42:33,457 [init.Initialize] DEBUG: creating instance directories for base: file:/home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2
2023-07-25T16:42:33,462 [init.Initialize] INFO : Directory file:/home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2/version/11 created - call returned false
2023-07-25T16:42:33,463 [init.Initialize] INFO : Directory file:/home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2/instance_id created - call returned false
2023-07-25T16:42:33,465 [init.Initialize] ERROR: Problem creating new directories
java.io.IOException: Mkdirs failed to create file:/home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2/instance_id
	at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:561) ~[hadoop-client-api-3.3.5.jar:?]
	at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:549) ~[hadoop-client-api-3.3.5.jar:?]
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1210) ~[hadoop-client-api-3.3.5.jar:?]
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1165) ~[hadoop-client-api-3.3.5.jar:?]
	at org.apache.hadoop.fs.FileSystem.createNewFile(FileSystem.java:1499) ~[hadoop-client-api-3.3.5.jar:?]
	at org.apache.accumulo.server.fs.VolumeManagerImpl.createNewFile(VolumeManagerImpl.java:177) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.createDirs(Initialize.java:291) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.doInit(Initialize.java:175) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.execute(Initialize.java:543) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.main(Initialize.java:583) ~[classes/:?]
	at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:?]
	at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:?]
	at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
	at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
	at org.apache.accumulo.start.Main.lambda$execMainClass$1(Main.java:118) ~[classes/:?]
	at java.lang.Thread.run(Thread.java:829) [?:?]
2023-07-25T16:42:33,473 [init.Initialize] ERROR: FATAL: Problem during initialize
java.io.IOException: Problem creating directories on [file:/home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2, file:/home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1]
	at org.apache.accumulo.server.init.Initialize.doInit(Initialize.java:176) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.execute(Initialize.java:543) ~[classes/:?]
	at org.apache.accumulo.server.init.Initialize.main(Initialize.java:583) ~[classes/:?]
	at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:?]
	at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:?]
	at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
	at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
	at org.apache.accumulo.start.Main.lambda$execMainClass$1(Main.java:118) ~[classes/:?]
	at java.lang.Thread.run(Thread.java:829) [?:?]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the intent of the new test was to create and use a new volume. It is not possible to create a new volume in a test however, so I think the best that can be done is to use different directories which can be done with junits TempDir.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After thinking about this a bit more and looking at the code, I don't think what I suggested will work. In fact I am not sure of a way to write a test for this code. It seems that, to properly test this code, two volumes need to be present. However, an additional volume can not be created or expected to already exist on the machine for a test.

I am able to get the new test to pass only after manually creating a new directory for the test to create resources inside of. For example, with my username dgarguilo, the test expects to be able to create the following directories:

  • /home/dgarguilo/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v1
  • /home/dgarguilo2/github/accumulo/test/target/mini-tests/org.apache.accumulo.test.ExportTableCommandWithMultipleVolumesIT_testExportCommand/volumes/v2

The second of which I had to create manually. Obviously this can't be expected to happen for a test to pass.

In conclusion, after manually creating the needed directory and having the test pass, it seems like these changes are working but I don't think we can keep the test as is. I am not sure how to move forward with these changes. Should the new IT be removed and just rely on manual verification of these changes or should something else happen?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, an additional volume can not be created or expected to already exist on the machine for a test.

I think this statement is incorrect. You should be able to create more than one volume for the test. A volume is simply a URI. You can certainly create two volumes (with file:///path/to/tempdir/vol1 and file:///path/to/tempdir/vol2) by creating these two temporary directories at the beginning of the test, and configuring those in MiniCluster before it starts up. What is preventing that?

However, you don't even need to do that much. The table doesn't need to be online or any of its files be read, or actually exist. You can just create an offline table, and add metadata entries that look like the table has files in separate volumes. We don't need to actually read the files to do the export and verify that the separate distcp files are created. You can just check that the files were created and contain the expected entries, based on the mock entries you inserted into the metadata of the offline table.

Copy link
Contributor

@ivakegg ivakegg Jul 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you say creating the directory manually, do you simply mean 'mkdir dir' in your shell? If that is all that is needed then we can certainly create the directory in the test case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that new directories can be created by a test. I think the difference in this case is that the distcp files that are created use the root of the path (or rather the second index) to name those files:

String keyValueString = entry.getKey().toString();
String[] keyValueArray = keyValueString.split("/");
String volumeName = keyValueArray[2];
createDistcpFile(fs, exportDir, exportMetaFilePath, volumeFileMap.get(entry.getKey()), volumeName);

https://github.com/AlbertWhitlock/accumulo/blob/fced61f2479dea155610386a9ec6ae11a8a0cfb4/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/tableExport/WriteExportFiles.java#L215

So in the case of creating a temporary directory from the test, the distcp files would be created with the same name unless we have a way of creating directories with different subdirectories at the level at which the code above uses. That is what I was trying to convey in my previous comment, not that its impossible to create a directory from a test.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if you're saying that the code in this PR is creating separate distcp files whose names collide (which shouldn't happen... this PR should create uniquely named files if it's creating more than one as the proposed solution), or if you're saying that all the local files look like they're part of the same volume and end up in the same file because all files in file:/// looks like a single filesystem. I think I would need specific file names and resulting file contents, as an example, to understand what you're saying.

However, I've noticed another problem with the current implementation. The current implementation seems to be looping over the currently configured volumes and writing out separate files for each of those volumes configured, if it sees files that belong in that volume. However... what about files in the metadata that are not part of any currently configured volume? The number, name, and organization of the files should be based on the volumes represented in the absolute paths of the files seen in the metadata table... not just grouped by whether they match one of the currently configured volumes. The current implementation looks like it would just ignore those files.

Without diving in to implement this myself, I'm not exactly sure what the expected behavior is for file:/// paths, but the basic functionality I would expect the following set of mock files seen to have 3 separate groups, each with 2 files:

group1:
hdfs://nn1:8020/accumulo/path/to/table1/tablet1/file1.rf
hdfs://nn1:8020/accumulo/path/to/table1/tablet2/file1.rf

group2:
hdfs://nn1:8021/accumulo/path/to/table1/tablet3/file1.rf
hdfs://nn1:8021/accumulo/path/to/table1/tablet4/file1.rf

group3:
hdfs://nn3:8020/accumulo/path/to/table1/tablet5/file1.rf
hdfs://nn3:8020/accumulo/path/to/table1/tablet6/file1.rf

This should be true, even if only hdfs://nn1:8020 were currently configured in instance.volumes, and nn2 and nn3 were previous volumes that are being decommissioned, but data still exists on them. This should also account for the volume replacements.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Christopher — I am only making those volumes as a place for the rfiles to be held for the test. I cannot just place non-existent file entries into the metadata and then run the exportTable command: the WriteExportFiles class checks that the files exist. When I create local directories to hold the rfiles, the test passes — it creates the multiple distcp files and then deletes the directories it used for the test. Your concern that the output should be based on the entries in the metadata table is correct; this test setup is just a way to get some entries into the metadata table and then run the exportTable command.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's frustrating that the WriteExportFiles class requires the files to actually exist. That definitely limits the ways this can be tested as an integration test. But, we can still have good unit test coverage, if WriteExportFiles can be written with a package-private method that does the grouping of the input files to the designated output file, without the files existing. You might need to rewrite some of the implementation code to be more testable.

But, regardless of the test code, the implementation is still incorrect, in that it loops over instance.volumes, and looks for entries in the metadata table that matches them. What it should do is loop over the entire set of files found in the metadata table, and group them based on filesystem, regardless of what is set in instance.volumes. You can write a unit test for the grouping function that doesn't require a running system or real files to exist. You just pass a list of file names that could be seen in the metadata table, and verify the grouping function has properly grouped them by destination filename.

secondVolume = baseDir2Array[2];

// make second volume base directory
for (String element : baseDir2Array) {
baseDir2Str = baseDir2Str + "/" + element;
}
File baseDir2 = new File(baseDir2Str);

File v1f = new File(baseDir, "volumes/v1");
File v2f = new File(baseDir2, "volumes/v2");

v1 = new Path("file://" + v1f.getAbsolutePath());
v2 = new Path("file://" + v2f.getAbsolutePath());

// Run MAC on two locations in the local file system
cfg.setProperty(Property.INSTANCE_VOLUMES, v1 + "," + v2);

// use raw local file system so walogs sync and flush will work
hadoopCoreSite.set("fs.file.impl", RawLocalFileSystem.class.getName());
}

/**
 * Verifies that exporting a table whose files span two configured volumes produces a separate
 * distcp file per volume (named {@code distcp-<volumeName>.txt}) in the export directory.
 *
 * Relies on fields populated in {@code configureMiniCluster}: {@code originalVolume} /
 * {@code secondVolume} (the volume-name components used in the distcp file names) and
 * {@code v1} / {@code v2} (the two instance volume paths).
 */
@Test
public void testExportCommand() throws Exception {
try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
FileSystem fs = cluster.getFileSystem();

// Create a fresh, uniquely named table for this test run.
final String tableName = getUniqueNames(1)[0];
client.tableOperations().create(tableName);

// add splits to table so its data is spread across multiple tablets
SortedSet<Text> partitions = new TreeSet<>();
for (String s : row_numbers) {
partitions.add(new Text(s));
}
client.tableOperations().addSplits(tableName, partitions);

// Write 50k entries so the table has real data to persist.
try (BatchWriter bw = client.createBatchWriter(tableName)) {
for (int i = 1; i <= 50000; i++) {
Mutation m = new Mutation(Integer.toString(i));
m.put(Integer.toString(i), "", String.format("Entry number %d.", i));
bw.addMutation(m);
}
}

// Compact and flush so the written data is persisted to files before the export.
client.tableOperations().compact(tableName, null, null, true, true);
client.tableOperations().flush(tableName, null, null, true);

// Take the table offline, then export it into a temp directory under the cluster path.
Path outputDir = new Path(cluster.getTemporaryPath(), getClass().getName());
Path exportDir = new Path(outputDir, "export");
client.tableOperations().offline(tableName, true);
client.tableOperations().exportTable(tableName, exportDir.toString());

// Make sure the per-volume distcp files that exportTable creates exist, one per volume.
Path distcpOne = new Path(exportDir, "distcp-" + originalVolume + ".txt");
Path distcpTwo = new Path(exportDir, "distcp-" + secondVolume + ".txt");
assertTrue(fs.exists(distcpOne), "Distcp file doesn't exist for original volume");
assertTrue(fs.exists(distcpTwo), "Distcp file doesn't exist for second volume");

// Sanity-check the metadata table: every data-file entry for this table should reside on
// one of the two configured volumes. NOTE(review): the range "1".."1<" assumes the table
// id is "1" — TODO confirm, or derive the id via tableOperations instead of hard-coding.
try (Scanner scanner = client.createScanner(MetadataTable.NAME, Authorizations.EMPTY)) {
scanner.setRange(new Range("1", "1<"));
scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);

for (Map.Entry<Key,Value> entry : scanner) {
boolean inV1 = entry.getKey().getColumnQualifier().toString().contains(v1.toString());
boolean inV2 = entry.getKey().getColumnQualifier().toString().contains(v2.toString());
assertTrue(inV1 || inV2);
}
}

// Register the test directories for deletion when the filesystem shuts down.
fs.deleteOnExit(v1);
fs.deleteOnExit(v2);
fs.deleteOnExit(outputDir);
}
}
}