From 0b71db416c20b49bbbc94ee609f63a104f1e0946 Mon Sep 17 00:00:00 2001 From: Prabodh Agarwal Date: Wed, 11 Sep 2024 23:24:06 +0530 Subject: [PATCH] pass the avro exception for better information --- .../scala/org/apache/hudi/AvroConversionUtils.scala | 4 ++-- .../org/apache/hudi/TestAvroConversionUtils.scala | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index cd75da3bb5da..a4d506be86e7 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -22,10 +22,9 @@ import org.apache.hudi.HoodieSparkUtils.sparkAdapter import org.apache.hudi.avro.AvroSchemaUtils import org.apache.hudi.exception.SchemaCompatibilityException import org.apache.hudi.internal.schema.HoodieSchemaException - import org.apache.avro.Schema.Type import org.apache.avro.generic.GenericRecord -import org.apache.avro.{JsonProperties, Schema} +import org.apache.avro.{AvroRuntimeException, JsonProperties, Schema} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} @@ -149,6 +148,7 @@ object AvroConversionUtils { val avroSchema = schemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) getAvroSchemaWithDefaults(avroSchema, structType) } catch { + case a: AvroRuntimeException => throw new HoodieSchemaException(a.getMessage, a) case e: Exception => throw new HoodieSchemaException("Failed to convert struct type to avro schema: " + structType, e) } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala index 5cd6ac3954ee..fb2a050e1165 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala @@ -22,6 +22,7 @@ import java.nio.ByteBuffer import java.util.Objects import org.apache.avro.Schema import org.apache.avro.generic.GenericData +import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types.{ArrayType, BinaryType, DataType, DataTypes, MapType, StringType, StructField, StructType} @@ -444,4 +445,13 @@ class TestAvroConversionUtils extends FunSuite with Matchers { private def checkNull(left: Any, right: Any): Boolean = { (left == null && right != null) || (left == null && right != null) } + + test("convert struct type with duplicate column names") { + val struct = new StructType().add("id", DataTypes.LongType, true) + .add("name", DataTypes.StringType, true) + .add("name", DataTypes.StringType, true) + the [HoodieSchemaException] thrownBy { + AvroConversionUtils.convertStructTypeToAvroSchema(struct, "SchemaName", "SchemaNS") + } should have message "Duplicate field name in record SchemaNS.SchemaName: name type:UNION pos:2 and name type:UNION pos:1." + } }