Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-28837: Iceberg: PartitionsTable#partitions returns incomplete list in case of partition evolution and NULL partition values #5698

Merged
merged 1 commit into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions iceberg/iceberg-catalog/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@
<artifactId>iceberg-core</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-api</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
6 changes: 6 additions & 0 deletions iceberg/iceberg-handler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@
<artifactId>iceberg-core</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-api</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
-- Mask current-snapshot-timestamp-ms
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
--! qt:replace:/(MAJOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction id as they will be allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask removed file size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ POSTHOOK: type: SHOW COMPACTIONS
CompactionId Database Table Partition Type State Worker host Worker Enqueue Time Start Time Duration(ms) HadoopJobId Error message Initiator host Initiator Pool name TxnId Next TxnId Commit Time Highest WriteId
#Masked# default ice_orc company_id=100/dept_id=1 MAJOR succeeded #Masked# manual iceberg 0 0 0 ---
#Masked# default ice_orc company_id=100/dept_id=2 MAJOR succeeded #Masked# manual iceberg 0 0 0 ---
#Masked# default ice_orc company_id=null/dept_id=null MAJOR refused #Masked# manual iceberg 0 0 0 ---
#Masked# default ice_orc --- MAJOR succeeded #Masked# manual iceberg 0 0 0 ---
PREHOOK: query: select `partition`, spec_id, content, record_count
from default.ice_orc.files
Expand Down
2 changes: 2 additions & 0 deletions iceberg/iceberg-shading/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,11 @@
<include>com.google*:*</include>
<include>com.fasterxml*:*</include>
<include>com.github.ben-manes*:*</include>
<include>org.apache.hive:patched-iceberg-api</include>
<include>org.apache.hive:patched-iceberg-core</include>
</includes>
<excludes>
<exclude>org.apache.iceberg:iceberg-api</exclude>
<exclude>org.apache.iceberg:iceberg-core</exclude>
</excludes>
</artifactSet>
Expand Down
6 changes: 6 additions & 0 deletions iceberg/patched-iceberg-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
<version>${iceberg.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-bundled-guava</artifactId>
<version>${iceberg.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
Expand All @@ -59,6 +64,7 @@
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/classes</outputDirectory>
<excludes>
**/StructProjection.class
</excludes>
</artifactItem>
</artifactItems>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.util;

import java.util.List;
import java.util.Set;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.primitives.Ints;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StructType;

public class StructProjection implements StructLike {
/**
* Creates a projecting wrapper for {@link StructLike} rows.
*
* <p>This projection does not work with repeated types like lists and maps.
*
* @param schema schema of rows wrapped by this projection
* @param ids field ids from the row schema to project
* @return a wrapper to project rows
*/
public static StructProjection create(Schema schema, Set<Integer> ids) {
StructType structType = schema.asStruct();
return new StructProjection(structType, TypeUtil.project(structType, ids));
}

/**
* Creates a projecting wrapper for {@link StructLike} rows.
*
* <p>This projection does not work with repeated types like lists and maps.
*
* @param dataSchema schema of rows wrapped by this projection
* @param projectedSchema result schema of the projected rows
* @return a wrapper to project rows
*/
public static StructProjection create(Schema dataSchema, Schema projectedSchema) {
return new StructProjection(dataSchema.asStruct(), projectedSchema.asStruct());
}

/**
* Creates a projecting wrapper for {@link StructLike} rows.
*
* <p>This projection does not work with repeated types like lists and maps.
*
* @param structType type of rows wrapped by this projection
* @param projectedStructType result type of the projected rows
* @return a wrapper to project rows
*/
public static StructProjection create(StructType structType, StructType projectedStructType) {
return new StructProjection(structType, projectedStructType);
}

/**
* Creates a projecting wrapper for {@link StructLike} rows.
*
* <p>This projection allows missing fields and does not work with repeated types like lists and
* maps.
*
* @param structType type of rows wrapped by this projection
* @param projectedStructType result type of the projected rows
* @return a wrapper to project rows
*/
public static StructProjection createAllowMissing(
StructType structType, StructType projectedStructType) {
return new StructProjection(structType, projectedStructType, true);
}

private final StructType type;
private final int[] positionMap;
private final StructProjection[] nestedProjections;
private StructLike struct;

private StructProjection(
StructType type, int[] positionMap, StructProjection[] nestedProjections) {
this.type = type;
this.positionMap = positionMap;
this.nestedProjections = nestedProjections;
}

private StructProjection(StructType structType, StructType projection) {
this(structType, projection, false);
}

@SuppressWarnings("checkstyle:CyclomaticComplexity")
private StructProjection(StructType structType, StructType projection, boolean allowMissing) {
this.type = projection;
this.positionMap = new int[projection.fields().size()];
this.nestedProjections = new StructProjection[projection.fields().size()];

// set up the projection positions and any nested projections that are needed
List<Types.NestedField> dataFields = structType.fields();
for (int pos = 0; pos < positionMap.length; pos += 1) {
Types.NestedField projectedField = projection.fields().get(pos);

boolean found = false;
for (int i = 0; !found && i < dataFields.size(); i += 1) {
Types.NestedField dataField = dataFields.get(i);
if (projectedField.fieldId() == dataField.fieldId()) {
found = true;
positionMap[pos] = i;
switch (projectedField.type().typeId()) {
case STRUCT:
nestedProjections[pos] =
new StructProjection(
dataField.type().asStructType(), projectedField.type().asStructType());
break;
case MAP:
MapType projectedMap = projectedField.type().asMapType();
MapType originalMap = dataField.type().asMapType();

boolean keyProjectable =
!projectedMap.keyType().isNestedType() ||
projectedMap.keyType().equals(originalMap.keyType());
boolean valueProjectable =
!projectedMap.valueType().isNestedType() ||
projectedMap.valueType().equals(originalMap.valueType());
Preconditions.checkArgument(
keyProjectable && valueProjectable,
"Cannot project a partial map key or value struct. Trying to project %s out of %s",
projectedField,
dataField);

nestedProjections[pos] = null;
break;
case LIST:
ListType projectedList = projectedField.type().asListType();
ListType originalList = dataField.type().asListType();

boolean elementProjectable =
!projectedList.elementType().isNestedType() ||
projectedList.elementType().equals(originalList.elementType());
Preconditions.checkArgument(
elementProjectable,
"Cannot project a partial list element struct. Trying to project %s out of %s",
projectedField,
dataField);

nestedProjections[pos] = null;
break;
default:
nestedProjections[pos] = null;
}
}
}

if (!found && projectedField.isOptional() && allowMissing) {
positionMap[pos] = -1;
nestedProjections[pos] = null;
} else if (!found) {
throw new IllegalArgumentException(
String.format("Cannot find field %s in %s", projectedField, structType));
}
}
}

public int projectedFields() {
return (int) Ints.asList(positionMap).stream().filter(val -> val != -1).count();
}

public StructProjection wrap(StructLike newStruct) {
this.struct = newStruct;
return this;
}

public StructProjection copyFor(StructLike newStruct) {
return new StructProjection(type, positionMap, nestedProjections).wrap(newStruct);
}

@Override
public int size() {
return type.fields().size();
}

@Override
public <T> T get(int pos, Class<T> javaClass) {
// struct can be null if wrap is not called first before the get call
// or if a null struct is wrapped.
if (struct == null) {
return null;
}

int structPos = positionMap[pos];
if (nestedProjections[pos] != null) {
StructLike nestedStruct = struct.get(structPos, StructLike.class);
if (nestedStruct == null) {
return null;
}

return javaClass.cast(nestedProjections[pos].wrap(nestedStruct));
}

if (structPos != -1) {
return struct.get(structPos, javaClass);
} else {
return null;
}
}

@Override
public <T> void set(int pos, T value) {
throw new UnsupportedOperationException("Cannot set fields in a TypeProjection");
}
}
2 changes: 2 additions & 0 deletions iceberg/patched-iceberg-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@
<excludes>
**/HadoopInputFile.class
**/HadoopTableOperations.class
**/StructLikeMap.class
**/StructLikeWrapper.class
org.apache.iceberg.avro.ValueReaders.class
org.apache.iceberg.avro.ValueWriters.class
org.apache.iceberg.BaseScan.class
Expand Down
Loading
Loading